Overview

In this script, we study a few of the genes that showed up in the GTEX V6 analysis or the single cell analysis (Deng 2014) and see whether they are tissue specific and whether their expression is upregulated and/or downregulated in the different tissues.

Reading GTEx data

rm(list=ls())
library(data.table)
#install_github('kkdey/maptpx') 
library(maptpx)
## Loading required package: slam
library(CountClust)
## Loading required package: ggplot2
library(data.table)
data <- data.frame(fread('../external_data/GTEX_V6/cis_gene_expression.txt'));
## Warning in fread("../external_data/GTEX_V6/cis_gene_expression.txt"):
## Starting data input on line 2 and discarding line 1 because it has too
## few or too many items to be column names or data: "V1" "V3" "V4" "V5"
## "V6" "V7" "V8" "V9" "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18"
## "V19" "V20" "V21" "V22" "V23" "V24" "V25" "V26" "V27" "V28" "V29" "V30"
## "V31" "V32" "V33" "V34" "V35" "V36" "V37" "V38" "V39" "V40" "V41" "V42"
## "V43" "V44" "V45" "V46" "V47" "V48" "V49" "V50" "V51" "V52" "V53" "V54"
## "V55" "V56" "V57" "V58" "V59" "V60" "V61" "V62" "V63" "V64" "V65" "V66"
## "V67" "V68" "V69" "V70" "V71" "V72" "V73" "V74" "V75" "V76" "V77" "V78"
## "V79" "V80" "V81" "V82" "V83" "V84" "V85" "V86" "V87" "V88" "V89" "V90"
## "V91" "V92" "V93" "V94" "V95" "V96" "V97" "V98" "V99" "V100" "V101" "V102"
## "V103" "V104" "V105" "V106" "V107" "V108" "V109" "V110" "V111" "V112"
## "V113" "V114" "V115" "V116" "V117" "V118" "V119" "V120" "V121" "V122"
## "V123" "V124" "V125" "V126" "V127" "V128" "V129" "V130" "V131" "V132"
## "V133" "V134" "V135" "V136" "V137" "V138" "V139" "V140" "V141" "V142 [...
## truncated]
## 
Read 0.0% of 16069 rows
Read 62.2% of 16069 rows
Read 16069 rows and 8557 (of 8557) columns from 0.532 GB file in 00:00:10
matdata <- data[,-(1:2)];
samples_id=read.table("../external_data/GTEX_V6/samples_id.txt")[,3];

Studying HBA1, HBA2 and HBB

We first study the haemoglobin alpha and the haemoglobin beta genes that seem to be the primary driver genes separating the whole blood tissue samples from the other tissue samples. We plot the log reads expression of each of these three genes against the different tissue sample labels.

ensemble_ids <- c("ENSG00000206172", "ENSG00000188536", "ENSG00000244734")
gene_names <- data[,2];

samples_id <- as.character(samples_id)
samples_id[grep("Nucleus", samples_id)] = "Brain -N. accumbens (basal ganglia)"
samples_id[grep("Gastroe", samples_id)] = "Esophagus -Gastroesophageal Jn."
samples_id[grep("cingulate", samples_id)] = "Brain - Anterior cortex (BA24)."
#samples_id[grep("Brain", samples_id)] = "Brain"
samples_id <- as.factor(samples_id)

HBA1

HBA1 expression pattern

index <- grep("ENSG00000206172", gene_names)
plot(1:8555, log(matdata[index,]+1), type="l", col="red",  ylab="log expr.", main="HBA1 expression", xaxt="n")

labels = match(unique(samples_id), samples_id);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);

#dev.off()

HBA2

HBA2 expression pattern

index <- grep("ENSG00000188536", gene_names)
plot(1:8555, log(matdata[index,]+1), type="l", col="red",  ylab="log expr.", main="HBA2 expression", xaxt="n")

labels = match(unique(samples_id), samples_id);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);