In this script, we study a few of the genes that showed up in the GTEX V6 analysis or the single cell analysis (Deng 2014) and see whether they are tissue specific and whether their expression is upregulated and/or downregulated in the different tissues.
rm(list=ls())
library(data.table)
#install_github('kkdey/maptpx')
library(maptpx)
## Loading required package: slam
library(CountClust)
## Loading required package: ggplot2
library(data.table)
data <- data.frame(fread('../external_data/GTEX_V6/cis_gene_expression.txt'));
## Warning in fread("../external_data/GTEX_V6/cis_gene_expression.txt"):
## Starting data input on line 2 and discarding line 1 because it has too
## few or too many items to be column names or data: "V1" "V3" "V4" "V5"
## "V6" "V7" "V8" "V9" "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18"
## "V19" "V20" "V21" "V22" "V23" "V24" "V25" "V26" "V27" "V28" "V29" "V30"
## "V31" "V32" "V33" "V34" "V35" "V36" "V37" "V38" "V39" "V40" "V41" "V42"
## "V43" "V44" "V45" "V46" "V47" "V48" "V49" "V50" "V51" "V52" "V53" "V54"
## "V55" "V56" "V57" "V58" "V59" "V60" "V61" "V62" "V63" "V64" "V65" "V66"
## "V67" "V68" "V69" "V70" "V71" "V72" "V73" "V74" "V75" "V76" "V77" "V78"
## "V79" "V80" "V81" "V82" "V83" "V84" "V85" "V86" "V87" "V88" "V89" "V90"
## "V91" "V92" "V93" "V94" "V95" "V96" "V97" "V98" "V99" "V100" "V101" "V102"
## "V103" "V104" "V105" "V106" "V107" "V108" "V109" "V110" "V111" "V112"
## "V113" "V114" "V115" "V116" "V117" "V118" "V119" "V120" "V121" "V122"
## "V123" "V124" "V125" "V126" "V127" "V128" "V129" "V130" "V131" "V132"
## "V133" "V134" "V135" "V136" "V137" "V138" "V139" "V140" "V141" "V142 [...
## truncated]
##
Read 0.0% of 16069 rows
Read 62.2% of 16069 rows
Read 16069 rows and 8557 (of 8557) columns from 0.532 GB file in 00:00:10
matdata <- data[,-(1:2)];
samples_id=read.table("../external_data/GTEX_V6/samples_id.txt")[,3];
We first study the haemoglobin alpha and the haemoglobin beta genes that seem to be the primary driver genes separating the whole blood tissue samples from the other tissue samples. We plot the log reads expression of each of these three genes against the different tissue sample labels.
ensemble_ids <- c("ENSG00000206172", "ENSG00000188536", "ENSG00000244734")
gene_names <- data[,2];
samples_id <- as.character(samples_id)
samples_id[grep("Nucleus", samples_id)] = "Brain -N. accumbens (basal ganglia)"
samples_id[grep("Gastroe", samples_id)] = "Esophagus -Gastroesophageal Jn."
samples_id[grep("cingulate", samples_id)] = "Brain - Anterior cortex (BA24)."
#samples_id[grep("Brain", samples_id)] = "Brain"
samples_id <- as.factor(samples_id)
HBA1 expression pattern
index <- grep("ENSG00000206172", gene_names)
plot(1:8555, log(matdata[index,]+1), type="l", col="red", ylab="log expr.", main="HBA1 expression", xaxt="n")
labels = match(unique(samples_id), samples_id);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);
#dev.off()
HBA2 expression pattern
index <- grep("ENSG00000188536", gene_names)
plot(1:8555, log(matdata[index,]+1), type="l", col="red", ylab="log expr.", main="HBA2 expression", xaxt="n")
labels = match(unique(samples_id), samples_id);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);