Overview

In this script, we study a few of the genes that showed up in the GTEX V6 analysis or the single cell analysis (Deng 2014) and see whether they are tissue specific and whether their expression is upregulated and/or downregulated in the different tissues.

Reading GTEx data

rm(list=ls())
library(data.table)
#install_github('kkdey/maptpx') 
library(maptpx)
## Loading required package: slam
library(CountClust)
## Loading required package: ggplot2
library(data.table)
data <- data.frame(fread('../external_data/GTEX_V6/cis_gene_expression.txt'));
## Warning in fread("../external_data/GTEX_V6/cis_gene_expression.txt"):
## Starting data input on line 2 and discarding line 1 because it has too
## few or too many items to be column names or data: "V1" "V3" "V4" "V5"
## "V6" "V7" "V8" "V9" "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18"
## "V19" "V20" "V21" "V22" "V23" "V24" "V25" "V26" "V27" "V28" "V29" "V30"
## "V31" "V32" "V33" "V34" "V35" "V36" "V37" "V38" "V39" "V40" "V41" "V42"
## "V43" "V44" "V45" "V46" "V47" "V48" "V49" "V50" "V51" "V52" "V53" "V54"
## "V55" "V56" "V57" "V58" "V59" "V60" "V61" "V62" "V63" "V64" "V65" "V66"
## "V67" "V68" "V69" "V70" "V71" "V72" "V73" "V74" "V75" "V76" "V77" "V78"
## "V79" "V80" "V81" "V82" "V83" "V84" "V85" "V86" "V87" "V88" "V89" "V90"
## "V91" "V92" "V93" "V94" "V95" "V96" "V97" "V98" "V99" "V100" "V101" "V102"
## "V103" "V104" "V105" "V106" "V107" "V108" "V109" "V110" "V111" "V112"
## "V113" "V114" "V115" "V116" "V117" "V118" "V119" "V120" "V121" "V122"
## "V123" "V124" "V125" "V126" "V127" "V128" "V129" "V130" "V131" "V132"
## "V133" "V134" "V135" "V136" "V137" "V138" "V139" "V140" "V141" "V142 [...
## truncated]
## 
Read 0.0% of 16069 rows
Read 62.2% of 16069 rows
Read 16069 rows and 8557 (of 8557) columns from 0.532 GB file in 00:00:10
matdata <- data[,-(1:2)];
samples_id=read.table("../external_data/GTEX_V6/samples_id.txt")[,3];

Studying HBA1, HBA2 and HBB

We first study the haemoglobin alpha and the haemoglobin beta genes that seem to be the primary driver genes separating the whole blood tissue samples from the other tissue samples. We plot the log reads expression of each of these three genes against the different tissue sample labels.

ensemble_ids <- c("ENSG00000206172", "ENSG00000188536", "ENSG00000244734")
gene_names <- data[,2];

samples_id <- as.character(samples_id)
samples_id[grep("Nucleus", samples_id)] = "Brain -N. accumbens (basal ganglia)"
samples_id[grep("Gastroe", samples_id)] = "Esophagus -Gastroesophageal Jn."
samples_id[grep("cingulate", samples_id)] = "Brain - Anterior cortex (BA24)."
#samples_id[grep("Brain", samples_id)] = "Brain"
samples_id <- as.factor(samples_id)

HBA1

HBA1 expression pattern

index <- grep("ENSG00000206172", gene_names)
plot(1:8555, log(matdata[index,]+1), type="l", col="red",  ylab="log expr.", main="HBA1 expression", xaxt="n")

labels = match(unique(samples_id), samples_id);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);

#dev.off()

HBA2

HBA2 expression pattern

index <- grep("ENSG00000188536", gene_names)
plot(1:8555, log(matdata[index,]+1), type="l", col="red",  ylab="log expr.", main="HBA2 expression", xaxt="n")

labels = match(unique(samples_id), samples_id);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);

#dev.off()

HBB

HBB expression

index <- grep("ENSG00000244734", gene_names)
plot(1:8555, log(matdata[index,]+1), type="l", col="red",  ylab="log expr.", main="HBB expression", xaxt="n")

labels = match(unique(samples_id), samples_id);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);

#dev.off()

PHF7

HBB expression

index <- grep("ENSG00000010318", gene_names)
par(mar=c(12,2,2,3))
plot(1:8555, log(matdata[index,]+1), type="l", col="red",  ylab="log expr.", main="PHF7 log gene expression", xaxt="n")

labels = match(unique(samples_id), samples_id);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);

#dev.off()

PRM1

HBB expression

index <- grep("ENSG00000175646", gene_names)
par(mar=c(12,2,2,3))
plot(1:8555, log(matdata[index,]+1), type="l", col="red",  ylab="log expr.", main="PRM1 log gene expression", xaxt="n")

labels = match(unique(samples_id), samples_id);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);

#dev.off()

PRM2

HBB expression

index <- grep("ENSG00000122304", gene_names)
par(mar=c(12,2,2,3))
plot(1:8555, log(matdata[index,]+1), type="l", col="red",  ylab="log expr.", main="PRM2 log gene expression", xaxt="n")

labels = match(unique(samples_id), samples_id);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);

#dev.off()

Brain GTEx genes study

We filter out the tissue samples coming from the brain only.

brain_indices <- grep("Brain", samples_id)
brain_matdata <- matdata[,brain_indices];
brain_ids <- samples_id[brain_indices];

MBP gene

We study the MBP gene which was found to be significantly enriched in brain spinal cord.

index <- grep("ENSG00000197971",gene_names);
brain_ids_ordered <- brain_ids[order(brain_ids)]
plot(1:length(brain_indices), log(brain_matdata[index,order(brain_ids)]+1), type="l", col="red",  ylab="log expr.", main="MBP expression", xaxt="n", xlab="")

labels = match(unique(brain_ids_ordered), brain_ids_ordered);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],1259);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(brain_ids_ordered),las=3, cex.axis=0.8);

#dev.off()

Deng 2014 analysis- Data preprocessing

reads <- data.frame(fread('../external_data/Deng_Data/Deng_cell_data.txt'),row.names=1);
## Warning in fread("../external_data/Deng_Data/Deng_cell_data.txt"): Starting
## data input on line 2 and discarding line 1 because it has too few or too
## many items to be column names or data: "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "4cell" "4cell"
## "4cell" "4cell" "4cell" "4cell" "4cell" "4cell" "4cell" "4cell" "4cell"
## "4cell" "4cell" "4cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell"
## "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell"
## "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell"
## "8cell" "8cell" "8cell" "8cell" "BXC" "BXC" "BXC" "BXC" "BXC" "BXC" "BXC"
## "BXC" "BXC" "BXC" "BXC" "BXC" "BXC" "C57twocell" "C [... truncated]
files <- list.files("../external_data/Deng_Data/Deng_files/");
cell_meta <- unlist(lapply(files, function(x) strsplit(x,"_")[[1]][2]));
embryo_id <- unlist(lapply(files, function(x) strsplit(strsplit(x,"_")[[1]][3],"-")[[1]][1]));
cell_meta[grep("zy",cell_meta)]="zy";
cell_meta[grep("smartseq2", files)]="8cell_nd";
cell_meta[grep("8cell_2pooled", files)]="8cell_nd";
cell_meta[grep("8cell_split", files)]="8cell_nd";
cell_meta[grep("16cell_2pooled", files)]="16cell_nd";
cell_meta[grep("16cell_split", files)]="16cell_nd";
indices_not_reqd <- which(cell_meta=="BXC"   | cell_meta=="C57twocell" | cell_meta=="fibroblast" | cell_meta =="8cell_nd" | cell_meta == "16cell_nd");
cell_meta <- cell_meta[-indices_not_reqd];
embryo_id <- embryo_id[-indices_not_reqd];
embryo_id[which(embryo_id == "expression.txt")]="."
cell_embryo <- paste0(cell_meta,"_", embryo_id);
reads <- reads[,-indices_not_reqd];
cell_meta_unique <- c("zy","early2cell","mid2cell","late2cell","4cell","8cell","16cell","earlyblast","midblast","lateblast") ;
order_of_development <- order(match(cell_meta,cell_meta_unique))
reads <- reads[,order_of_development];
cell_meta <- cell_meta[order_of_development]
cell_embryo <- cell_embryo[order_of_development];
colnames(reads) <- cell_meta;

names(cell_meta) <- "dev. phase"
names(cell_embryo) <- "dev.phase.embryo"

write.table(cell_meta, "../external_data/Deng_Data/cell_labels_phase.txt")
write.table(cell_embryo, "../external_data/Deng_Data/cell_labels_phase_embryo.txt")

Deng2014 developmental genes study

Bcl210

Bcl210 gene expression study, a gene which seems to be a pre-cursor for early development, zygote and early 2-cell.

index <- grep("Bcl2l10", rownames(reads))
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red",  ylab="log expr.", main="Bcl2l10 expression", xaxt="n", xlab="")

labels = match(unique(cell_meta), cell_meta);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);

#dev.off()

Tcl1

Tcl1 expression study (there are 5 occurrences - may be isoforms? We show for one…patterns similar otherwise). It is also a precursor for zygote and early 2-cell cluster.

index <- grep("Tcl1", rownames(reads))[2]
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red",  ylab="log expr.", main="Tcl1 expression", xaxt="n", xlab="")

labels = match(unique(cell_meta), cell_meta);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);

#dev.off()

Actb

Actb gene expression profile. It is a precursor for slightly late development stages, blastocyst.

index <- grep("Actb", rownames(reads))[1]
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red",  ylab="log expr.", main="Actb expression", xaxt="n", xlab="")

labels = match(unique(cell_meta), cell_meta);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);

#dev.off()

Fbxo15

Fbxo15 gene expression profile.

index <- grep("Fbxo15", rownames(reads))[1]
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red",  ylab="log expr.", main="Fbxo15 expression", xaxt="n", xlab="")

labels = match(unique(cell_meta), cell_meta);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);

#dev.off()

Tceb1

Tceb1 gene expression profile.

index <- grep("Tceb1", rownames(reads))[1]
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red",  ylab="log expr.", main="Tceb1 expression", xaxt="n", xlab="")

labels = match(unique(cell_meta), cell_meta);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);

#dev.off()

Hsp90ab1

Hsp90ab1 gene expression profile.

index <- grep("Hsp90ab1", rownames(reads))[1]
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red",  ylab="log expr.", main="Tceb1 expression", xaxt="n", xlab="")

labels = match(unique(cell_meta), cell_meta);
abline(v=labels)

labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);

axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);

#dev.off()