In this script, we study a few of the genes that showed up in the GTEX V6 analysis or the single cell analysis (Deng 2014) and see whether they are tissue specific and whether their expression is upregulated and/or downregulated in the different tissues.
rm(list=ls())
library(data.table)
#install_github('kkdey/maptpx')
library(maptpx)
## Loading required package: slam
library(CountClust)
## Loading required package: ggplot2
library(data.table)
data <- data.frame(fread('../external_data/GTEX_V6/cis_gene_expression.txt'));
## Warning in fread("../external_data/GTEX_V6/cis_gene_expression.txt"):
## Starting data input on line 2 and discarding line 1 because it has too
## few or too many items to be column names or data: "V1" "V3" "V4" "V5"
## "V6" "V7" "V8" "V9" "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18"
## "V19" "V20" "V21" "V22" "V23" "V24" "V25" "V26" "V27" "V28" "V29" "V30"
## "V31" "V32" "V33" "V34" "V35" "V36" "V37" "V38" "V39" "V40" "V41" "V42"
## "V43" "V44" "V45" "V46" "V47" "V48" "V49" "V50" "V51" "V52" "V53" "V54"
## "V55" "V56" "V57" "V58" "V59" "V60" "V61" "V62" "V63" "V64" "V65" "V66"
## "V67" "V68" "V69" "V70" "V71" "V72" "V73" "V74" "V75" "V76" "V77" "V78"
## "V79" "V80" "V81" "V82" "V83" "V84" "V85" "V86" "V87" "V88" "V89" "V90"
## "V91" "V92" "V93" "V94" "V95" "V96" "V97" "V98" "V99" "V100" "V101" "V102"
## "V103" "V104" "V105" "V106" "V107" "V108" "V109" "V110" "V111" "V112"
## "V113" "V114" "V115" "V116" "V117" "V118" "V119" "V120" "V121" "V122"
## "V123" "V124" "V125" "V126" "V127" "V128" "V129" "V130" "V131" "V132"
## "V133" "V134" "V135" "V136" "V137" "V138" "V139" "V140" "V141" "V142 [...
## truncated]
##
Read 0.0% of 16069 rows
Read 62.2% of 16069 rows
Read 16069 rows and 8557 (of 8557) columns from 0.532 GB file in 00:00:10
matdata <- data[,-(1:2)];
samples_id=read.table("../external_data/GTEX_V6/samples_id.txt")[,3];
We first study the haemoglobin alpha and the haemoglobin beta genes that seem to be the primary driver genes separating the whole blood tissue samples from the other tissue samples. We plot the log reads expression of each of these three genes against the different tissue sample labels.
ensemble_ids <- c("ENSG00000206172", "ENSG00000188536", "ENSG00000244734")
gene_names <- data[,2];
samples_id <- as.character(samples_id)
samples_id[grep("Nucleus", samples_id)] = "Brain -N. accumbens (basal ganglia)"
samples_id[grep("Gastroe", samples_id)] = "Esophagus -Gastroesophageal Jn."
samples_id[grep("cingulate", samples_id)] = "Brain - Anterior cortex (BA24)."
#samples_id[grep("Brain", samples_id)] = "Brain"
samples_id <- as.factor(samples_id)
HBA1 expression pattern
index <- grep("ENSG00000206172", gene_names)
plot(1:8555, log(matdata[index,]+1), type="l", col="red", ylab="log expr.", main="HBA1 expression", xaxt="n")
labels = match(unique(samples_id), samples_id);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);
#dev.off()
HBA2 expression pattern
index <- grep("ENSG00000188536", gene_names)
plot(1:8555, log(matdata[index,]+1), type="l", col="red", ylab="log expr.", main="HBA2 expression", xaxt="n")
labels = match(unique(samples_id), samples_id);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);
#dev.off()
HBB expression
index <- grep("ENSG00000244734", gene_names)
plot(1:8555, log(matdata[index,]+1), type="l", col="red", ylab="log expr.", main="HBB expression", xaxt="n")
labels = match(unique(samples_id), samples_id);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);
#dev.off()
HBB expression
index <- grep("ENSG00000010318", gene_names)
par(mar=c(12,2,2,3))
plot(1:8555, log(matdata[index,]+1), type="l", col="red", ylab="log expr.", main="PHF7 log gene expression", xaxt="n")
labels = match(unique(samples_id), samples_id);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);
#dev.off()
HBB expression
index <- grep("ENSG00000175646", gene_names)
par(mar=c(12,2,2,3))
plot(1:8555, log(matdata[index,]+1), type="l", col="red", ylab="log expr.", main="PRM1 log gene expression", xaxt="n")
labels = match(unique(samples_id), samples_id);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);
#dev.off()
HBB expression
index <- grep("ENSG00000122304", gene_names)
par(mar=c(12,2,2,3))
plot(1:8555, log(matdata[index,]+1), type="l", col="red", ylab="log expr.", main="PRM2 log gene expression", xaxt="n")
labels = match(unique(samples_id), samples_id);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],8555);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(samples_id),las=3, cex.axis=0.8);
#dev.off()
We filter out the tissue samples coming from the brain only.
brain_indices <- grep("Brain", samples_id)
brain_matdata <- matdata[,brain_indices];
brain_ids <- samples_id[brain_indices];
We study the MBP gene which was found to be significantly enriched in brain spinal cord.
index <- grep("ENSG00000197971",gene_names);
brain_ids_ordered <- brain_ids[order(brain_ids)]
plot(1:length(brain_indices), log(brain_matdata[index,order(brain_ids)]+1), type="l", col="red", ylab="log expr.", main="MBP expression", xaxt="n", xlab="")
labels = match(unique(brain_ids_ordered), brain_ids_ordered);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],1259);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(brain_ids_ordered),las=3, cex.axis=0.8);
#dev.off()
reads <- data.frame(fread('../external_data/Deng_Data/Deng_cell_data.txt'),row.names=1);
## Warning in fread("../external_data/Deng_Data/Deng_cell_data.txt"): Starting
## data input on line 2 and discarding line 1 because it has too few or too
## many items to be column names or data: "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "16cell"
## "16cell" "16cell" "16cell" "16cell" "16cell" "16cell" "4cell" "4cell"
## "4cell" "4cell" "4cell" "4cell" "4cell" "4cell" "4cell" "4cell" "4cell"
## "4cell" "4cell" "4cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell"
## "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell"
## "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell" "8cell"
## "8cell" "8cell" "8cell" "8cell" "BXC" "BXC" "BXC" "BXC" "BXC" "BXC" "BXC"
## "BXC" "BXC" "BXC" "BXC" "BXC" "BXC" "C57twocell" "C [... truncated]
files <- list.files("../external_data/Deng_Data/Deng_files/");
cell_meta <- unlist(lapply(files, function(x) strsplit(x,"_")[[1]][2]));
embryo_id <- unlist(lapply(files, function(x) strsplit(strsplit(x,"_")[[1]][3],"-")[[1]][1]));
cell_meta[grep("zy",cell_meta)]="zy";
cell_meta[grep("smartseq2", files)]="8cell_nd";
cell_meta[grep("8cell_2pooled", files)]="8cell_nd";
cell_meta[grep("8cell_split", files)]="8cell_nd";
cell_meta[grep("16cell_2pooled", files)]="16cell_nd";
cell_meta[grep("16cell_split", files)]="16cell_nd";
indices_not_reqd <- which(cell_meta=="BXC" | cell_meta=="C57twocell" | cell_meta=="fibroblast" | cell_meta =="8cell_nd" | cell_meta == "16cell_nd");
cell_meta <- cell_meta[-indices_not_reqd];
embryo_id <- embryo_id[-indices_not_reqd];
embryo_id[which(embryo_id == "expression.txt")]="."
cell_embryo <- paste0(cell_meta,"_", embryo_id);
reads <- reads[,-indices_not_reqd];
cell_meta_unique <- c("zy","early2cell","mid2cell","late2cell","4cell","8cell","16cell","earlyblast","midblast","lateblast") ;
order_of_development <- order(match(cell_meta,cell_meta_unique))
reads <- reads[,order_of_development];
cell_meta <- cell_meta[order_of_development]
cell_embryo <- cell_embryo[order_of_development];
colnames(reads) <- cell_meta;
names(cell_meta) <- "dev. phase"
names(cell_embryo) <- "dev.phase.embryo"
write.table(cell_meta, "../external_data/Deng_Data/cell_labels_phase.txt")
write.table(cell_embryo, "../external_data/Deng_Data/cell_labels_phase_embryo.txt")
Bcl210 gene expression study, a gene which seems to be a pre-cursor for early development, zygote and early 2-cell.
index <- grep("Bcl2l10", rownames(reads))
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red", ylab="log expr.", main="Bcl2l10 expression", xaxt="n", xlab="")
labels = match(unique(cell_meta), cell_meta);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);
#dev.off()
Tcl1 expression study (there are 5 occurrences - may be isoforms? We show for one…patterns similar otherwise). It is also a precursor for zygote and early 2-cell cluster.
index <- grep("Tcl1", rownames(reads))[2]
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red", ylab="log expr.", main="Tcl1 expression", xaxt="n", xlab="")
labels = match(unique(cell_meta), cell_meta);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);
#dev.off()
Actb gene expression profile. It is a precursor for slightly late development stages, blastocyst.
index <- grep("Actb", rownames(reads))[1]
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red", ylab="log expr.", main="Actb expression", xaxt="n", xlab="")
labels = match(unique(cell_meta), cell_meta);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);
#dev.off()
Fbxo15 gene expression profile.
index <- grep("Fbxo15", rownames(reads))[1]
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red", ylab="log expr.", main="Fbxo15 expression", xaxt="n", xlab="")
labels = match(unique(cell_meta), cell_meta);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);
#dev.off()
Tceb1 gene expression profile.
index <- grep("Tceb1", rownames(reads))[1]
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red", ylab="log expr.", main="Tceb1 expression", xaxt="n", xlab="")
labels = match(unique(cell_meta), cell_meta);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);
#dev.off()
Hsp90ab1 gene expression profile.
index <- grep("Hsp90ab1", rownames(reads))[1]
plot(1:dim(reads)[2], log(reads[index,]+1), type="l", col="red", ylab="log expr.", main="Tceb1 expression", xaxt="n", xlab="")
labels = match(unique(cell_meta), cell_meta);
abline(v=labels)
labels_low=labels;
labels_up=c(labels[2:length(labels)],dim(reads)[2]);
mid_point=labels_low +0.5*(labels_up-labels_low);
axis(1,at=mid_point, unique(cell_meta),las=3, cex.axis=0.8);
#dev.off()