-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathRUVseq_with_empirical.r
101 lines (92 loc) · 3.73 KB
/
RUVseq_with_empirical.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
options(warn=-1)
############################## RUVSEQ Normalization ##############################
##### Installing Package RUVSeq ############
if (!require("devtools")) install.packages("devtools",repos="http://cran.us.r-project.org")
print ("Loading required dependencies for devtools")
library(withr)
library(httr)
library(curl)
library(devtools)
source("https://bioconductor.org/biocLite.R")
if (!require("RUVSeq")) biocLite("RUVSeq")
library(RUVSeq)
############################################
####### Reading Command Line Arguments ########
args = commandArgs(trailingOnly=TRUE)
path <- args[1]
filenameR <- args[2]
NumberofControl <- as.numeric(args[3])
NumberofTreatment <- as.numeric(args[4])
Counts <- as.numeric(args[5])
NumberofSamples <- as.numeric(args[6])
##############################################
setwd(path)
file <- filenameR
countData <- as.matrix(read.table(file,sep="\t",header=TRUE,row.names=1,check.names=F))
print ("This is how your input data looks")
head(countData)
print ("Filtering and exploratory data analysis")
## Filtering and exploratory data analysis
filter <- apply(countData, 1, function(x) length(x[x>Counts])>=NumberofSamples)
filtered <- countData[filter,]
genes <- rownames(filtered)
aa <- length(genes)
Controlrep <- rep("Ctl",NumberofControl)
Treatmentrep <- rep("Trt",NumberofTreatment)
x <- as.factor(c(Controlrep,Treatmentrep))
x
set <- newSeqExpressionSet(as.matrix(filtered), phenoData = data.frame(x, row.names=colnames(filtered)))
library(RColorBrewer)
colors <- brewer.pal(3, "Set2")
outfile <- paste0(file, "_Relative", "_Expression", "_NoNormalization", ".pdf")
pdf(outfile)
plotRLE(set, outline=FALSE, ylim=c(-4, 4), col=colors[x])
outfile <- paste0(file, "_PCA", "_NoNormalization", ".pdf")
pdf(outfile)
plotPCA(set, col=colors[x], cex=1.2)
print ("Performing Upper Quantile Normalization")
##upper quartlie normalization
set <- betweenLaneNormalization(set, which="upper")
outfile <- paste0(file, "_Relative", "_Expression", "_UQNormalization", ".pdf")
pdf(outfile)
plotRLE(set, outline=FALSE, ylim=c(-4, 4), col=colors[x])
outfile <- paste0(file, "_PCA", "_UQNormalization", ".pdf")
pdf(outfile)
plotPCA(set, col=colors[x], cex=1.2)
print ("Performing Empirical gene normalization with RUVg")
##empirical gene normalization with RUVg
design <- model.matrix(~x, data=pData(set))
y <- DGEList(counts=counts(set), group=x)
y <- calcNormFactors(y, method="upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
lrt <- glmLRT(fit, coef=2)
top <- topTags(lrt, n=nrow(set))$table
empirical <- rownames(set)[which(!(rownames(set) %in% rownames(top)[1:5000]))]
set2 <- RUVg(set, empirical, k=1)
pData(set2)
outfile <- paste0(file, "_Relative", "_Expression", "_EmpiricalGeneNormalization", ".pdf")
outfile1 <- paste0(file, "_NormalizedCounts", ".txt")
pdf(outfile)
plotRLE(set2, outline=FALSE, ylim=c(-4, 4), col=colors[x])
outfile <- paste0(file, "_PCA", "_EmpiricalGeneNormalization", ".pdf")
pdf(outfile)
plotPCA(set2, col=colors[x], cex=1.2)
normalizedCounts_data <- normCounts(set2)
write.table(normalizedCounts_data,outfile1,sep="\t",quote=F)
print ("Performing DE using EDGER package")
##Differential expression analysis using Empirical RUVg
design <- model.matrix(~x+W_1, data=pData(set2))
y <- DGEList(counts=counts(set2), group=x)
y <- calcNormFactors(y, method="upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
lrt <- glmLRT(fit, coef=2)
print ("Showing top DE genes")
topTags(lrt)
result1 <- topTags(lrt, n=dim(y)[1]+1, adjust.method="BH", sort.by="logFC")
write.table(result1,file = "temporaryfile.txt",sep = "\t",quote=FALSE)
dev.off()
########################### END RUVSEq #############################