# params: rround <- function(V) { Rest=V-trunc(V); if(Rest>=0.5) { return(trunc(V)+1); } else { return(trunc(V)); } } library(survival); Data = read.delim("Data.txt", header=TRUE, sep="\t"); Filter=which(apply(Data[1,],2,function(x)all(is.na(x)))); if(length(Filter)!=0) Data = Data[,-Filter]; GeneCnt = dim(Data)[1]-2; SampleCnt = dim(Data)[2]-1; Time = as.numeric(Data[1,]); Time = Time[-1]; Status = as.numeric(Data[2,]); Status = Status[-1]; Status = replace(Status, Status==2, 0); Status = replace(Status, Status==3, 0); Quart=rround(0.25*SampleCnt); QuartData=matrix(0, GeneCnt*3, Quart*2); for(i in 1:GeneCnt) { SortedData=sort(as.numeric(Data[2+i, 2:(SampleCnt+1)]), index=TRUE); TimeS=Time[SortedData$ix]; StatusS=Status[SortedData$ix]; QuartData[((i-1)*3+1),]=as.numeric(TimeS[-(Quart+1):-(SampleCnt-Quart)]); QuartData[((i-1)*3+2),]=as.numeric(StatusS[-(Quart+1):-(SampleCnt-Quart)]); QuartData[((i-1)*3+3),]=as.numeric(SortedData$x[-(Quart+1):-(SampleCnt-Quart)]); } #exit(0); Results=NULL; Results$GeneNames = as.character(Data[,1]); Results$GeneNames = Results$GeneNames [-1:-2]; #Results$Median=numeric(GeneCnt); Results$PValues=numeric(GeneCnt); #Groups=matrix(0,GeneCnt, SampleCnt); Grouping=numeric(Quart*2) Grouping[(Quart+1):(2*Quart)]=1 system("rm -rf KM-Plots; mkdir KM-Plots"); setwd("KM-Plots/"); for(i in 1:GeneCnt) { #Results$Median[i] = median(as.numeric(Data[2+i,2:(SampleCnt+1)])); #for(j in 1:SampleCnt) #if(Data[2+i,1+j]>=Results$Median[i]) #Groups[i, j]=1; sdf = survdiff(Surv(QuartData[((i-1)*3+1),], QuartData[((i-1)*3+2),]) ~ Grouping); Results$PValues[i] = 1 - pchisq(sdf$chisq, length(sdf$n) - 1); if(Results$PValues[i]<0.05) { pdf(paste(Results$GeneNames[i],".pdf", sep="")); plot(survfit(Surv(QuartData[((i-1)*3+1),], QuartData[((i-1)*3+2),]) ~ Grouping), col=c("green", "red"), legend.text=c("Under-expressed", "Over-expressed")) title(paste(Results$GeneNames[i]," (p=", round(Results$PValues[i],5), ")", sep="")) dev.off(); } } #estimate FDR Results$FDRp=numeric(GeneCnt); Sorted=sort(Results$PValues, index=TRUE) Results$PValues=Results$PValues[Sorted$ix] Results$GeneNames=Results$GeneNames[Sorted$ix] Results$Median=Results$Median[Sorted$ix] #Groups=Groups[Sorted$ix,] for(i in GeneCnt:1) { if(i==GeneCnt) { Results$FDRp[i]=Results$PValues[i] } else { Tmp=Results$PValues[i]*GeneCnt/i; if(Tmp