script: /suikou/tool/yoshitake/pp/statistics~ballgown "$scriptdir"/statistics~scatter_with_variance c2997108/centos7:1-blast_2.9.0-nt_20190618-13 c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0 c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0-blast_2.9.0-trinotate-3.1.1-R_4-kegg_4 c2997108/centos7:3-java centos:centos6 quay.io/biocontainers/cufflinks:2.2.1--py36_2 quay.io/biocontainers/hisat2:2.1.0--py27h6bb024c_3 quay.io/biocontainers/stringtie:1.3.4--py35_0 using docker + set -o pipefail + script=run-ballgown.a.b.isoform.R + inputsamplematrix=sample.input.a.b.txt + inputmatrix=isoforms.count_table.a.b.txt + inputgenematrix=genes.count_table.a.b.txt + g1=a + g2=b + '[' a = '' ']' + '[' b = '' ']' + p=0.05 ++ basename isoforms.count_table.a.b.txt + outputfile=ballgown.isoforms.count_table.a.b.txt ++ basename sample.input.a.b.txt + samplex=ballgown.sample.input.a.b.txt.x ++ basename sample.input.a.b.txt + sampley=ballgown.sample.input.a.b.txt.y ++ basename isoforms.count_table.a.b.txt + outputfilexup=result.ballgown.isoforms.count_table.a.b.txt.a.up.b.down.txt ++ basename isoforms.count_table.a.b.txt + outputfileyup=result.ballgown.isoforms.count_table.a.b.txt.a.down.b.up.txt ++ basename genes.count_table.a.b.txt + outputgenefile=ballgown.genes.count_table.a.b.txt ++ basename genes.count_table.a.b.txt + outputgenefilexup=result.ballgown.genes.count_table.a.b.txt.a.up.b.down.txt ++ basename genes.count_table.a.b.txt + outputgenefileyup=result.ballgown.genes.count_table.a.b.txt.a.down.b.up.txt + '[' Trinotate.xls3.isoform.cnt = '' ']' + annotationfile=Trinotate.xls3.isoform.cnt + '[' Trinotate.xls3.gene.cnt = '' ']' + annotationgenefile=Trinotate.xls3.gene.cnt + '[' input.ballgown = '' ']' + ln -s input.ballgown input.ballgown.extract.a.b + awk '-F\t' -v g1=a -v g2=b '{if(NR==1){print $0}else{OFS="\t"; if($2==g1){$2=1}else if($2==g2){$2=2}; print $0}}' sample.input.a.b.txt + cat + docker run -v /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis:/yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -w /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -u root -i --rm c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0-blast_2.9.0-trinotate-3.1.1-R_4-kegg_4 R --vanilla R version 3.5.2 (2018-12-20) -- "Eggshell Igloo" Copyright (C) 2018 The R Foundation for Statistical Computing Platform: x86_64-redhat-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(ballgown) Attaching package: 'ballgown' The following object is masked from 'package:base': structure > pheno_data = read.table(file ="sample.input.a.b.txt.num", header = TRUE, sep = "\t") > sample_full_path = paste("input.ballgown.extract.a.b",pheno_data[,1], sep = '/') > bg2 = ballgown(samples=as.vector(sample_full_path),pData=pheno_data) Thu Nov 18 20:17:40 2021 Thu Nov 18 20:17:40 2021: Reading linking tables Thu Nov 18 20:17:40 2021: Reading intron data files Thu Nov 18 20:17:40 2021: Merging intron data Thu Nov 18 20:17:40 2021: Reading exon data files Thu Nov 18 20:17:40 2021: Merging exon data Thu Nov 18 20:17:40 2021: Reading transcript data files Thu Nov 18 20:17:40 2021: Merging transcript data Wrapping up the results > results_genes = stattest(bg2, feature="gene", covariate="condition", getFC=TRUE, meas="FPKM") Thu Nov 18 20:17:40 2021 > c=results_genes[!is.na(results_genes[,4]),] > results_genes_t = stattest(bg2, feature="transcript", covariate="condition", getFC=TRUE, meas="FPKM") > whole_tx_table = texpr(bg2, 'all') > aa=merge(results_genes_t,whole_tx_table,by.x=c("id"),by.y=c("t_id")) > d=aa[!is.na(aa[,4]),] > res=cbind(d$t_name,d$qval,d$fc) > res=res[res[,2]<0.05,] > colnames(res)=c("id","qval","fc") > resgene=cbind(as.character(c$id),c$qval,c$fc) > resgene=resgene[resgene[,2]<0.05,] > colnames(resgene)=c("id","qval","fc") > write.table(res,file="ballgown.isoforms.count_table.a.b.txt",sep="\t",row.names=F,quote=F) > write.table(resgene,file="ballgown.genes.count_table.a.b.txt",sep="\t",row.names=F,quote=F) > > + '[' '!' -e ballgown.isoforms.count_table.a.b.txt ']' + '[' '!' -e ballgown.genes.count_table.a.b.txt ']' + tail -n+2 ballgown.isoforms.count_table.a.b.txt + cut -f 1 + awk '-F\t' -v g1=a '$2==g1{print $1}' sample.input.a.b.txt + awk '-F\t' -v g2=b '$2==g2{print $1}' sample.input.a.b.txt + bash /suikou/tool/yoshitake/pp/statistics~scatter_with_variance -x a -y b isoforms.count_table.a.b.txt ballgown.isoforms.count_table.a.b.txt.id ballgown.sample.input.a.b.txt.x ballgown.sample.input.a.b.txt.y c2997108/centos7:1-blast_2.9.0-nt_20190618-13 c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0 c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0-blast_2.9.0-trinotate-3.1.1-R_4-kegg_4 c2997108/centos7:3-java centos:centos6 quay.io/biocontainers/cufflinks:2.2.1--py36_2 quay.io/biocontainers/hisat2:2.1.0--py27h6bb024c_3 quay.io/biocontainers/stringtie:1.3.4--py35_0 using docker + set -o pipefail + countmat=isoforms.count_table.a.b.txt + sigmat=ballgown.isoforms.count_table.a.b.txt.id + listx=ballgown.sample.input.a.b.txt.x + listy=ballgown.sample.input.a.b.txt.y + label_x=a + label_y=b + p=0.05 ++ basename isoforms.count_table.a.b.txt ++ basename ballgown.isoforms.count_table.a.b.txt.id + output=isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id + cat isoforms.count_table.a.b.txt + awk '-F\t' 'FILENAME==ARGV[1]{if(FNR>1){for(i=2;i<=NF;i++){a[i]+=$i}}} FILENAME==ARGV[2]{if(FNR==1){print $0}else{ORS=""; print $1;for(i=2;i<=NF;i++){print "\t"$i/a[i]*1000*1000}; print "\n"}}' /dev/stdin isoforms.count_table.a.b.txt + cat ballgown.sample.input.a.b.txt.x + awk '-F\t' 'FILENAME==ARGV[1]{a[$1]=1} FILENAME==ARGV[2]{if(FNR==1){for(i=1;i<=NF;i++){if(a[$i]==1){b[i]=1}}}; ORS=""; print $1; for(i=2;i<=NF;i++){if(b[i]==1){print "\t"$i}}; print "\n"}' /dev/stdin isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm + cat ballgown.sample.input.a.b.txt.y + awk '-F\t' 'FILENAME==ARGV[1]{a[$1]=1} FILENAME==ARGV[2]{if(FNR==1){for(i=1;i<=NF;i++){if(a[$i]==1){b[i]=1}}}; ORS=""; print $1; for(i=2;i<=NF;i++){if(b[i]==1){print "\t"$i}}; print "\n"}' /dev/stdin isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm + cat ballgown.isoforms.count_table.a.b.txt.id + awk '-F\t' 'FILENAME==ARGV[1]{a[$1]=1} FILENAME==ARGV[2] && (FNR==1 || a[$1]==1){print $0}' /dev/stdin isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm_x + cat ballgown.isoforms.count_table.a.b.txt.id + awk '-F\t' 'FILENAME==ARGV[1]{a[$1]=1} FILENAME==ARGV[2] && (FNR==1 || a[$1]==1){print $0}' /dev/stdin isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm_y + cat + docker run -v /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis:/yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -w /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -u root -i --rm c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0-blast_2.9.0-trinotate-3.1.1-R_4-kegg_4 R --vanilla R version 3.5.2 (2018-12-20) -- "Eggshell Igloo" Copyright (C) 2018 The R Foundation for Statistical Computing Platform: x86_64-redhat-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > label_x="a" > label_y="b" > file_data_x="isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm_x" > file_data_y="isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm_y" > file_sig_data_x="isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm_x_sig" > file_sig_data_y="isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm_y_sig" > data_x=round(read.table(file_data_x,sep="\t",header=T,row.names=1)) > data_y=round(read.table(file_data_y,sep="\t",header=T,row.names=1)) > sig_data_x=read.table(file_sig_data_x,sep="\t",header=T,row.names=1) > if(dim(sig_data_x)[1]!=0){sig_data_x=round(sig_data_x)} > sig_data_y=read.table(file_sig_data_y,sep="\t",header=T,row.names=1) > if(dim(sig_data_y)[1]!=0){sig_data_y=round(sig_data_y)} > mean_data_x=apply(data_x,1,mean) > mean_data_y=apply(data_y,1,mean) > if(dim(data_x)[2]>1){ + dx=apply(data_x,1,function(i){return(abs(qt(0.05/2, length(i)-1)) * sd(i) / sqrt(length(i)))}) + }else{ + dx=0 + } > if(dim(data_y)[2]>1){ + dy=apply(data_y,1,function(i){return(abs(qt(0.05/2, length(i)-1)) * sd(i) / sqrt(length(i)))}) + }else{ + dy=0 + } > vx=dx/mean_data_x > vy=dy/mean_data_y > vx[is.na(vx)]=0 > vy[is.na(vy)]=0 > #total_var_data=sqrt(dx^2+dy^2)/sqrt(mean_data_x^2+mean_data_y^2) > #total_var_data[is.na(total_var_data)]=0 > total_var_data=sqrt(vx^2+vy^2) > mean_sig_data_x=apply(sig_data_x,1,mean) > mean_sig_data_y=apply(sig_data_y,1,mean) > min_x=min(mean_data_x[mean_data_x>0])/10 > min_y=min(mean_data_y[mean_data_y>0])/10 > plot_mean_data_x=mean_data_x+min_x > plot_mean_data_y=mean_data_y+min_y > plot_mean_sig_data_x=mean_sig_data_x+min_x > plot_mean_sig_data_y=mean_sig_data_y+min_y > max_x=max(plot_mean_data_x) > max_y=max(plot_mean_data_y) > library(Cairo) > mainplotfunc=function(){ + if(max(dx,dy)>0){ + start_class=quantile(total_var_data[total_var_data>0],c(0,1,25,50,75,90,100)/100)[2] + end_class=quantile(total_var_data[total_var_data>0],c(0,1,25,50,75,90,100)/100)[6] + + CairoPNG("isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.scatter.png",width=800,height=600) + num_class=22 + library(RColorBrewer) + #mycol=colorRampPalette(c(rgb(0.5,0,0.2,0.4),rgb(1,0.8,0.7,1)), alpha=TRUE)(num_class) + colPal3 <- colorRampPalette(brewer.pal(9,"YlOrRd")[3:9]) + mycol=colPal3(num_class) + + layout(matrix(c(1,1,1,1,2),1,5)) + par(ps = 18) + k=0 + i=end_class + n=(total_var_data>=i) + xt=plot_mean_data_x[n] + yt=plot_mean_data_y[n] + plot(xt,yt,log="xy",xlim=c(min_x,max_x),ylim=c(min_y,max_y),cex=2*(k+1)^3/num_class^3,pch=21,col=adjustcolor(mycol[k+1],0),bg=adjustcolor(mycol[k+1],1-0.4*k/num_class),xlab=label_x,ylab=label_y) + + for(k in 1:(num_class-2)){ + i=end_class+(start_class-end_class)*k/(num_class-2) + n=(total_var_data>=i & total_var_data=i & total_var_data=",i),pos=4) + } + k=num_class-1 + i=min(total_var_data) + par(new=T) + plot(c(0),c(k),xlim=c(0,0),ylim=c(-1,num_class),col=adjustcolor(mycol[k+1],0),bg=adjustcolor(mycol[k+1],1-0.4*k/num_class),pch=21,cex=2*(k+1)^3/num_class^3,bty="n",axes=F,xlab="",ylab="") + text(c(0.05),c(k),paste(">=",i),pos=4) + text(0,num_class+0.2,"95%conf/mean") + par(new=T) + plot(c(-0.3),c(-1),xlim=c(0,0),ylim=c(-1,num_class),col="green",cex=1,bty="n",axes=F,xlab="",ylab="") + text(c(-0.3),c(-1),paste("significant"),pos=4) + + dev.off() + } + } > subplotfunc=function(){ + CairoPNG("isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.scatter.normal.png",width=600,height=600) + plot(plot_mean_data_x,plot_mean_data_y,log="xy",xlim=c(min_x,max_x),ylim=c(min_y,max_y),cex=0.1,pch=21,col=adjustcolor("#000000",0.5),xlab=label_x,ylab=label_y) + par(new=T) + plot(plot_mean_sig_data_x,plot_mean_sig_data_y,log="xy",xlim=c(min_x,max_x),ylim=c(min_y,max_y),col="green",cex=2,axes=F,xlab="",ylab="") + dev.off() + } > > wflag=0 > while(wflag<=5){ + restry=NULL + restry=try(mainplotfunc(), silent = FALSE) + if(class(restry)!="try-error"){break} + try(dev.off()) + Sys.sleep(10+runif(1, min=0,max=100)/10) + wflag=wflag+1 + } > wflag=0 > while(wflag<=5){ + restry=NULL + restry=try(subplotfunc(), silent = FALSE) + if(class(restry)!="try-error"){break} + try(dev.off()) + Sys.sleep(10+runif(1, min=0,max=100)/10) + wflag=wflag+1 + } > > > + rm -f isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm_x isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm_y isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm_x_sig isoforms.count_table.a.b.txt.ballgown.isoforms.count_table.a.b.txt.id.cpm_y_sig + post_processing + '[' 3 = 1 ']' + exit + tail -n+2 ballgown.isoforms.count_table.a.b.txt + awk '{if($3<=1){print $1"\tx"}else{print $1"\ty"}}' + cut -f 1,2 ballgown.isoforms.count_table.a.b.txt + awk '-F\t' 'NR==1{print -1"\t"$0} NR>1{print $2"\t"$0}' + sort -k1,1g + cut -f 2- + awk '-F\t' 'FILENAME==ARGV[1]{xy[$1]=$2} FILENAME==ARGV[2]{if(FNR==1){header=$0}; res[$1]=$0} FILENAME==ARGV[3]{if(FNR==1){print header"\t"$2}else{if(xy[$1]=="x"){print res[$1]"\t"$2}}}' ballgown.isoforms.count_table.a.b.txt.xy Trinotate.xls3.isoform.cnt ballgown.isoforms.count_table.a.b.txt.p + awk '-F\t' 'FILENAME==ARGV[1]{xy[$1]=$2} FILENAME==ARGV[2]{if(FNR==1){header=$0}; res[$1]=$0} FILENAME==ARGV[3]{if(FNR==1){print header"\t"$2}else{if(xy[$1]=="y"){print res[$1]"\t"$2}}}' ballgown.isoforms.count_table.a.b.txt.xy Trinotate.xls3.isoform.cnt ballgown.isoforms.count_table.a.b.txt.p + i=result.ballgown.isoforms.count_table.a.b.txt.a.up.b.down.txt + awk '-F\t' -v maxlen=30000 'FILENAME==ARGV[1]{for(i=1;i<=NF;i++){n=int((length($i)-1)/maxlen)+1; if(n>a[i]){a[i]=n}}} FILENAME==ARGV[2]{if(FNR==1){ORS=""; for(i=1;i<=NF;i++){if(a[i]<=1){print $i"\t"}else{for(j=1;j<=a[i];j++){print $i" (split "j")\t"}}}; print "\n"}else{for(i=1;i<=NF;i++){if(a[i]<=1){print $i"\t"}else{for(j=1;j<=a[i];j++){print substr($i,(j-1)*maxlen+1,maxlen)"\t" }}}; print "\n"}}' result.ballgown.isoforms.count_table.a.b.txt.a.up.b.down.txt ./result.ballgown.isoforms.count_table.a.b.txt.a.up.b.down.txt + sed 's/\t$//' + i=result.ballgown.isoforms.count_table.a.b.txt.a.down.b.up.txt + sed 's/\t$//' + awk '-F\t' -v maxlen=30000 'FILENAME==ARGV[1]{for(i=1;i<=NF;i++){n=int((length($i)-1)/maxlen)+1; if(n>a[i]){a[i]=n}}} FILENAME==ARGV[2]{if(FNR==1){ORS=""; for(i=1;i<=NF;i++){if(a[i]<=1){print $i"\t"}else{for(j=1;j<=a[i];j++){print $i" (split "j")\t"}}}; print "\n"}else{for(i=1;i<=NF;i++){if(a[i]<=1){print $i"\t"}else{for(j=1;j<=a[i];j++){print substr($i,(j-1)*maxlen+1,maxlen)"\t" }}}; print "\n"}}' result.ballgown.isoforms.count_table.a.b.txt.a.down.b.up.txt ./result.ballgown.isoforms.count_table.a.b.txt.a.down.b.up.txt + docker run -v /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis:/yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -w /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -u root -i --rm c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0-blast_2.9.0-trinotate-3.1.1-R_4-kegg_4 java -Xmx1G -jar /usr/local/bin/excel2.jar result.ballgown.isoforms.count_table.a.b.txt.a.up.b.down.txt.temp result.ballgown.isoforms.count_table.a.b.txt.a.up.b.down.txt.xlsx Start converting + docker run -v /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis:/yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -w /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -u root -i --rm c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0-blast_2.9.0-trinotate-3.1.1-R_4-kegg_4 java -Xmx1G -jar /usr/local/bin/excel2.jar result.ballgown.isoforms.count_table.a.b.txt.a.down.b.up.txt.temp result.ballgown.isoforms.count_table.a.b.txt.a.down.b.up.txt.xlsx Start converting + tail -n+2 ballgown.genes.count_table.a.b.txt + cut -f 1 + bash /suikou/tool/yoshitake/pp/statistics~scatter_with_variance -x a -y b genes.count_table.a.b.txt ballgown.genes.count_table.a.b.txt.id ballgown.sample.input.a.b.txt.x ballgown.sample.input.a.b.txt.y c2997108/centos7:1-blast_2.9.0-nt_20190618-13 c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0 c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0-blast_2.9.0-trinotate-3.1.1-R_4-kegg_4 c2997108/centos7:3-java centos:centos6 quay.io/biocontainers/cufflinks:2.2.1--py36_2 quay.io/biocontainers/hisat2:2.1.0--py27h6bb024c_3 quay.io/biocontainers/stringtie:1.3.4--py35_0 using docker + set -o pipefail + countmat=genes.count_table.a.b.txt + sigmat=ballgown.genes.count_table.a.b.txt.id + listx=ballgown.sample.input.a.b.txt.x + listy=ballgown.sample.input.a.b.txt.y + label_x=a + label_y=b + p=0.05 ++ basename genes.count_table.a.b.txt ++ basename ballgown.genes.count_table.a.b.txt.id + output=genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id + cat genes.count_table.a.b.txt + awk '-F\t' 'FILENAME==ARGV[1]{if(FNR>1){for(i=2;i<=NF;i++){a[i]+=$i}}} FILENAME==ARGV[2]{if(FNR==1){print $0}else{ORS=""; print $1;for(i=2;i<=NF;i++){print "\t"$i/a[i]*1000*1000}; print "\n"}}' /dev/stdin genes.count_table.a.b.txt + cat ballgown.sample.input.a.b.txt.x + awk '-F\t' 'FILENAME==ARGV[1]{a[$1]=1} FILENAME==ARGV[2]{if(FNR==1){for(i=1;i<=NF;i++){if(a[$i]==1){b[i]=1}}}; ORS=""; print $1; for(i=2;i<=NF;i++){if(b[i]==1){print "\t"$i}}; print "\n"}' /dev/stdin genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm + cat ballgown.sample.input.a.b.txt.y + awk '-F\t' 'FILENAME==ARGV[1]{a[$1]=1} FILENAME==ARGV[2]{if(FNR==1){for(i=1;i<=NF;i++){if(a[$i]==1){b[i]=1}}}; ORS=""; print $1; for(i=2;i<=NF;i++){if(b[i]==1){print "\t"$i}}; print "\n"}' /dev/stdin genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm + cat ballgown.genes.count_table.a.b.txt.id + awk '-F\t' 'FILENAME==ARGV[1]{a[$1]=1} FILENAME==ARGV[2] && (FNR==1 || a[$1]==1){print $0}' /dev/stdin genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm_x + cat ballgown.genes.count_table.a.b.txt.id + awk '-F\t' 'FILENAME==ARGV[1]{a[$1]=1} FILENAME==ARGV[2] && (FNR==1 || a[$1]==1){print $0}' /dev/stdin genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm_y + cat + docker run -v /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis:/yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -w /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -u root -i --rm c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0-blast_2.9.0-trinotate-3.1.1-R_4-kegg_4 R --vanilla R version 3.5.2 (2018-12-20) -- "Eggshell Igloo" Copyright (C) 2018 The R Foundation for Statistical Computing Platform: x86_64-redhat-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > label_x="a" > label_y="b" > file_data_x="genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm_x" > file_data_y="genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm_y" > file_sig_data_x="genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm_x_sig" > file_sig_data_y="genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm_y_sig" > data_x=round(read.table(file_data_x,sep="\t",header=T,row.names=1)) > data_y=round(read.table(file_data_y,sep="\t",header=T,row.names=1)) > sig_data_x=read.table(file_sig_data_x,sep="\t",header=T,row.names=1) > if(dim(sig_data_x)[1]!=0){sig_data_x=round(sig_data_x)} > sig_data_y=read.table(file_sig_data_y,sep="\t",header=T,row.names=1) > if(dim(sig_data_y)[1]!=0){sig_data_y=round(sig_data_y)} > mean_data_x=apply(data_x,1,mean) > mean_data_y=apply(data_y,1,mean) > if(dim(data_x)[2]>1){ + dx=apply(data_x,1,function(i){return(abs(qt(0.05/2, length(i)-1)) * sd(i) / sqrt(length(i)))}) + }else{ + dx=0 + } > if(dim(data_y)[2]>1){ + dy=apply(data_y,1,function(i){return(abs(qt(0.05/2, length(i)-1)) * sd(i) / sqrt(length(i)))}) + }else{ + dy=0 + } > vx=dx/mean_data_x > vy=dy/mean_data_y > vx[is.na(vx)]=0 > vy[is.na(vy)]=0 > #total_var_data=sqrt(dx^2+dy^2)/sqrt(mean_data_x^2+mean_data_y^2) > #total_var_data[is.na(total_var_data)]=0 > total_var_data=sqrt(vx^2+vy^2) > mean_sig_data_x=apply(sig_data_x,1,mean) > mean_sig_data_y=apply(sig_data_y,1,mean) > min_x=min(mean_data_x[mean_data_x>0])/10 > min_y=min(mean_data_y[mean_data_y>0])/10 > plot_mean_data_x=mean_data_x+min_x > plot_mean_data_y=mean_data_y+min_y > plot_mean_sig_data_x=mean_sig_data_x+min_x > plot_mean_sig_data_y=mean_sig_data_y+min_y > max_x=max(plot_mean_data_x) > max_y=max(plot_mean_data_y) > library(Cairo) > mainplotfunc=function(){ + if(max(dx,dy)>0){ + start_class=quantile(total_var_data[total_var_data>0],c(0,1,25,50,75,90,100)/100)[2] + end_class=quantile(total_var_data[total_var_data>0],c(0,1,25,50,75,90,100)/100)[6] + + CairoPNG("genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.scatter.png",width=800,height=600) + num_class=22 + library(RColorBrewer) + #mycol=colorRampPalette(c(rgb(0.5,0,0.2,0.4),rgb(1,0.8,0.7,1)), alpha=TRUE)(num_class) + colPal3 <- colorRampPalette(brewer.pal(9,"YlOrRd")[3:9]) + mycol=colPal3(num_class) + + layout(matrix(c(1,1,1,1,2),1,5)) + par(ps = 18) + k=0 + i=end_class + n=(total_var_data>=i) + xt=plot_mean_data_x[n] + yt=plot_mean_data_y[n] + plot(xt,yt,log="xy",xlim=c(min_x,max_x),ylim=c(min_y,max_y),cex=2*(k+1)^3/num_class^3,pch=21,col=adjustcolor(mycol[k+1],0),bg=adjustcolor(mycol[k+1],1-0.4*k/num_class),xlab=label_x,ylab=label_y) + + for(k in 1:(num_class-2)){ + i=end_class+(start_class-end_class)*k/(num_class-2) + n=(total_var_data>=i & total_var_data=i & total_var_data=",i),pos=4) + } + k=num_class-1 + i=min(total_var_data) + par(new=T) + plot(c(0),c(k),xlim=c(0,0),ylim=c(-1,num_class),col=adjustcolor(mycol[k+1],0),bg=adjustcolor(mycol[k+1],1-0.4*k/num_class),pch=21,cex=2*(k+1)^3/num_class^3,bty="n",axes=F,xlab="",ylab="") + text(c(0.05),c(k),paste(">=",i),pos=4) + text(0,num_class+0.2,"95%conf/mean") + par(new=T) + plot(c(-0.3),c(-1),xlim=c(0,0),ylim=c(-1,num_class),col="green",cex=1,bty="n",axes=F,xlab="",ylab="") + text(c(-0.3),c(-1),paste("significant"),pos=4) + + dev.off() + } + } > subplotfunc=function(){ + CairoPNG("genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.scatter.normal.png",width=600,height=600) + plot(plot_mean_data_x,plot_mean_data_y,log="xy",xlim=c(min_x,max_x),ylim=c(min_y,max_y),cex=0.1,pch=21,col=adjustcolor("#000000",0.5),xlab=label_x,ylab=label_y) + par(new=T) + plot(plot_mean_sig_data_x,plot_mean_sig_data_y,log="xy",xlim=c(min_x,max_x),ylim=c(min_y,max_y),col="green",cex=2,axes=F,xlab="",ylab="") + dev.off() + } > > wflag=0 > while(wflag<=5){ + restry=NULL + restry=try(mainplotfunc(), silent = FALSE) + if(class(restry)!="try-error"){break} + try(dev.off()) + Sys.sleep(10+runif(1, min=0,max=100)/10) + wflag=wflag+1 + } > wflag=0 > while(wflag<=5){ + restry=NULL + restry=try(subplotfunc(), silent = FALSE) + if(class(restry)!="try-error"){break} + try(dev.off()) + Sys.sleep(10+runif(1, min=0,max=100)/10) + wflag=wflag+1 + } > > > + rm -f genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm_x genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm_y genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm_x_sig genes.count_table.a.b.txt.ballgown.genes.count_table.a.b.txt.id.cpm_y_sig + post_processing + '[' 3 = 1 ']' + exit + tail -n+2 ballgown.genes.count_table.a.b.txt + awk '{if($3<=1){print $1"\tx"}else{print $1"\ty"}}' + cut -f 1,2 ballgown.genes.count_table.a.b.txt + awk '-F\t' 'NR==1{print -1"\t"$0} NR>1{print $2"\t"$0}' + sort -k1,1g + cut -f 2- + awk '-F\t' 'FILENAME==ARGV[1]{xy[$1]=$2} FILENAME==ARGV[2]{if(FNR==1){header=$0}; res[$1]=$0} FILENAME==ARGV[3]{if(FNR==1){print header"\t"$2}else{if(xy[$1]=="x"){print res[$1]"\t"$2}}}' ballgown.genes.count_table.a.b.txt.xy Trinotate.xls3.gene.cnt ballgown.genes.count_table.a.b.txt.p + awk '-F\t' 'FILENAME==ARGV[1]{xy[$1]=$2} FILENAME==ARGV[2]{if(FNR==1){header=$0}; res[$1]=$0} FILENAME==ARGV[3]{if(FNR==1){print header"\t"$2}else{if(xy[$1]=="y"){print res[$1]"\t"$2}}}' ballgown.genes.count_table.a.b.txt.xy Trinotate.xls3.gene.cnt ballgown.genes.count_table.a.b.txt.p + i=result.ballgown.genes.count_table.a.b.txt.a.up.b.down.txt + awk '-F\t' -v maxlen=30000 'FILENAME==ARGV[1]{for(i=1;i<=NF;i++){n=int((length($i)-1)/maxlen)+1; if(n>a[i]){a[i]=n}}} FILENAME==ARGV[2]{if(FNR==1){ORS=""; for(i=1;i<=NF;i++){if(a[i]<=1){print $i"\t"}else{for(j=1;j<=a[i];j++){print $i" (split "j")\t"}}}; print "\n"}else{for(i=1;i<=NF;i++){if(a[i]<=1){print $i"\t"}else{for(j=1;j<=a[i];j++){print substr($i,(j-1)*maxlen+1,maxlen)"\t" }}}; print "\n"}}' result.ballgown.genes.count_table.a.b.txt.a.up.b.down.txt ./result.ballgown.genes.count_table.a.b.txt.a.up.b.down.txt + sed 's/\t$//' + i=result.ballgown.genes.count_table.a.b.txt.a.down.b.up.txt + sed 's/\t$//' + awk '-F\t' -v maxlen=30000 'FILENAME==ARGV[1]{for(i=1;i<=NF;i++){n=int((length($i)-1)/maxlen)+1; if(n>a[i]){a[i]=n}}} FILENAME==ARGV[2]{if(FNR==1){ORS=""; for(i=1;i<=NF;i++){if(a[i]<=1){print $i"\t"}else{for(j=1;j<=a[i];j++){print $i" (split "j")\t"}}}; print "\n"}else{for(i=1;i<=NF;i++){if(a[i]<=1){print $i"\t"}else{for(j=1;j<=a[i];j++){print substr($i,(j-1)*maxlen+1,maxlen)"\t" }}}; print "\n"}}' result.ballgown.genes.count_table.a.b.txt.a.down.b.up.txt ./result.ballgown.genes.count_table.a.b.txt.a.down.b.up.txt + docker run -v /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis:/yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -w /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -u root -i --rm c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0-blast_2.9.0-trinotate-3.1.1-R_4-kegg_4 java -Xmx1G -jar /usr/local/bin/excel2.jar result.ballgown.genes.count_table.a.b.txt.a.up.b.down.txt.temp result.ballgown.genes.count_table.a.b.txt.a.up.b.down.txt.xlsx Start converting + docker run -v /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis:/yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -w /yoshitake/test/RNA-seq~HISAT2-StringTie-DEGanalysis -u root -i --rm c2997108/centos7:1-trinity_2.8.5-kallisto_0.46.0-blast_2.9.0-trinotate-3.1.1-R_4-kegg_4 java -Xmx1G -jar /usr/local/bin/excel2.jar result.ballgown.genes.count_table.a.b.txt.a.down.b.up.txt.temp result.ballgown.genes.count_table.a.b.txt.a.down.b.up.txt.xlsx Start converting + rm -rf input.ballgown.extract.a.b + rm -f ballgown.isoforms.count_table.a.b.txt.id ballgown.sample.input.a.b.txt.x ballgown.sample.input.a.b.txt.y ballgown.isoforms.count_table.a.b.txt.xy ballgown.isoforms.count_table.a.b.txt.p result.ballgown.isoforms.count_table.a.b.txt.a.up.b.down.txt.temp result.ballgown.isoforms.count_table.a.b.txt.a.down.b.up.txt.temp + rm -f ballgown.genes.count_table.a.b.txt.id ballgown.genes.count_table.a.b.txt.xy ballgown.genes.count_table.a.b.txt.p result.ballgown.genes.count_table.a.b.txt.a.up.b.down.txt.temp result.ballgown.genes.count_table.a.b.txt.a.down.b.up.txt.temp + post_processing + '[' 2 = 1 ']' + exit