c2997108/centos6:2-blast_2.8.1-silva_132-KronaTools_2.7 c2997108/centos6:4 centos:centos6 using docker + set -o pipefail + cat + sed s/zcat/cat/g run-gz.sh + xargs -I '{}' -P 1 bash -c '{}' ++ find input_1/ ++ egrep '[.]f(ast|)q$' + for i in '`find $input_1/|egrep "[.]f(ast|)q$"`' + echo docker run -v '$PWD:$PWD' -w '$PWD' -u root -i --rm c2997108/centos6:2-blast_2.8.1-silva_132-KronaTools_2.7 bash run.sh input_1/food1.fastq 100 0.95 30 '' + for i in '`find $input_1/|egrep "[.]f(ast|)q$"`' + echo docker run -v '$PWD:$PWD' -w '$PWD' -u root -i --rm c2997108/centos6:2-blast_2.8.1-silva_132-KronaTools_2.7 bash run.sh input_1/food2.fastq 100 0.95 30 '' + for i in '`find $input_1/|egrep "[.]f(ast|)q$"`' + echo docker run -v '$PWD:$PWD' -w '$PWD' -u root -i --rm c2997108/centos6:2-blast_2.8.1-silva_132-KronaTools_2.7 bash run.sh input_1/food3.fastq 100 0.95 30 '' + for i in '`find $input_1/|egrep "[.]f(ast|)q$"`' + echo docker run -v '$PWD:$PWD' -w '$PWD' -u root -i --rm c2997108/centos6:2-blast_2.8.1-silva_132-KronaTools_2.7 bash run.sh input_1/food4.fastq 100 0.95 30 '' ++ find input_1/ ++ egrep '[.]f(ast|)q[.]gz$' ##count reads + echo '##count reads' +++ cat input_1/food1.fastq +++ wc -l ++ expr 1736 / 4 ##convert fastq to fasta + n0=434 + echo '##convert fastq to fasta' + j=input_1/food1.fasta + awk 'NR%4==1{print ">"substr($0,2)} NR%4==2{print $0}' + cat input_1/food1.fastq ##blast + echo '##blast' + blastn -db /usr/local/silva/SILVA_132_SSU-LSU_Ref.fasta -query input_1/food1.fasta -outfmt 6 -out input_1/food1.fasta.ssu.blast -num_threads 30 blast status: 0 + echo 'blast status: ' 0 + rm input_1/food1.fasta ##filtering + echo '##filtering' + cat input_1/food1.fasta.ssu.blast + awk '-F\t' '$12>100{if(a[$1]==1){if($12>=topbit*0.95){print $0}}else{a[$1]=1; topbit=$12; print $0}}' + echo '##determine LCA' + awk '-F\t' 'FILENAME==ARGV[1]{name[$1]=$2} FILENAME==ARGV[2]{print name[$2]"\t"$0}' /usr/local/silva/SILVA_132_SSU-LSU_Ref.fasta.name input_1/food1.fasta.ssu.blast.filtered ##determine LCA + awk '-F\t' ' function searchLCA(data, i, j, res, res2, str, n, stopflag){ for(i in data){ if(n==0){n=split(i,res,";")} else{split(i,res2,";"); for(j in res){if(res[j]!=res2[j]){res[j]=""}}} } if(res[1]!=""){str=res[1]}else{str="unknown"; stopflag=1}; for(i=2;i<=n;i++){if(stopflag==0 && res[i]!=""){str=str";"res[i]}else{stopflag=1}} return str; } { if($2!=old){if(old!=""){print searchLCA(data)"\t"oldstr}; delete data; data[$1]=1; old=$2; oldstr=$0} else{data[$1]=1} } END{if(length(data)>0){print searchLCA(data)"\t"oldstr}} ' input_1/food1.fasta.ssu.blast.filtered.name + awk '-F\t' '{cnt[$1]++} END{PROCINFO["sorted_in"]="@val_num_desc"; for(i in cnt){print i"\t"cnt[i]}}' input_1/food1.fasta.ssu.blast.filtered.name.lca + awk '-F\t' '{print "root;"$0}' input_1/food1.fasta.ssu.blast.filtered.name.lca.cnt ++ awk '-F\t' '{a+=$2} END{if(a==""){a=0}; print a}' input_1/food1.fasta.ssu.blast.filtered.name.lca.cnt + cnt=434 ++ expr 434 - 434 + echo -e 'No Hit\t0' + awk '-F\t' ' {n=split($1,arr,";"); ORS="\t"; print $2; for(i=1;i"substr($0,2)} NR%4==2{print $0}' + echo '##blast' + blastn -db /usr/local/silva/SILVA_132_SSU-LSU_Ref.fasta -query input_1/food2.fasta -outfmt 6 -out input_1/food2.fasta.ssu.blast -num_threads 30 ##blast + echo 'blast status: ' 0 + rm input_1/food2.fasta blast status: 0 ##filtering + echo '##filtering' + cat input_1/food2.fasta.ssu.blast + awk '-F\t' '$12>100{if(a[$1]==1){if($12>=topbit*0.95){print $0}}else{a[$1]=1; topbit=$12; print $0}}' ##determine LCA + echo '##determine LCA' + awk '-F\t' 'FILENAME==ARGV[1]{name[$1]=$2} FILENAME==ARGV[2]{print name[$2]"\t"$0}' /usr/local/silva/SILVA_132_SSU-LSU_Ref.fasta.name input_1/food2.fasta.ssu.blast.filtered + awk '-F\t' ' function searchLCA(data, i, j, res, res2, str, n, stopflag){ for(i in data){ if(n==0){n=split(i,res,";")} else{split(i,res2,";"); for(j in res){if(res[j]!=res2[j]){res[j]=""}}} } if(res[1]!=""){str=res[1]}else{str="unknown"; stopflag=1}; for(i=2;i<=n;i++){if(stopflag==0 && res[i]!=""){str=str";"res[i]}else{stopflag=1}} return str; } { if($2!=old){if(old!=""){print searchLCA(data)"\t"oldstr}; delete data; data[$1]=1; old=$2; oldstr=$0} else{data[$1]=1} } END{if(length(data)>0){print searchLCA(data)"\t"oldstr}} ' input_1/food2.fasta.ssu.blast.filtered.name + awk '-F\t' '{cnt[$1]++} END{PROCINFO["sorted_in"]="@val_num_desc"; for(i in cnt){print i"\t"cnt[i]}}' input_1/food2.fasta.ssu.blast.filtered.name.lca + awk '-F\t' '{print "root;"$0}' input_1/food2.fasta.ssu.blast.filtered.name.lca.cnt ++ awk '-F\t' '{a+=$2} END{if(a==""){a=0}; print a}' input_1/food2.fasta.ssu.blast.filtered.name.lca.cnt + cnt=1000 ++ expr 1000 - 1000 + echo -e 'No Hit\t0' + awk '-F\t' ' {n=split($1,arr,";"); ORS="\t"; print $2; for(i=1;i"substr($0,2)} NR%4==2{print $0}' ##blast + echo '##blast' + blastn -db /usr/local/silva/SILVA_132_SSU-LSU_Ref.fasta -query input_1/food3.fasta -outfmt 6 -out input_1/food3.fasta.ssu.blast -num_threads 30 + echo 'blast status: ' 0 + rm input_1/food3.fasta blast status: 0 ##filtering + echo '##filtering' + cat input_1/food3.fasta.ssu.blast + awk '-F\t' '$12>100{if(a[$1]==1){if($12>=topbit*0.95){print $0}}else{a[$1]=1; topbit=$12; print $0}}' ##determine LCA + echo '##determine LCA' + awk '-F\t' 'FILENAME==ARGV[1]{name[$1]=$2} FILENAME==ARGV[2]{print name[$2]"\t"$0}' /usr/local/silva/SILVA_132_SSU-LSU_Ref.fasta.name input_1/food3.fasta.ssu.blast.filtered + awk '-F\t' ' function searchLCA(data, i, j, res, res2, str, n, stopflag){ for(i in data){ if(n==0){n=split(i,res,";")} else{split(i,res2,";"); for(j in res){if(res[j]!=res2[j]){res[j]=""}}} } if(res[1]!=""){str=res[1]}else{str="unknown"; stopflag=1}; for(i=2;i<=n;i++){if(stopflag==0 && res[i]!=""){str=str";"res[i]}else{stopflag=1}} return str; } { if($2!=old){if(old!=""){print searchLCA(data)"\t"oldstr}; delete data; data[$1]=1; old=$2; oldstr=$0} else{data[$1]=1} } END{if(length(data)>0){print searchLCA(data)"\t"oldstr}} ' input_1/food3.fasta.ssu.blast.filtered.name + awk '-F\t' '{cnt[$1]++} END{PROCINFO["sorted_in"]="@val_num_desc"; for(i in cnt){print i"\t"cnt[i]}}' input_1/food3.fasta.ssu.blast.filtered.name.lca + awk '-F\t' '{print "root;"$0}' input_1/food3.fasta.ssu.blast.filtered.name.lca.cnt ++ awk '-F\t' '{a+=$2} END{if(a==""){a=0}; print a}' input_1/food3.fasta.ssu.blast.filtered.name.lca.cnt + cnt=1000 ++ expr 1000 - 1000 + echo -e 'No Hit\t0' + awk '-F\t' ' {n=split($1,arr,";"); ORS="\t"; print $2; for(i=1;i"substr($0,2)} NR%4==2{print $0}' ##blast + echo '##blast' + blastn -db /usr/local/silva/SILVA_132_SSU-LSU_Ref.fasta -query input_1/food4.fasta -outfmt 6 -out input_1/food4.fasta.ssu.blast -num_threads 30 + echo 'blast status: ' 0 + rm input_1/food4.fasta blast status: 0 ##filtering + echo '##filtering' + cat input_1/food4.fasta.ssu.blast + awk '-F\t' '$12>100{if(a[$1]==1){if($12>=topbit*0.95){print $0}}else{a[$1]=1; topbit=$12; print $0}}' + echo '##determine LCA' + awk '-F\t' 'FILENAME==ARGV[1]{name[$1]=$2} FILENAME==ARGV[2]{print name[$2]"\t"$0}' /usr/local/silva/SILVA_132_SSU-LSU_Ref.fasta.name input_1/food4.fasta.ssu.blast.filtered ##determine LCA + awk '-F\t' ' function searchLCA(data, i, j, res, res2, str, n, stopflag){ for(i in data){ if(n==0){n=split(i,res,";")} else{split(i,res2,";"); for(j in res){if(res[j]!=res2[j]){res[j]=""}}} } if(res[1]!=""){str=res[1]}else{str="unknown"; stopflag=1}; for(i=2;i<=n;i++){if(stopflag==0 && res[i]!=""){str=str";"res[i]}else{stopflag=1}} return str; } { if($2!=old){if(old!=""){print searchLCA(data)"\t"oldstr}; delete data; data[$1]=1; old=$2; oldstr=$0} else{data[$1]=1} } END{if(length(data)>0){print searchLCA(data)"\t"oldstr}} ' input_1/food4.fasta.ssu.blast.filtered.name + awk '-F\t' '{cnt[$1]++} END{PROCINFO["sorted_in"]="@val_num_desc"; for(i in cnt){print i"\t"cnt[i]}}' input_1/food4.fasta.ssu.blast.filtered.name.lca + awk '-F\t' '{print "root;"$0}' input_1/food4.fasta.ssu.blast.filtered.name.lca.cnt ++ awk '-F\t' '{a+=$2} END{if(a==""){a=0}; print a}' input_1/food4.fasta.ssu.blast.filtered.name.lca.cnt + cnt=670 ++ expr 670 - 670 + echo -e 'No Hit\t0' + awk '-F\t' ' {n=split($1,arr,";"); ORS="\t"; print $2; for(i=1;i1){for(i=2;i<=NF;i++){a[i]+=$i}}} FILENAME==ARGV[2]{if(FNR==1){OFS="\t"; for(i=2;i<=NF;i++){$i=$i" (counts per 10000)"; if(a[i]==0){a[i]=1}}; print $0} else{ORS=""; print $1;for(i=2;i<=NF;i++){print "\t"$i/a[i]*10000}; print "\n"}} ' all.counts.txt ./all.counts.txt + docker run -v /data/user2/work/92:/data/user2/work/92 -w /data/user2/work/92 -u root -i --rm c2997108/centos6:4 java -Xmx1G -jar /usr/local/bin/excel2.jar all.counts.txt all.counts.xlsx Start converting + docker run -v /data/user2/work/92:/data/user2/work/92 -w /data/user2/work/92 -u root -i --rm c2997108/centos6:4 java -Xmx1G -jar /usr/local/bin/excel2.jar all.counts.per.10000.txt all.counts.per.10000.xlsx Start converting + post_processing + '[' 1 = 1 ']' + '[' 'docker run -v $PWD:$PWD -w $PWD -u root -i --rm ' = 'docker run -v $PWD:$PWD -w $PWD -u root -i --rm ' ']' + docker run -v /data/user2/work/92:/data/user2/work/92 -w /data/user2/work/92 -u root -i --rm centos:centos6 chmod -R a=rXw . + echo 0 + exit