Meta-barcoding metagenome analysis pipeline using the PR2 database and the NCBI 16S, 23S, mitochondrial, and plastid databases
@SNL162:264:HHT3LBCXX:1:1104:3236:1913 1:N:0:TAAGGCGA+GCGTAAGA
GCTGTATAACATGCTTTTATAAAACCAGGTGATAAAATATTGGGTCTTGATTTATCTCATGGCGGACATCTTACTCATGGTTCTTCAGTTAATTTTAGTGG
+
DBDDD@G11<CGH1111<CCF1FEF0<<D<1<1<1<1111<1<1<1<1<1<FHFIIE1<@<?1/<<C/CEEF1G1<1FG11<1<1<C@<11<F11<DC<GE
@SNL162:264:HHT3LBCXX:1:1104:4529:1931 1:N:0:TAAGGCGA+GCGTAAGA
GGTCTGCACCGGTCAGGTTTGCGCCGGCCAGGTCCACACGGCCGAGTTGTGTATCAGTCAACCGTGCACTGGCCAGGTTCGCGTCGGTCAGGGTCGCGCTC
+
DDDDDEIGGH<C<C<DHGHI0D</DH//<CH1DGGE1@</</<<<F@GHIFEEFHG1<DFCHD/<1<11<<C1<FH1<1CHHDEHHH?GHH?ECEHIIIHE
@SNL162:264:HHT3LBCXX:1:1104:7853:1832 1:N:0:TAAGGCGA+GCGTAAGA
NCCTTATCTAGAAGAAATAAATAGATGCTCCTAATCTTTCTTAAAAGAATTTTTGTAGCTATACCTGTTCTTTTGGTAGTAACTAGTTTAACTTTTATTTT
@SNL162:264:HHT3LBCXX:1:1104:3236:1913 2:N:0:TAAGGCGA+GCGTAAGA
TCACAGAACTTTTCAAAATCCGGATCGCTTGAATGTTNCGTTGCAGCGGAACTTAGCAATTTTCGTTTTTCTTTTTGCGCTATCTCTTTATTTTTATCATA
+
0<0<011<D11<<D11111110/00/0011111<1<1#111<1011<//<//<<1111111<11<<11<<111<1<C1<///0<<111<11<1<1D1C?1E
@SNL162:264:HHT3LBCXX:1:1104:4529:1931 2:N:0:TAAGGCGA+GCGTAAGA
ATCCCCAACTGCATAGTGCGCAACTAGCCAGCCTGACNCTGCCGTCCGCTAACCGCTCCAATCAACCGTTGACTGATACACACCTCCCCCGTGTGCCCCTG
+
<D0D0E1D=<DHI?11<<C///</<1111111111<D#1111</0/</<</011///<<F11<<<F1/<CGCE?@11<11<10<<<1<CD/00<1<11<<<
@SNL162:264:HHT3LBCXX:1:1104:7853:1832 2:N:0:TAAGGCGA+GCGTAAGA
NCTTTCTCACTTAAAAAGGGTCCACCAGGAGCCAGGCNAATCAAAATAAAAGTTAAACTAGTTACTACCAAAAGAACAGGTATAGCTACAAAAATTCTTTT
metagenome~PR2_NCBI-16S-mito-plastid_paired-end -c 8 input_1/
pp metagenome~PR2_NCBI-16S-mito-plastid_paired-end -c 8 input_1/ Checking the realpath of input files. 0 input_1/ 1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fq 1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_2.fq 1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fasta.ssu.blast 1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_2.fasta.ssu.blast 1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fasta.ssu.all.blast 1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name 1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fq.html 1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt2.input c2997108/centos7:2-blast-taxid-2-KronaTools-2.7-pr2-mito c2997108/centos7:3-java centos:centos6 using docker + set -o pipefail + cat + cat + sed s/zcat/cat/g run-gz.sh + xargs -I '{}' -P 1 bash -c '{}' ++ find input_1// ++ egrep '(_R1.*|_1)[.]f(ast|)q$' + for i in '`find $input_1/|egrep "(_R1.*|_1)[.]f(ast|)q$"||true`' + echo 'PPDOCNAME=pp`date' '+%Y%m%d_%H%M%S_%3N`_$RANDOM;' echo '$PPDOCNAME' '>>' '/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/pp-docker-list;' docker run --name '${PPDOCNAME}' -v '$PWD:$PWD' -w '$PWD' -u 2007:600 -i --rm c2997108/centos7:2-blast-taxid-2-KronaTools-2.7-pr2-mito bash run.sh input_1//alna4-02_1k_1.fq 100 0.95 8 '' ++ find input_1// ++ egrep '(_R1.*|_1)[.]f(ast|)q[.]gz$' ++ true ##count reads + echo '##count reads' +++ cat input_1//alna4-02_1k_1.fq +++ wc -l ++ expr 4000 / 4 + n0=1000 + echo '##convert fastq to fasta' ##convert fastq to fasta + j=input_1//alna4-02_1k_1.fasta + cat input_1//alna4-02_1k_1.fq + awk 'NR%4==1{print ">"substr($0,2)} NR%4==2{print $0}' ++ echo 
input_1//alna4-02_1k_1.fq ++ egrep '_1[.]f(ast|)q$' ++ wc -l + '[' 1 = 1 ']' ++ echo input_1//alna4-02_1k_1.fq ++ sed 's/_1[.]f\(ast\|\)q$/_2.f\1q/' + i2=input_1//alna4-02_1k_2.fq ++ echo input_1//alna4-02_1k_1.fasta ++ egrep '_1[.]fasta$' ++ wc -l + '[' 1 = 1 ']' ++ echo input_1//alna4-02_1k_1.fasta ++ sed 's/_1[.]fasta$/_2.fasta/' + j2=input_1//alna4-02_1k_2.fasta + cat input_1//alna4-02_1k_2.fq + awk 'NR%4==1{print ">"substr($0,2)} NR%4==2{print $0}' + echo '##blast' ##blast + blastn -db /usr/local/blastdb/PR2_16S_23S_mito_plastid.maskadapters.havepath.fa -query input_1//alna4-02_1k_1.fasta -outfmt 6 -out input_1//alna4-02_1k_1.fasta.ssu.blast -num_threads 8 + blastn -db /usr/local/blastdb/PR2_16S_23S_mito_plastid.maskadapters.havepath.fa -query input_1//alna4-02_1k_2.fasta -outfmt 6 -out input_1//alna4-02_1k_2.fasta.ssu.blast -num_threads 8 + python run-count-paired.py input_1//alna4-02_1k_1.fasta input_1//alna4-02_1k_2.fasta input_1//alna4-02_1k_1.fasta.ssu.blast input_1//alna4-02_1k_2.fasta.ssu.blast + rm input_1//alna4-02_1k_1.fasta input_1//alna4-02_1k_2.fasta + echo '##filtering' ##filtering + cat input_1//alna4-02_1k_1.fasta.ssu.all.blast + awk '-F\t' '$3>100{if(a[$1]==1){if($3>=topbit*0.95){print $0}}else{a[$1]=1; topbit=$3; print $0}}' + echo '##determine LCA' ##determine LCA + awk '-F\t' 'FILENAME==ARGV[1]{name[$1]=$4} FILENAME==ARGV[2]{print name[$2]"\t"$0}' /usr/local/blastdb/PR2_16S_23S_mito_plastid.maskadapters.havepath.fa.name input_1//alna4-02_1k_1.fasta.ssu.blast.filtered + awk '-F\t' ' function searchLCA(data, i, j, res, res2, str, n, stopflag){ for(i in data){ if(n==0){n=split(i,res,";")} else{split(i,res2,";"); for(j in res){if(res[j]!=res2[j]){res[j]=""}}} } if(res[1]!=""){str=res[1]}else{str="unknown"; stopflag=1}; for(i=2;i<=n;i++){if(stopflag==0 && res[i]!=""){str=str";"res[i]}else{stopflag=1}} return str; } { if($2!=old){if(old!=""){print searchLCA(data)"\t"oldstr}; delete data; data[$1]=1; old=$2; oldstr=$0} else{data[$1]=1} } 
END{if(length(data)>0){print searchLCA(data)"\t"oldstr}} ' input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name + awk '-F\t' '{cnt[$1]++} END{PROCINFO["sorted_in"]="@val_num_desc"; for(i in cnt){print i"\t"cnt[i]}}' input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca + awk '-F\t' '{print "root;"$0}' input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt ++ awk '-F\t' '{a+=$2} END{if(a==""){a=0}; print a}' input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt + cnt=4 ++ expr 1000 - 4 + echo -e 'No Hit\t996' + awk '-F\t' ' {n=split($1,arr,";"); ORS="\t"; print $2; for(i=1;i1){for(i=2;i<=NF;i++){a[i]+=$i}}} FILENAME==ARGV[2]{if(FNR==1){OFS="\t"; for(i=2;i<=NF;i++){$i=$i" (counts per 10000)"; if(a[i]==0){a[i]=1}}; print $0} else{ORS=""; print $1;for(i=2;i<=NF;i++){print "\t"$i/a[i]*10000}; print "\n"}} ' all.counts.txt ./all.counts.txt ++ date +%Y%m%d_%H%M%S_%3N + PPDOCNAME=pp20220809_213826_163_31272 + echo pp20220809_213826_163_31272 + docker run --name pp20220809_213826_163_31272 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java java -Xmx1G -jar /usr/local/bin/excel2.jar all.counts.txt all.counts.xlsx Start converting ++ date +%Y%m%d_%H%M%S_%3N + PPDOCNAME=pp20220809_213827_534_28090 + echo pp20220809_213827_534_28090 + docker run --name pp20220809_213827_534_28090 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java java -Xmx1G -jar /usr/local/bin/excel2.jar all.counts.per.10000.txt all.counts.per.10000.xlsx Start converting ++ egrep '([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$' ++ find input_1// + for i in '`find $input_1/|egrep 
"([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`' ++ date +%Y%m%d_%H%M%S_%3N + PPDOCNAME=pp20220809_213828_966_30733 + echo pp20220809_213828_966_30733 + docker run --name pp20220809_213828_966_30733 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1//alna4-02_1k_1.fasta.ssu.blast.filtered + for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`' ++ date +%Y%m%d_%H%M%S_%3N + PPDOCNAME=pp20220809_213829_670_20206 + echo pp20220809_213829_670_20206 + docker run --name pp20220809_213829_670_20206 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca + for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`' ++ date +%Y%m%d_%H%M%S_%3N + PPDOCNAME=pp20220809_213830_384_9125 + echo pp20220809_213830_384_9125 + docker run --name pp20220809_213830_384_9125 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt + for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`' ++ date +%Y%m%d_%H%M%S_%3N + PPDOCNAME=pp20220809_213831_169_13177 + echo pp20220809_213831_169_13177 + docker run --name pp20220809_213831_169_13177 -v 
/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt2 + for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`' ++ date +%Y%m%d_%H%M%S_%3N + PPDOCNAME=pp20220809_213831_896_27711 + echo pp20220809_213831_896_27711 + docker run --name pp20220809_213831_896_27711 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt3 + post_processing + '[' 1 = 1 ']' + echo 0 + exit PID: 61436