metagenome~PR2_NCBI-16S-mito-plastid_paired-end

Metabarcoding metagenome analysis pipeline using the PR2 database and the NCBI 16S, 23S, mitochondrial (mito), and plastid databases

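input_1: directory containing paired-end FASTQ files (.fq/.fastq, optionally gzipped as .gz); read-1 files are matched by a "_1" or "_R1" suffix before the extension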

input_1/alna4-02_1k_1.fq

@SNL162:264:HHT3LBCXX:1:1104:3236:1913 1:N:0:TAAGGCGA+GCGTAAGA
GCTGTATAACATGCTTTTATAAAACCAGGTGATAAAATATTGGGTCTTGATTTATCTCATGGCGGACATCTTACTCATGGTTCTTCAGTTAATTTTAGTGG
+
DBDDD@G11<CGH1111<CCF1FEF0<<D<1<1<1<1111<1<1<1<1<1<FHFIIE1<@<?1/<<C/CEEF1G1<1FG11<1<1<C@<11<F11<DC<GE
@SNL162:264:HHT3LBCXX:1:1104:4529:1931 1:N:0:TAAGGCGA+GCGTAAGA
GGTCTGCACCGGTCAGGTTTGCGCCGGCCAGGTCCACACGGCCGAGTTGTGTATCAGTCAACCGTGCACTGGCCAGGTTCGCGTCGGTCAGGGTCGCGCTC
+
DDDDDEIGGH<C<C<DHGHI0D</DH//<CH1DGGE1@</</<<<F@GHIFEEFHG1<DFCHD/<1<11<<C1<FH1<1CHHDEHHH?GHH?ECEHIIIHE
@SNL162:264:HHT3LBCXX:1:1104:7853:1832 1:N:0:TAAGGCGA+GCGTAAGA
NCCTTATCTAGAAGAAATAAATAGATGCTCCTAATCTTTCTTAAAAGAATTTTTGTAGCTATACCTGTTCTTTTGGTAGTAACTAGTTTAACTTTTATTTT

input_1/alna4-02_1k_2.fq

@SNL162:264:HHT3LBCXX:1:1104:3236:1913 2:N:0:TAAGGCGA+GCGTAAGA
TCACAGAACTTTTCAAAATCCGGATCGCTTGAATGTTNCGTTGCAGCGGAACTTAGCAATTTTCGTTTTTCTTTTTGCGCTATCTCTTTATTTTTATCATA
+
0<0<011<D11<<D11111110/00/0011111<1<1#111<1011<//<//<<1111111<11<<11<<111<1<C1<///0<<111<11<1<1D1C?1E
@SNL162:264:HHT3LBCXX:1:1104:4529:1931 2:N:0:TAAGGCGA+GCGTAAGA
ATCCCCAACTGCATAGTGCGCAACTAGCCAGCCTGACNCTGCCGTCCGCTAACCGCTCCAATCAACCGTTGACTGATACACACCTCCCCCGTGTGCCCCTG
+
<D0D0E1D=<DHI?11<<C///</<1111111111<D#1111</0/</<</011///<<F11<<<F1/<CGCE?@11<11<10<<<1<CD/00<1<11<<<
@SNL162:264:HHT3LBCXX:1:1104:7853:1832 2:N:0:TAAGGCGA+GCGTAAGA
NCTTTCTCACTTAAAAAGGGTCCACCAGGAGCCAGGCNAATCAAAATAAAAGTTAAACTAGTTACTACCAAAAGAACAGGTATAGCTACAAAAATTCTTTT

Command

metagenome~PR2_NCBI-16S-mito-plastid_paired-end -c 8 input_1/

Output

all.html

view all outputs

Log

pp metagenome~PR2_NCBI-16S-mito-plastid_paired-end -c 8 input_1/
Checking the realpath of input files.
0 input_1/
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fq
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_2.fq
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fasta.ssu.blast
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_2.fasta.ssu.blast
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fasta.ssu.all.blast
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fq.html
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt2.input
c2997108/centos7:2-blast-taxid-2-KronaTools-2.7-pr2-mito c2997108/centos7:3-java centos:centos6
using docker
+ set -o pipefail
+ cat
+ cat
+ sed s/zcat/cat/g run-gz.sh
+ xargs -I '{}' -P 1 bash -c '{}'
++ find input_1//
++ egrep '(_R1.*|_1)[.]f(ast|)q$'
+ for i in '`find $input_1/|egrep "(_R1.*|_1)[.]f(ast|)q$"||true`'
+ echo 'PPDOCNAME=pp`date' '+%Y%m%d_%H%M%S_%3N`_$RANDOM;' echo '$PPDOCNAME' '>>' '/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end/pp-docker-list;' docker run --name '${PPDOCNAME}' -v '$PWD:$PWD' -w '$PWD' -u 2007:600 -i --rm c2997108/centos7:2-blast-taxid-2-KronaTools-2.7-pr2-mito bash run.sh input_1//alna4-02_1k_1.fq 100 0.95 8 ''
++ find input_1//
++ egrep '(_R1.*|_1)[.]f(ast|)q[.]gz$'
++ true
##count reads
+ echo '##count reads'
+++ cat input_1//alna4-02_1k_1.fq
+++ wc -l
++ expr 4000 / 4
+ n0=1000
+ echo '##convert fastq to fasta'
##convert fastq to fasta
+ j=input_1//alna4-02_1k_1.fasta
+ cat input_1//alna4-02_1k_1.fq
+ awk 'NR%4==1{print ">"substr($0,2)} NR%4==2{print $0}'
++ echo input_1//alna4-02_1k_1.fq
++ egrep '_1[.]f(ast|)q$'
++ wc -l
+ '[' 1 = 1 ']'
++ echo input_1//alna4-02_1k_1.fq
++ sed 's/_1[.]f\(ast\|\)q$/_2.f\1q/'
+ i2=input_1//alna4-02_1k_2.fq
++ echo input_1//alna4-02_1k_1.fasta
++ egrep '_1[.]fasta$'
++ wc -l
+ '[' 1 = 1 ']'
++ echo input_1//alna4-02_1k_1.fasta
++ sed 's/_1[.]fasta$/_2.fasta/'
+ j2=input_1//alna4-02_1k_2.fasta
+ cat input_1//alna4-02_1k_2.fq
+ awk 'NR%4==1{print ">"substr($0,2)} NR%4==2{print $0}'
+ echo '##blast'
##blast
+ blastn -db /usr/local/blastdb/PR2_16S_23S_mito_plastid.maskadapters.havepath.fa -query input_1//alna4-02_1k_1.fasta -outfmt 6 -out input_1//alna4-02_1k_1.fasta.ssu.blast -num_threads 8
+ blastn -db /usr/local/blastdb/PR2_16S_23S_mito_plastid.maskadapters.havepath.fa -query input_1//alna4-02_1k_2.fasta -outfmt 6 -out input_1//alna4-02_1k_2.fasta.ssu.blast -num_threads 8
+ python run-count-paired.py input_1//alna4-02_1k_1.fasta input_1//alna4-02_1k_2.fasta input_1//alna4-02_1k_1.fasta.ssu.blast input_1//alna4-02_1k_2.fasta.ssu.blast
+ rm input_1//alna4-02_1k_1.fasta input_1//alna4-02_1k_2.fasta
+ echo '##filtering'
##filtering
+ cat input_1//alna4-02_1k_1.fasta.ssu.all.blast
+ awk '-F\t' '$3>100{if(a[$1]==1){if($3>=topbit*0.95){print $0}}else{a[$1]=1; topbit=$3; print $0}}'
+ echo '##determine LCA'
##determine LCA
+ awk '-F\t' 'FILENAME==ARGV[1]{name[$1]=$4} FILENAME==ARGV[2]{print name[$2]"\t"$0}' /usr/local/blastdb/PR2_16S_23S_mito_plastid.maskadapters.havepath.fa.name input_1//alna4-02_1k_1.fasta.ssu.blast.filtered
+ awk '-F\t' '
function searchLCA(data,  i, j, res, res2, str, n, stopflag){
 for(i in data){
  if(n==0){n=split(i,res,";")}
  else{split(i,res2,";"); for(j in res){if(res[j]!=res2[j]){res[j]=""}}}
 }
 if(res[1]!=""){str=res[1]}else{str="unknown"; stopflag=1};
 for(i=2;i<=n;i++){if(stopflag==0 && res[i]!=""){str=str";"res[i]}else{stopflag=1}}
 return str;
}
{
 if($2!=old){if(old!=""){print searchLCA(data)"\t"oldstr}; delete data; data[$1]=1; old=$2; oldstr=$0}
 else{data[$1]=1}
}
END{if(length(data)>0){print searchLCA(data)"\t"oldstr}}
' input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name
+ awk '-F\t' '{cnt[$1]++} END{PROCINFO["sorted_in"]="@val_num_desc"; for(i in cnt){print i"\t"cnt[i]}}' input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca
+ awk '-F\t' '{print "root;"$0}' input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt
++ awk '-F\t' '{a+=$2} END{if(a==""){a=0}; print a}' input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt
+ cnt=4
++ expr 1000 - 4
+ echo -e 'No Hit\t996'
+ awk '-F\t' '
{n=split($1,arr,";"); ORS="\t"; print $2; for(i=1;i<[... log text lost here during page extraction ...]>1){for(i=2;i<=NF;i++){a[i]+=$i}}}
 FILENAME==ARGV[2]{if(FNR==1){OFS="\t"; for(i=2;i<=NF;i++){$i=$i" (counts per 10000)"; if(a[i]==0){a[i]=1}}; print $0}
                   else{ORS=""; print $1;for(i=2;i<=NF;i++){print "\t"$i/a[i]*10000}; print "\n"}}
' all.counts.txt ./all.counts.txt
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20220809_213826_163_31272
+ echo pp20220809_213826_163_31272
+ docker run --name pp20220809_213826_163_31272 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java java -Xmx1G -jar /usr/local/bin/excel2.jar all.counts.txt all.counts.xlsx
Start converting
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20220809_213827_534_28090
+ echo pp20220809_213827_534_28090
+ docker run --name pp20220809_213827_534_28090 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java java -Xmx1G -jar /usr/local/bin/excel2.jar all.counts.per.10000.txt all.counts.per.10000.xlsx
Start converting
++ egrep '([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$'
++ find input_1//
+ for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`'
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20220809_213828_966_30733
+ echo pp20220809_213828_966_30733
+ docker run --name pp20220809_213828_966_30733 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1//alna4-02_1k_1.fasta.ssu.blast.filtered
+ for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`'
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20220809_213829_670_20206
+ echo pp20220809_213829_670_20206
+ docker run --name pp20220809_213829_670_20206 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca
+ for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`'
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20220809_213830_384_9125
+ echo pp20220809_213830_384_9125
+ docker run --name pp20220809_213830_384_9125 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt
+ for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`'
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20220809_213831_169_13177
+ echo pp20220809_213831_169_13177
+ docker run --name pp20220809_213831_169_13177 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt2
+ for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`'
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20220809_213831_896_27711
+ echo pp20220809_213831_896_27711
+ docker run --name pp20220809_213831_896_27711 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_paired-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1//alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt3
+ post_processing
+ '[' 1 = 1 ']'
+ echo 0
+ exit
PID: 61436