metagenome~PR2_NCBI-16S-mito-plastid_single-end

Meta-barcoding metagenome analysis pipeline that uses the PR2 database together with the NCBI 16S, 23S, mitochondrial (mito), and plastid databases

input_1: single-end FASTQ files (optionally gzip-compressed, .gz)

input_1/alna4-02_1k_1.fq

@SNL162:264:HHT3LBCXX:1:1104:3236:1913 1:N:0:TAAGGCGA+GCGTAAGA
GCTGTATAACATGCTTTTATAAAACCAGGTGATAAAATATTGGGTCTTGATTTATCTCATGGCGGACATCTTACTCATGGTTCTTCAGTTAATTTTAGTGG
+
DBDDD@G11<CGH1111<CCF1FEF0<<D<1<1<1<1111<1<1<1<1<1<FHFIIE1<@<?1/<<C/CEEF1G1<1FG11<1<1<C@<11<F11<DC<GE
@SNL162:264:HHT3LBCXX:1:1104:4529:1931 1:N:0:TAAGGCGA+GCGTAAGA
GGTCTGCACCGGTCAGGTTTGCGCCGGCCAGGTCCACACGGCCGAGTTGTGTATCAGTCAACCGTGCACTGGCCAGGTTCGCGTCGGTCAGGGTCGCGCTC
+
DDDDDEIGGH<C<C<DHGHI0D</DH//<CH1DGGE1@</</<<<F@GHIFEEFHG1<DFCHD/<1<11<<C1<FH1<1CHHDEHHH?GHH?ECEHIIIHE
@SNL162:264:HHT3LBCXX:1:1104:7853:1832 1:N:0:TAAGGCGA+GCGTAAGA
NCCTTATCTAGAAGAAATAAATAGATGCTCCTAATCTTTCTTAAAAGAATTTTTGTAGCTATACCTGTTCTTTTGGTAGTAACTAGTTTAACTTTTATTTT

Command

metagenome~PR2_NCBI-16S-mito-plastid_single-end -c 8 -m 32 input_1

Output

all.html

view all outputs

Log

pp metagenome~PR2_NCBI-16S-mito-plastid_single-end -c 8 -m 32 input_1
Checking the realpath of input files.
0 input_1
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end/input_1/alna4-02_1k_1.fq
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end/input_1/alna4-02_1k_1.fasta.ssu.all.blast
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end/input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end/input_1/alna4-02_1k_1.fq.html
1 /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end/input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt2.input
c2997108/centos7:2-blast-taxid-2-KronaTools-2.7-pr2-mito c2997108/centos7:3-java centos:centos6
using docker
+ set -o pipefail
+ cat
+ sed s/zcat/cat/g run-gz.sh
+ xargs -I '{}' -P 1 bash -c '{}'
++ find input_1/
++ egrep '(_R1.*|_1)[.]f(ast|)q$'
+ for i in '`find $input_1/|egrep "(_R1.*|_1)[.]f(ast|)q$"||true`'
+ echo 'PPDOCNAME=pp`date' '+%Y%m%d_%H%M%S_%3N`_$RANDOM;' echo '$PPDOCNAME' '>>' '/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end/pp-docker-list;' docker run --name '${PPDOCNAME}' -v '$PWD:$PWD' -w '$PWD' -u 2007:600 -i --rm c2997108/centos7:2-blast-taxid-2-KronaTools-2.7-pr2-mito bash run.sh input_1/alna4-02_1k_1.fq 100 0.95 8 ''
++ egrep '(_R1.*|_1)[.]f(ast|)q[.]gz$'
++ find input_1/
++ true
+ echo '##count reads'
##count reads
+++ cat input_1/alna4-02_1k_1.fq
+++ wc -l
++ expr 4000 / 4
##convert fastq to fasta
+ n0=1000
+ echo '##convert fastq to fasta'
+ j=input_1/alna4-02_1k_1.fasta
+ cat input_1/alna4-02_1k_1.fq
+ awk 'NR%4==1{print ">"substr($0,2)} NR%4==2{print $0}'
+ echo '##blast'
+ blastn -db /usr/local/blastdb/PR2_16S_23S_mito_plastid.maskadapters.havepath.fa -query input_1/alna4-02_1k_1.fasta -outfmt 6 -out input_1/alna4-02_1k_1.fasta.ssu.all.blast -num_threads 8
##blast
+ echo 'blast status: ' 0
+ rm input_1/alna4-02_1k_1.fasta
blast status:  0
+ echo '##filtering'
##filtering
+ cat input_1/alna4-02_1k_1.fasta.ssu.all.blast
+ awk '-F\t' '$12>100{if(a[$1]==1){if($12>=topbit*0.95){print $0}}else{a[$1]=1; topbit=$12; print $0}}'
+ echo '##determine LCA'
##determine LCA
+ awk '-F\t' 'FILENAME==ARGV[1]{name[$1]=$4} FILENAME==ARGV[2]{print name[$2]"\t"$0}' /usr/local/blastdb/PR2_16S_23S_mito_plastid.maskadapters.havepath.fa.name input_1/alna4-02_1k_1.fasta.ssu.blast.filtered
+ awk '-F\t' '
function searchLCA(data,  i, j, res, res2, str, n, stopflag){
 for(i in data){
  if(n==0){n=split(i,res,";")}
  else{split(i,res2,";"); for(j in res){if(res[j]!=res2[j]){res[j]=""}}}
 }
 if(res[1]!=""){str=res[1]}else{str="unknown"; stopflag=1};
 for(i=2;i<=n;i++){if(stopflag==0 && res[i]!=""){str=str";"res[i]}else{stopflag=1}}
 return str;
}
{
 if($2!=old){if(old!=""){print searchLCA(data)"\t"oldstr}; delete data; data[$1]=1; old=$2; oldstr=$0}
 else{data[$1]=1}
}
END{if(length(data)>0){print searchLCA(data)"\t"oldstr}}
' input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name
+ awk '-F\t' '{cnt[$1]++} END{PROCINFO["sorted_in"]="@val_num_desc"; for(i in cnt){print i"\t"cnt[i]}}' input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca
+ awk '-F\t' '{print "root;"$0}' input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt
++ awk '-F\t' '{a+=$2} END{if(a==""){a=0}; print a}' input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt
+ cnt=4
++ expr 1000 - 4
+ echo -e 'No Hit\t996'
+ awk '-F\t' '
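{n=split($1,arr,";"); ORS="\t"; print $2; for(i=1;i<
[... log text lost here: everything between the "<" above and the ">" below was stripped as an HTML tag when this page was extracted ...]
>1){for(i=2;i<=NF;i++){a[i]+=$i}}}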
 FILENAME==ARGV[2]{if(FNR==1){OFS="\t"; for(i=2;i<=NF;i++){$i=$i" (counts per 10000)"; if(a[i]==0){a[i]=1}}; print $0}
                   else{ORS=""; print $1;for(i=2;i<=NF;i++){print "\t"$i/a[i]*10000}; print "\n"}}
' all.counts.txt ./all.counts.txt
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20221019_152708_534_26139
+ echo pp20221019_152708_534_26139
+ docker run --name pp20221019_152708_534_26139 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -u 2007:600 -i --rm c2997108/centos7:3-java java -Xmx1G -jar /usr/local/bin/excel2.jar all.counts.txt all.counts.xlsx
Start converting
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20221019_152709_923_19935
+ echo pp20221019_152709_923_19935
+ docker run --name pp20221019_152709_923_19935 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -u 2007:600 -i --rm c2997108/centos7:3-java java -Xmx1G -jar /usr/local/bin/excel2.jar all.counts.per.10000.txt all.counts.per.10000.xlsx
Start converting
++ find input_1/
++ egrep '([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$'
+ for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`'
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20221019_152711_293_16208
+ echo pp20221019_152711_293_16208
+ docker run --name pp20221019_152711_293_16208 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1/alna4-02_1k_1.fasta.ssu.blast.filtered
+ for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`'
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20221019_152711_972_27000
+ echo pp20221019_152711_972_27000
+ docker run --name pp20221019_152711_972_27000 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca
+ for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`'
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20221019_152712_636_5937
+ echo pp20221019_152712_636_5937
+ docker run --name pp20221019_152712_636_5937 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt
+ for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`'
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20221019_152713_324_31916
+ echo pp20221019_152713_324_31916
+ docker run --name pp20221019_152713_324_31916 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt2
+ for i in '`find $input_1/|egrep "([.]blast[.]filtered(|[.]name[.]lca(|[.]cnt|[.]cnt2|[.]cnt3)))$"`'
++ date +%Y%m%d_%H%M%S_%3N
+ PPDOCNAME=pp20221019_152713_999_13889
+ echo pp20221019_152713_999_13889
+ docker run --name pp20221019_152713_999_13889 -v /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end:/yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -w /yoshitake/test/metagenome~PR2_NCBI-16S-mito-plastid_single-end -u 2007:600 -i --rm c2997108/centos7:3-java rm -f input_1/alna4-02_1k_1.fasta.ssu.blast.filtered.name.lca.cnt3
+ post_processing
+ '[' 1 = 1 ']'
+ echo 0
+ exit
PID: 287458