nanopore~get-consensus

This pipeline clusters the Nanopore reads in each input file and generates a consensus sequence for every cluster that contains enough reads (at least 10 in the run logged below), making it a useful alternative to Sanger sequencing.

input_1: Nanopore FASTQ files (.fq or .fastq, optionally gzip-compressed as .fq.gz or .fastq.gz)

input_1/18S_01_51.fq.gz

input_1/18S_04_54.fq

@fc7796c1-a180-4ea3-ae66-20f23df82d60:1:Euk18S_R_NB54:Euk18S_F_NB04:138:1914:-1
TAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTCTAAATACATGCCGTATTAAGGCGAAACCGCGAATGGCTCATTAAATCAGTTACGGTTCCTTAGATGTTGACTATCTACATGGATAACTGTGGTAATTCTAGAGCTAATACATGCACAAAAGCTTCGACCTTAACGGAAGAAGCGCATTTATTAGACCAAGACCAATGGGGTTCTTTACGGAACTCCTTTATGTGGTGACTCTGAATAACTTTTGCTAATCGCATGGCCTATGAGCCGGCGATGAATCTTTCAAGTGTCTGCCTTATCAACTGTCGATGGTAGGTTATGCGCCTACCATGGTTGTAACGGGTAACGGGAATCAGGGTTCGATTCCTAGAGGGGAGCCTGAGAGATGGCTACCACATCCAAGGAAGGCAGCAGGCACGCAAATTACCCAATCCCAGAACGGGAGGTAGTGACTCAAAAATAACAATACAGGACTCTTTTGAGGCCCTGTAATTGGAATGAGAACAGTTTAAATCCTTTAACGAGGATCTATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGAAGTTAAAAAGCTCGTAGTTGGATCTCAGTTCGAGTCAGACGGTCCACTTGCCAGTGGTTACTGTCTTGACTGAACACTTTATCGAGTTGTCCTATGGTGCTCTTCGCCGAGTGTCATAGGCGATCGATACGTTTACTTTGAAAAAATTAGAGTGCTCAAAAAGCAGCGTTTAGCCCGAATAATGTTGCATGGAATAATGGAATAGGACCTCGGTTCTATTTTGTTGGTCTTCGGAATTGAAAATGATTAAGAGGGGACAGACGGGGGCATTCGTATTGCGACGCTAGAGGTGAATTCTTGGACCGTTGCAAGACGAACTAAAGCGAAGGCATTTGCCAAGAATGTTTTACATGAATCAGAACGAAAGTTAGCGGATCGAAGGCGATCAGATACCGCCCTAGTGCTAACCATAAACGATGCCAACCAGTAATAAGCCTGAGTTCCTTAAATGACTCGGCTTGGGACTTCCGGGAAACCAAAGTCTTCGGCTTGGAAGCACTGGTTGCAAAGCTGAAACTTAAAGAAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGAAAACTCACCCGGCCCGGACACTGGAAGGATTGACAGATTGAGAGCTCTTTCTTGATTCAGTGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGTGATTTGTCTGGTTAATTCCGATAACGAACGAGACTCTAGCCTACTAAATAGATCGTTGGCTTACAAACGCCTGAATGAGAATCTCTAGAGGGACAGGCGGTGATTCAACCGCACGAAACAGAGCAATAACAGGTCCGCGATGCCCTTAGATGTCCGGGGCCGCACGCGCGCTACACTGAAGTGATCAGCGTGCATTCTATGCCTACTCTGTCAAGAGTGGGAAACCCAATGACCTTCTTCGTGATTGGGATAGGAGATTGTAATTATTCTCCTTGAACGAGGAATTCCCAGTAAGCGCGAGTCATAAGCTCGCGTTGATTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTACTACCGATTGAATGTATTATTGAGGTCTTCGGACTGGCCGGTGCAGCCATTCTTGTAGTGGCAGCGCTGTGTTTGGAAAGATGCCCCAAAATTGTACATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAG
+
222933332322.+++**07:98999@@ACSLIKIHJL?>>>>GLKSIKMISJNIMOLNSSMSKSSOKRJGMGQIFLISPPMIQQSQPSKNRIMSQLNDEJSSJLSMLMQPOSSNOSSOLLPKJOPPMNKSPLM@@?GFHISGKJSMSPJLJKGGJ<AMSMLLISSONOB00/79:6***8:BBAAAAIJNINSLQSRKSSSKKMMRPMSSLLKSNLSJMMSOOSOSPSMJHKINGKGIJHIISMSSQSRHGGIIMSILSQNLOLILKMOQSKSKSKPBCACHGH??>>?HLSRSSOOOSSSKSOLOQPMSSSNSNIOKJIQPSSSSOJKJKMSSLLNSPSIKNIGMSFHLH*QD6@@@ALKLHMJNIS/-'''////((*2332413)((''(((*46:>?AAA@PLIISSOSKQSSSSKSSNJNQNKSOSOMLSKOPSLSJLH7HKHSKSKSI988889KSJBJEKHLKJLSJOSNMKMJIDBCSRSHJIGLQOSSPSSNMHDBABDSJMOSPSPNKHJFF88CHHGMLNRPJLSL@A@@BMACJMISOJKSKMKJLPLQSLNSMPNPSJOSSMLSOSMMHSJOSRSKMISSRSSOLSNMIRLMLJMMSSIFLHSMJSPSSNKSLKPSJSSJNPJNSJLSSSLSKLOSPSRONSJJSSSJPGC@**(&&())222GSSKSQSJSJJQIGFEGIISSISHJSMJSLISN88?=<<<<JGEEFKIKBAAAAKIKSMSMNJJNMLIPSSSPECABGCCCBC>8-,-.12/,,,,2223<899::?@AHEFEDEFFG@FEDC>,,*))+++558LKIINSEGEEDLGFBIG6666==:893-,+&%&')--.02009>?,3<GAAACCJISSLOKMSIKKHGHFGJFD@?;9//.222,'**/0127=;<<=GHGFHFEBDCDFFDDLLKGJJIHJIGDEA6655;;;<<;1++++,--,/.(%%%&(((%12,,,+000221122634445555GGIFGGGHGFFGKLMPQSSJEEE74445:E32225.*)))+56:;::888=C@ABA?;;?@>;ABABH666659;;;<32,+./2''''')'((&'+))))*''''((((()/69:9@><<<>?@GGFGE@1:9<GIJHIHEHGEGN

Command

nanopore~get-consensus -c 8 -m 32 input_1/

Output

output/18S_01_51.fq.gz_cluster0_4496reads.fasta

>consensus_18S_01_51.fq.gz_cluster0_4496reads
tTggATCCtGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTCTAAGTACATGCCGTATT
AAGGCGAAACCGCGAATGGCTCATTAAATCAGTTACGGTTCCTTAGATGTTGACTATCTACATGGATAAC
TGTGGAAAATCTAGAGCTAATACATGCACAAAAGCTTTGACCCTAGCCGGAAAAAGCGCATTTATTAGAC
CAAGACCAATGGGTTCGTCCCCGGCGACGGTGGGCGATACCCTCTTAGTGGTGACTCTGGATAACTTTTC
GCTAATCGCATGGCCTATGAGCCGGCGATGAATCTTTCAAGTGTCTGCCTTATCAACTGTCGATGGTAGG
TTATGCGCCTACCATGGTTGTAACGGGTAACGGGGAATCAGGGTTCGATTCCGGAGAGGGGGCTTGAGAA
TTGGCCACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCCAGAACGGGGAGGTAGTGACG
AAAAATAACAATACGGGACTCTTATGAGGCCCCGTAATTGGAATGAGAACAATCTAAATCCTTTAACGAG
GATCTATTAGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCTAATAGCGTATACTAAAGTT

output/18S_01_51.fq.gz_cluster0_4496reads.fasta.muscle

>09be950f-3be1-44e7-b878-5eb977338550:1:Euk18S_R_NB51:Euk18S_F_NB01:136:1955:-1
--------------------------------------------------------------------------------
----------------------------------------------------TAGTCATATGCTTGTCTCAAAGATTAAG
CCATGCATGTCGA-AAGATACATG-C-CGTATTAAA---AGGCTGATAAAACTGCGAATGGCTC-ATT-AAATCAGTTAC
GGTTCCTTA-------GA----TGTTGACTATCTACATGGATAACTGTGG-AAAATCTAG-A---GTGAAAAAGTACA-C
GCACAAAAGCTTTAACCCTAGCC-G-GAAAAAGCGCATTTATT--AGA----CCAAGACCAATGGGTTCGTCCCCGGCGA
CGGTG-GCTATACCC-TC--T--T-AGTGGTGACTCT----GG-ATAACTT-TTC---GCTAATCGCATGGCCTATGA-A
GCGGCGATGA---A-TCTTTCAAGTGTCT-GCCTTATCAACTGTCGATG----G----A---AGGTTATGCGCCTACCAT
GGTTGTAACGGGTAAACGGG-AATCA-GGGTTCGATTCCG-GCAGA--GGG-GGCTT----G-AGAA-TTGGCCACC---
---ACAT-CCAAGGA-AGGCAGCAGGTGCGCA---AATTCACCCAATCC-CTAGA-AC----G-GGA-AAAGTTAGCCGG

view all outputs

Log

pp nanopore~get-consensus -c 8 -m 32 input_1/
PID: 4107245
/home/yoshitake.kazutoshi/files/m256y/pp-dev/yoshitake/PortablePipeline/PortablePipeline/scripts/pp 'nanopore~get-consensus' -c 8 -m 32 input_1/
Checking the realpath of input files.
1
script: /suikou/files/m256y/yoshitake.kazutoshi/work/pp-dev/yoshitake/PortablePipeline/PortablePipeline/scripts/nanopore~get-consensus
Containers: c2997108/rocky9:dev_vsearch_muscle centos:centos6
using docker
++ set -o pipefail
+ set -eux
+ set -o pipefail
++ find input_1//
++ egrep '[.]f(ast|)q$'
+ r=input_1//18S_04_54.fq
++ find input_1//
++ egrep '[.]f(ast|)q[.]gz$'
+ rgz=input_1//18S_01_51.fq.gz
+ '[' input_1//18S_04_54.fqinput_1//18S_01_51.fq.gz = '' ']'
+ cat
/suikou/files/m256y/yoshitake.kazutoshi/work/pp-dev/yoshitake/PortablePipeline/PortablePipeline/scripts/nanopore~get-consensus: 行 71: 対応する `"' を探索中に予期しないファイル終了 (EOF) です

PID: 4109635
/home/yoshitake.kazutoshi/files/m256y/pp-dev/yoshitake/PortablePipeline/PortablePipeline/scripts/pp 'nanopore~get-consensus' -c 8 -m 32 input_1/
Checking the realpath of input files.
1
script: /suikou/files/m256y/yoshitake.kazutoshi/work/pp-dev/yoshitake/PortablePipeline/PortablePipeline/scripts/nanopore~get-consensus
Containers: c2997108/rocky9:dev_vsearch_muscle centos:centos6
using docker
++ set -o pipefail
+ set -eux
+ set -o pipefail
++ find input_1//
++ egrep '[.]f(ast|)q$'
+ r=input_1//18S_04_54.fq
++ find input_1//
++ egrep '[.]f(ast|)q[.]gz$'
+ rgz=input_1//18S_01_51.fq.gz
+ '[' input_1//18S_04_54.fqinput_1//18S_01_51.fq.gz = '' ']'
+ cat
+ find input_1//
+ grep -E '[.]f(ast|)q(|[.]gz)$'
+ read i
+ xargs '-d\n' -I '{}' -P 1 bash -c '{}'
+ echo 'PPDOCNAME=pp`date' '+%Y%m%d_%H%M%S_%3N`_$RANDOM;' echo '$PPDOCNAME' '>>' '/home/yoshitake.kazutoshi/files/m256y/pp-dev/yoshitake/test/nanopore~get-consensus/pp-docker-list;' docker run --name '${PPDOCNAME}' -v '$PWD:$PWD' -w '$PWD' -v /suikou/files/m256y/yoshitake.kazutoshi:/suikou/files/m256y/yoshitake.kazutoshi -u 2007:600 -i --rm c2997108/rocky9:dev_vsearch_muscle bash run-consensus.sh input_1//18S_01_51.fq.gz 0.9 10 30
+ read i
+ echo 'PPDOCNAME=pp`date' '+%Y%m%d_%H%M%S_%3N`_$RANDOM;' echo '$PPDOCNAME' '>>' '/home/yoshitake.kazutoshi/files/m256y/pp-dev/yoshitake/test/nanopore~get-consensus/pp-docker-list;' docker run --name '${PPDOCNAME}' -v '$PWD:$PWD' -w '$PWD' -v /suikou/files/m256y/yoshitake.kazutoshi:/suikou/files/m256y/yoshitake.kazutoshi -u 2007:600 -i --rm c2997108/rocky9:dev_vsearch_muscle bash run-consensus.sh input_1//18S_04_54.fq 0.9 10 30
+ read i
+ set -o pipefail
+ i=input_1//18S_01_51.fq.gz
+ opt_i=0.9
+ opt_k=10
+ opt_n=30
++ basename input_1//18S_01_51.fq.gz
+ workdir=work/18S_01_51.fq.gz.dir
+ mkdir -p work/18S_01_51.fq.gz.dir
+ vsearch --cluster_fast input_1//18S_01_51.fq.gz --id 0.9 --centroids work/18S_01_51.fq.gz.dir/vsearch-center.fasta --uc work/18S_01_51.fq.gz.dir/vsearch-clusters.uc
vsearch v2.30.0_linux_x86_64, 754.2GB RAM, 16 cores
https://github.com/torognes/vsearch

Reading file input_1//18S_01_51.fq.gz 100%
8187728 nt in 4562 seqs, min 1384, max 1907, avg 1795
Masking 100%
Sorting by length 100%
Counting k-mers 100%
Clustering 100%
Sorting clusters 100%
Writing clusters 100%
Clusters: 50 Size min 1, max 4496, avg 91.2
Singletons: 46, 1.0% of seqs, 92.0% of clusters
+ seqkit fx2tab input_1//18S_01_51.fq.gz
+ awk '-F\t' -v dir=work/18S_01_51.fq.gz.dir -v minreads=10 '
FILENAME==ARGV[1]{seq[$1]=$2; q[$1]=$3}
FILENAME==ARGV[2]{if($1=="S"){s[$2]=$9}else if($1=="H"){c[$2]++; r[$2][c[$2]]=$9}}
END{
for(i in c){
if(c[i]+1>=minreads){
print i"\t"c[i]+1; print ">"s[i]"\n"seq[s[i]] > dir"/cluster"i"_"c[i]+1"reads.fasta";
for(j in r[i]){print ">"r[i][j]"\n"seq[r[i][j]] > dir"/cluster"i"_"c[i]+1"reads.fasta"}
}
}
}' /dev/stdin work/18S_01_51.fq.gz.dir/vsearch-clusters.uc
0	4496
48	14
+ mkdir -p output
++ ls work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta
+ for j in `ls $workdir/cluster*_*reads.fasta 2> /dev/null`
+ head -n 60 work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta
+ muscle -align work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub -output work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.muscle

muscle 5.3.linux64 [d9725ac]  791Gb RAM, 16 cores
Built Nov 10 2024 22:58:59
(C) Copyright 2004-2021 Robert C. Edgar.
https://drive5.com

[align work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub]
Input: 30 seqs, avg length 1837, max 1907, min 1820

00:00 4.7Mb     3.3% Derep 1 uniques, 0 dupes
00:00 4.7Mb   100.0% Derep 30 uniques, 0 dupes
00:00 4.7Mb  CPU has 16 cores, running 16 threads
00:00 198Mb    0.23% Calc posteriors
00:01 9.3Gb     3.9% Calc posteriors
00:02 9.3Gb     7.4% Calc posteriors
00:03 8.8Gb    11.3% Calc posteriors
00:04 8.2Gb    16.1% Calc posteriors
00:05 8.7Gb    21.6% Calc posteriors
00:06 8.7Gb    26.4% Calc posteriors
00:07 9.3Gb    30.8% Calc posteriors
00:08 8.7Gb    35.9% Calc posteriors
00:09 9.3Gb    41.1% Calc posteriors
00:10 9.3Gb    46.2% Calc posteriors
00:11 9.2Gb    51.3% Calc posteriors
00:12 9.2Gb    55.9% Calc posteriors
00:13 9.2Gb    61.4% Calc posteriors
00:14 8.7Gb    66.2% Calc posteriors
00:15 8.7Gb    71.0% Calc posteriors
00:16 8.7Gb    76.1% Calc posteriors
00:17 8.4Gb    81.1% Calc posteriors
00:18 8.1Gb    86.2% Calc posteriors
00:19 9.2Gb    91.0% Calc posteriors
00:20 9.2Gb    96.1% Calc posteriors
00:21 2.8Gb    99.1% Calc posteriors
00:21 1.7Gb   100.0% Calc posteriors
00:21 1.2Gb     3.4% UPGMA5         
00:21 1.2Gb   100.0% UPGMA5
00:21 1.2Gb    0.23% Consistency (1/2)
00:22 1.2Gb     4.6% Consistency (1/2)
00:22 1.2Gb   100.0% Consistency (1/2)
00:22 1.2Gb    0.23% Consistency (2/2)
00:22 1.2Gb   100.0% Consistency (2/2)
00:22 1.2Gb     1.0% Refining         
00:23 1.2Gb     4.0% Refining
00:24 1.2Gb    64.0% Refining
00:24 1.2Gb   100.0% Refining
+ cons -sequence work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.muscle -outseq work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.muscle.fa
Create a consensus sequence from a multiple alignment
+ sed s/n//g work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.muscle.fa
+ fastareformat work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.muscle.fa.rmn
++ basename input_1//18S_01_51.fq.gz
++ basename work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta .fasta
+ echo '>consensus_18S_01_51.fq.gz_cluster0_4496reads'
+ tail -n+2 work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.muscle.fa.rmn2
+ cat work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.muscle.fa.rmn3 work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub
+ muscle -align work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.fa -output work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.fa.muscle

muscle 5.3.linux64 [d9725ac]  791Gb RAM, 16 cores
Built Nov 10 2024 22:58:59
(C) Copyright 2004-2021 Robert C. Edgar.
https://drive5.com

[align work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.fa]
Input: 31 seqs, avg length 1836, max 1907, min 1813

00:00 4.7Mb     3.2% Derep 1 uniques, 0 dupes
00:00 4.7Mb   100.0% Derep 31 uniques, 0 dupes
00:00 4.7Mb  CPU has 16 cores, running 16 threads
00:00 198Mb    0.22% Calc posteriors
00:01 7.2Gb     3.7% Calc posteriors
00:02 9.3Gb     7.1% Calc posteriors
00:03 8.7Gb    12.3% Calc posteriors
00:04 9.2Gb    17.2% Calc posteriors
00:05 8.2Gb    21.9% Calc posteriors
00:06 9.3Gb    26.0% Calc posteriors
00:07 8.7Gb    30.5% Calc posteriors
00:08 8.2Gb    36.3% Calc posteriors
00:09 8.7Gb    40.6% Calc posteriors
00:10 8.2Gb    44.7% Calc posteriors
00:11 8.7Gb    49.9% Calc posteriors
00:12 9.2Gb    54.6% Calc posteriors
00:13 8.7Gb    59.6% Calc posteriors
00:14 8.7Gb    64.3% Calc posteriors
00:15 8.7Gb    68.8% Calc posteriors
00:16 9.2Gb    73.8% Calc posteriors
00:17 9.2Gb    78.3% Calc posteriors
00:18 9.2Gb    83.4% Calc posteriors
00:19 8.7Gb    87.7% Calc posteriors
00:20 9.2Gb    92.7% Calc posteriors
00:21 8.2Gb    97.2% Calc posteriors
00:21 4.4Gb   100.0% Calc posteriors
00:22 1.2Gb     3.3% UPGMA5         
00:22 1.2Gb   100.0% UPGMA5
00:22 1.2Gb    0.22% Consistency (1/2)
00:22 1.2Gb   100.0% Consistency (1/2)
00:22 1.2Gb    0.22% Consistency (2/2)
00:23 1.2Gb    73.8% Consistency (2/2)
00:23 1.2Gb   100.0% Consistency (2/2)
00:23 1.2Gb     1.0% Refining         
00:24 1.2Gb    37.0% Refining
00:25 1.2Gb   100.0% Refining
++ basename input_1//18S_01_51.fq.gz
++ basename work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta
+ cp work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.muscle.fa.rmn3 output/18S_01_51.fq.gz_cluster0_4496reads.fasta
++ basename input_1//18S_01_51.fq.gz
++ basename work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta
+ cp work/18S_01_51.fq.gz.dir/cluster0_4496reads.fasta.sub.fa.muscle output/18S_01_51.fq.gz_cluster0_4496reads.fasta.muscle
+ for j in `ls $workdir/cluster*_*reads.fasta 2> /dev/null`
+ head -n 60 work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta
+ muscle -align work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub -output work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.muscle

muscle 5.3.linux64 [d9725ac]  791Gb RAM, 16 cores
Built Nov 10 2024 22:58:59
(C) Copyright 2004-2021 Robert C. Edgar.
https://drive5.com

[align work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub]
Input: 14 seqs, avg length 1394, max 1404, min 1384

00:00 4.7Mb     7.1% Derep 1 uniques, 0 dupes
00:00 4.7Mb   100.0% Derep 14 uniques, 0 dupes
00:00 4.7Mb  CPU has 16 cores, running 16 threads
00:00 198Mb     1.1% Calc posteriors
00:01 5.5Gb    40.7% Calc posteriors
00:02 5.5Gb    85.7% Calc posteriors
00:02 4.0Gb   100.0% Calc posteriors
00:02 1.1Gb     7.7% UPGMA5         
00:02 1.1Gb   100.0% UPGMA5
00:02 1.1Gb     1.1% Consistency (1/2)
00:02 1.2Gb   100.0% Consistency (1/2)
00:02 1.2Gb     1.1% Consistency (2/2)
00:02 1.2Gb   100.0% Consistency (2/2)
00:02 1.2Gb     1.0% Refining         
00:03 1.2Gb    18.0% Refining
00:03 1.2Gb   100.0% Refining
+ cons -sequence work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.muscle -outseq work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.muscle.fa
Create a consensus sequence from a multiple alignment
+ sed s/n//g work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.muscle.fa
+ fastareformat work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.muscle.fa.rmn
++ basename input_1//18S_01_51.fq.gz
++ basename work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta .fasta
+ echo '>consensus_18S_01_51.fq.gz_cluster48_14reads'
+ tail -n+2 work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.muscle.fa.rmn2
+ cat work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.muscle.fa.rmn3 work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub
+ muscle -align work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.fa -output work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.fa.muscle

muscle 5.3.linux64 [d9725ac]  791Gb RAM, 16 cores
Built Nov 10 2024 22:58:59
(C) Copyright 2004-2021 Robert C. Edgar.
https://drive5.com

[align work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.fa]
Input: 15 seqs, avg length 1394, max 1404, min 1384

00:00 4.7Mb     6.7% Derep 1 uniques, 0 dupes
00:00 4.7Mb   100.0% Derep 15 uniques, 0 dupes
00:00 4.7Mb  CPU has 16 cores, running 16 threads
00:00 198Mb    0.95% Calc posteriors
00:01 5.8Gb    16.2% Calc posteriors
00:02 5.5Gb    52.4% Calc posteriors
00:03 5.8Gb    91.4% Calc posteriors
00:03 3.6Gb   100.0% Calc posteriors
00:03 1.1Gb     7.1% UPGMA5         
00:03 1.1Gb   100.0% UPGMA5
00:03 1.1Gb    0.95% Consistency (1/2)
00:03 1.2Gb   100.0% Consistency (1/2)
00:03 1.2Gb    0.95% Consistency (2/2)
00:03 1.2Gb   100.0% Consistency (2/2)
00:03 1.2Gb     1.0% Refining         
00:04 1.2Gb    19.0% Refining
00:04 1.2Gb   100.0% Refining
++ basename input_1//18S_01_51.fq.gz
++ basename work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta
+ cp work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.muscle.fa.rmn3 output/18S_01_51.fq.gz_cluster48_14reads.fasta
++ basename input_1//18S_01_51.fq.gz
++ basename work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta
+ cp work/18S_01_51.fq.gz.dir/cluster48_14reads.fasta.sub.fa.muscle output/18S_01_51.fq.gz_cluster48_14reads.fasta.muscle
+ set -o pipefail
+ i=input_1//18S_04_54.fq
+ opt_i=0.9
+ opt_k=10
+ opt_n=30
++ basename input_1//18S_04_54.fq
+ workdir=work/18S_04_54.fq.dir
+ mkdir -p work/18S_04_54.fq.dir
+ vsearch --cluster_fast input_1//18S_04_54.fq --id 0.9 --centroids work/18S_04_54.fq.dir/vsearch-center.fasta --uc work/18S_04_54.fq.dir/vsearch-clusters.uc
vsearch v2.30.0_linux_x86_64, 754.2GB RAM, 16 cores
https://github.com/torognes/vsearch

Reading file input_1//18S_04_54.fq 100%
1009976 nt in 572 seqs, min 1143, max 2195, avg 1766
Masking 100%
Sorting by length 100%
Counting k-mers 100%
Clustering 100%
Sorting clusters 100%
Writing clusters 100%
Clusters: 32 Size min 1, max 389, avg 17.9
Singletons: 26, 4.5% of seqs, 81.2% of clusters
+ seqkit fx2tab input_1//18S_04_54.fq
+ awk '-F\t' -v dir=work/18S_04_54.fq.dir -v minreads=10 '
FILENAME==ARGV[1]{seq[$1]=$2; q[$1]=$3}
FILENAME==ARGV[2]{if($1=="S"){s[$2]=$9}else if($1=="H"){c[$2]++; r[$2][c[$2]]=$9}}
END{
for(i in c){
if(c[i]+1>=minreads){
print i"\t"c[i]+1; print ">"s[i]"\n"seq[s[i]] > dir"/cluster"i"_"c[i]+1"reads.fasta";
for(j in r[i]){print ">"r[i][j]"\n"seq[r[i][j]] > dir"/cluster"i"_"c[i]+1"reads.fasta"}
}
}
}' /dev/stdin work/18S_04_54.fq.dir/vsearch-clusters.uc
1	389
16	144
+ mkdir -p output
++ ls work/18S_04_54.fq.dir/cluster16_144reads.fasta work/18S_04_54.fq.dir/cluster1_389reads.fasta
+ for j in `ls $workdir/cluster*_*reads.fasta 2> /dev/null`
+ head -n 60 work/18S_04_54.fq.dir/cluster16_144reads.fasta
+ muscle -align work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub -output work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.muscle

muscle 5.3.linux64 [d9725ac]  791Gb RAM, 16 cores
Built Nov 10 2024 22:58:59
(C) Copyright 2004-2021 Robert C. Edgar.
https://drive5.com

[align work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub]
Input: 30 seqs, avg length 1741, max 1758, min 1737

00:00 4.7Mb     3.3% Derep 1 uniques, 0 dupes
00:00 4.7Mb   100.0% Derep 30 uniques, 0 dupes
00:00 4.7Mb  CPU has 16 cores, running 16 threads
00:00 198Mb    0.23% Calc posteriors
00:01 7.0Gb     5.3% Calc posteriors
00:02 7.5Gb    11.3% Calc posteriors
00:03 7.5Gb    16.6% Calc posteriors
00:04 8.0Gb    22.1% Calc posteriors
00:05 8.2Gb    27.6% Calc posteriors
00:06 8.4Gb    33.3% Calc posteriors
00:07 7.5Gb    38.2% Calc posteriors
00:08 8.4Gb    43.9% Calc posteriors
00:09 8.4Gb    50.1% Calc posteriors
00:10 8.2Gb    55.4% Calc posteriors
00:11 8.4Gb    61.1% Calc posteriors
00:12 7.9Gb    66.9% Calc posteriors
00:13 8.4Gb    72.4% Calc posteriors
00:14 8.4Gb    78.2% Calc posteriors
00:15 8.4Gb    83.7% Calc posteriors
00:16 8.4Gb    89.9% Calc posteriors
00:17 7.5Gb    94.9% Calc posteriors
00:18 4.1Gb    99.3% Calc posteriors
00:18 1.2Gb   100.0% Calc posteriors
00:19 1.2Gb     3.4% UPGMA5         
00:19 1.2Gb   100.0% UPGMA5
00:19 1.2Gb    0.23% Consistency (1/2)
00:19 1.2Gb   100.0% Consistency (1/2)
00:19 1.2Gb    0.23% Consistency (2/2)
00:19 1.2Gb   100.0% Consistency (2/2)
00:19 1.2Gb     1.0% Refining         
00:20 1.2Gb     8.0% Refining
00:21 1.2Gb    85.0% Refining
00:21 1.2Gb   100.0% Refining
+ cons -sequence work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.muscle -outseq work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.muscle.fa
Create a consensus sequence from a multiple alignment
+ sed s/n//g work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.muscle.fa
+ fastareformat work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.muscle.fa.rmn
++ basename input_1//18S_04_54.fq
++ basename work/18S_04_54.fq.dir/cluster16_144reads.fasta .fasta
+ echo '>consensus_18S_04_54.fq_cluster16_144reads'
+ tail -n+2 work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.muscle.fa.rmn2
+ cat work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.muscle.fa.rmn3 work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub
+ muscle -align work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.fa -output work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.fa.muscle

muscle 5.3.linux64 [d9725ac]  791Gb RAM, 16 cores
Built Nov 10 2024 22:58:59
(C) Copyright 2004-2021 Robert C. Edgar.
https://drive5.com

[align work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.fa]
Input: 31 seqs, avg length 1741, max 1758, min 1736

00:00 4.7Mb     3.2% Derep 1 uniques, 0 dupes
00:00 4.7Mb   100.0% Derep 31 uniques, 0 dupes
00:00 4.7Mb  CPU has 16 cores, running 16 threads
00:00 198Mb    0.22% Calc posteriors
00:01 7.7Gb     5.4% Calc posteriors
00:02 8.0Gb    10.5% Calc posteriors
00:03 8.4Gb    16.3% Calc posteriors
00:04 8.4Gb    21.1% Calc posteriors
00:05 8.4Gb    26.9% Calc posteriors
00:06 7.5Gb    32.0% Calc posteriors
00:07 7.5Gb    37.2% Calc posteriors
00:08 7.9Gb    43.0% Calc posteriors
00:09 7.5Gb    47.3% Calc posteriors
00:10 7.5Gb    52.9% Calc posteriors
00:11 7.9Gb    58.1% Calc posteriors
00:12 7.9Gb    63.7% Calc posteriors
00:13 7.9Gb    68.4% Calc posteriors
00:14 8.4Gb    74.4% Calc posteriors
00:15 7.9Gb    79.6% Calc posteriors
00:16 7.9Gb    84.9% Calc posteriors
00:17 7.9Gb    90.1% Calc posteriors
00:18 8.4Gb    95.7% Calc posteriors
00:19 5.5Gb   100.0% Calc posteriors
00:19 1.2Gb     3.3% UPGMA5         
00:19 1.2Gb   100.0% UPGMA5
00:19 1.2Gb    0.22% Consistency (1/2)
00:19 1.2Gb   100.0% Consistency (1/2)
00:19 1.2Gb    0.22% Consistency (2/2)
00:20 1.2Gb    69.7% Consistency (2/2)
00:20 1.2Gb   100.0% Consistency (2/2)
00:20 1.2Gb     1.0% Refining         
00:21 1.2Gb    45.0% Refining
00:21 1.2Gb   100.0% Refining
++ basename input_1//18S_04_54.fq
++ basename work/18S_04_54.fq.dir/cluster16_144reads.fasta
+ cp work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.muscle.fa.rmn3 output/18S_04_54.fq_cluster16_144reads.fasta
++ basename input_1//18S_04_54.fq
++ basename work/18S_04_54.fq.dir/cluster16_144reads.fasta
+ cp work/18S_04_54.fq.dir/cluster16_144reads.fasta.sub.fa.muscle output/18S_04_54.fq_cluster16_144reads.fasta.muscle
+ for j in `ls $workdir/cluster*_*reads.fasta 2> /dev/null`
+ head -n 60 work/18S_04_54.fq.dir/cluster1_389reads.fasta
+ muscle -align work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub -output work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.muscle

muscle 5.3.linux64 [d9725ac]  791Gb RAM, 16 cores
Built Nov 10 2024 22:58:59
(C) Copyright 2004-2021 Robert C. Edgar.
https://drive5.com

[align work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub]
Input: 30 seqs, avg length 1801, max 1863, min 1794

00:00 4.7Mb     3.3% Derep 1 uniques, 0 dupes
00:00 4.7Mb   100.0% Derep 30 uniques, 0 dupes
00:00 4.7Mb  CPU has 16 cores, running 16 threads
00:00 198Mb    0.23% Calc posteriors
00:01 9.0Gb     3.9% Calc posteriors
00:02 9.0Gb     7.6% Calc posteriors
00:03 8.4Gb    12.2% Calc posteriors
00:04 8.4Gb    17.9% Calc posteriors
00:05 9.0Gb    22.8% Calc posteriors
00:06 8.4Gb    28.3% Calc posteriors
00:07 8.4Gb    33.6% Calc posteriors
00:08 8.4Gb    38.6% Calc posteriors
00:09 8.4Gb    44.1% Calc posteriors
00:10 8.4Gb    49.4% Calc posteriors
00:11 7.9Gb    54.7% Calc posteriors
00:12 7.9Gb    59.5% Calc posteriors
00:13 8.9Gb    64.8% Calc posteriors
00:14 7.9Gb    69.9% Calc posteriors
00:15 8.4Gb    75.6% Calc posteriors
00:16 8.9Gb    80.5% Calc posteriors
00:17 8.9Gb    86.0% Calc posteriors
00:18 8.9Gb    91.3% Calc posteriors
00:19 8.9Gb    95.9% Calc posteriors
00:20 4.3Gb    99.3% Calc posteriors
00:20 1.7Gb   100.0% Calc posteriors
00:21 1.2Gb     3.4% UPGMA5         
00:21 1.2Gb   100.0% UPGMA5
00:21 1.2Gb    0.23% Consistency (1/2)
00:21 1.2Gb   100.0% Consistency (1/2)
00:21 1.2Gb    0.23% Consistency (2/2)
00:21 1.2Gb   100.0% Consistency (2/2)
00:22 1.2Gb     1.0% Refining         
00:23 1.2Gb    59.0% Refining
00:23 1.2Gb   100.0% Refining
+ cons -sequence work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.muscle -outseq work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.muscle.fa
Create a consensus sequence from a multiple alignment
+ sed s/n//g work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.muscle.fa
+ fastareformat work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.muscle.fa.rmn
++ basename input_1//18S_04_54.fq
++ basename work/18S_04_54.fq.dir/cluster1_389reads.fasta .fasta
+ echo '>consensus_18S_04_54.fq_cluster1_389reads'
+ tail -n+2 work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.muscle.fa.rmn2
+ cat work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.muscle.fa.rmn3 work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub
+ muscle -align work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.fa -output work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.fa.muscle

muscle 5.3.linux64 [d9725ac]  791Gb RAM, 16 cores
Built Nov 10 2024 22:58:59
(C) Copyright 2004-2021 Robert C. Edgar.
https://drive5.com

[align work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.fa]
Input: 31 seqs, avg length 1801, max 1863, min 1794

00:00 4.7Mb     3.2% Derep 1 uniques, 0 dupes
00:00 4.7Mb   100.0% Derep 31 uniques, 0 dupes
00:00 4.7Mb  CPU has 16 cores, running 16 threads
00:00 198Mb    0.22% Calc posteriors
00:01 8.7Gb     3.7% Calc posteriors
00:02 9.0Gb     7.1% Calc posteriors
00:03 9.0Gb    12.7% Calc posteriors
00:04 8.4Gb    17.2% Calc posteriors
00:05 7.9Gb    22.6% Calc posteriors
00:06 8.4Gb    27.3% Calc posteriors
00:07 8.9Gb    32.5% Calc posteriors
00:08 8.9Gb    37.0% Calc posteriors
00:09 8.9Gb    41.9% Calc posteriors
00:10 8.7Gb    46.9% Calc posteriors
00:11 8.4Gb    51.8% Calc posteriors
00:12 8.9Gb    56.6% Calc posteriors
00:13 8.9Gb    61.9% Calc posteriors
00:14 8.4Gb    66.7% Calc posteriors
00:15 8.9Gb    71.6% Calc posteriors
00:16 8.4Gb    76.1% Calc posteriors
00:17 8.4Gb    81.3% Calc posteriors
00:18 8.4Gb    86.0% Calc posteriors
00:19 7.9Gb    91.4% Calc posteriors
00:20 7.4Gb    95.9% Calc posteriors
00:21 1.7Gb   100.0% Calc posteriors
00:21 1.2Gb     3.3% UPGMA5         
00:21 1.2Gb   100.0% UPGMA5
00:21 1.2Gb    0.22% Consistency (1/2)
00:22 1.2Gb    43.4% Consistency (1/2)
00:22 1.2Gb   100.0% Consistency (1/2)
00:22 1.2Gb    0.22% Consistency (2/2)
00:22 1.2Gb   100.0% Consistency (2/2)
00:22 1.2Gb     1.0% Refining         
00:23 1.2Gb    10.0% Refining
00:24 1.2Gb    78.0% Refining
00:24 1.2Gb   100.0% Refining
++ basename input_1//18S_04_54.fq
++ basename work/18S_04_54.fq.dir/cluster1_389reads.fasta
+ cp work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.muscle.fa.rmn3 output/18S_04_54.fq_cluster1_389reads.fasta
++ basename input_1//18S_04_54.fq
++ basename work/18S_04_54.fq.dir/cluster1_389reads.fasta
+ cp work/18S_04_54.fq.dir/cluster1_389reads.fasta.sub.fa.muscle output/18S_04_54.fq_cluster1_389reads.fasta.muscle
+ post_processing
+ '[' 1 = 1 ']'
+ rm -f /home/yoshitake.kazutoshi/files/m256y/pp-dev/yoshitake/test/nanopore~get-consensus/pp-singularity-flag
+ '[' '' = y ']'
+ echo 0
+ exit