MetaGOflow: An EOSC-Life Project Workflow for Marine Genomic Observatories' Data Analysis Using the MGnify Pipeline
MetaGOflow: A workflow for marine Genomic Observatories' data analysis. An EOSC-Life project. The workflows developed in the framework of this project are based on pipeline-v5 of the MGnify resource.
Dependencies
To run metaGOflow you first need to make sure the following are set up on your computing environment:
- python3 [v 3.8+]
- Docker [v 19.+] or Singularity [v 3.7.+]/ Apptainer [v 1.+]
- cwltool [v 3.+]
- rdflib [v 6.+]
- rdflib-jsonld [v 0.6.2]
- ro-crate-py [v 0.7.0]
- pyyaml [v 6.0]
- Node.js [v 10.24.0+]
- Available storage: ~235 GB for the databases
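With these in place, a metaGOflow run comes down to handing cwltool the workflow definition together with a YAML job file describing the inputs. A minimal sketch, in which the file names (workflow.cwl, config.yml) and the top-level input names are illustrative placeholders rather than the repository's actual entry point:

# config.yml -- hypothetical job file for a paired-end run
forward_reads:
  class: File
  path: reads/sample_R1.fastq.gz   # placeholder paired-end reads
reverse_reads:
  class: File
  path: reads/sample_R2.fastq.gz
threads: 8

This would then be executed with, for example, cwltool --singularity workflow.cwl config.yml; omitting --singularity makes cwltool fall back to Docker.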
Code Snippets
The following excerpts are the CWL CommandLineTool wrappers used by the workflow.
baseCommand: [ emapper.py ]

inputs:
  fasta_file:
    format: edam:format_1929  # FASTA
    type: File?
    inputBinding:
      separate: true
      prefix: -i
    label: Input FASTA file containing query sequences
  db:
    type: [string?, File?]  # data/eggnog.db
    inputBinding:
      prefix: --database
    label: specify the target database for sequence searches (euk, bact, arch, host:port, local hmmpressed database)
  db_diamond:
    type: [string?, File?]  # data/eggnog_proteins.dmnd
    inputBinding:
      prefix: --dmnd_db
    label: Path to DIAMOND-compatible database
  data_dir:
    type: [string?, Directory?]  # data/
    inputBinding:
      prefix: --data_dir
    label: Directory to use for DATA_PATH
  mode:
    type: string?
    inputBinding:
      prefix: -m
    label: hmmer or diamond
  no_annot:
    type: boolean?
    inputBinding:
      prefix: --no_annot
    label: Skip functional annotation, reporting only hits
  no_file_comments:
    type: boolean?
    inputBinding:
      prefix: --no_file_comments
    label: No header lines nor stats are included in the output files
  cpu:
    type: int?
    inputBinding:
      prefix: --cpu
    default: 8
  annotate_hits_table:
    type: File?
    inputBinding:
      prefix: --annotate_hits_table
    label: Annotate TSV formatted table of query->hits
  dbmem:
    type: boolean?
    inputBinding:
      prefix: --dbmem
    label: Store the whole eggNOG sqlite DB into memory before retrieving the annotations. This requires ~44 GB of RAM memory available.
  output:
    type: string?
    inputBinding:
      prefix: -o
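The --no_annot / --annotate_hits_table pair supports eggNOG-mapper's two-phase usage: search first, then annotate the resulting hits table. A hypothetical job file for the annotation phase, with placeholder paths:

data_dir: /data/eggnog   # placeholder directory holding the eggNOG databases
annotate_hits_table:
  class: File
  path: sample.emapper.seed_orthologs   # hits table from the search phase
no_file_comments: true
dbmem: true              # loads the sqlite DB into RAM (~44 GB), per the label above
cpu: 8
output: sample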
baseCommand: [ split_to_chunks.py ]

inputs:
  seqs:
    # format: edam:format_1929  # collision with concatenate.cwl
    type: File
    inputBinding:
      prefix: -i
  chunk_size:
    type: int
    inputBinding:
      prefix: -s
  file_format:
    type: string?
    inputBinding:
      prefix: -f
baseCommand: [ run_FGS.sh ]
# arguments:
# ./FragGeneScan -s SRR1620013_MERGED_FASTQ.fasta -o fgs -w 0 -t illumina_10

inputs:
  input_fasta:
    format: 'edam:format_1929'
    type: File
    inputBinding:
      separate: true
      prefix: "-i"
  output:
    type: string
    inputBinding:
      separate: true
      prefix: "-o"
  seq_type:
    type: string
    inputBinding:
      separate: true
      prefix: "-s"
  train:
    type: string?
    inputBinding:
      separate: true
      prefix: "-t"
    default: "illumina_10"

# stdout: stdout.txt
# stderr: stderr.txt
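As a usage sketch, a hypothetical job file for this wrapper; the input file name echoes the commented example above, and the seq_type value is purely illustrative, since the wrapper forwards it to run_FGS.sh unchanged:

input_fasta:
  class: File
  path: SRR1620013_MERGED_FASTQ.fasta   # placeholder input FASTA
output: fgs                             # output name, as in the commented example
seq_type: "0"                           # illustrative; passed through as -s
train: illumina_10                      # the wrapper's default training model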
baseCommand: [ unite_protein_predictions.py ]

inputs:
  masking_file:
    type: File?
    inputBinding:
      prefix: "--mask"
  predicted_proteins_prodigal_out:
    type: File?
    inputBinding:
      prefix: "--prodigal-out"
  predicted_proteins_prodigal_ffn:
    type: File?
    inputBinding:
      prefix: "--prodigal-ffn"
  predicted_proteins_prodigal_faa:
    type: File?
    inputBinding:
      prefix: "--prodigal-faa"
  predicted_proteins_fgs_out:
    type: File
    inputBinding:
      prefix: "--fgs-out"
  predicted_proteins_fgs_ffn:
    type: File
    inputBinding:
      prefix: "--fgs-ffn"
  predicted_proteins_fgs_faa:
    type: File
    inputBinding:
      prefix: "--fgs-faa"
  basename:
    type: string
    inputBinding:
      prefix: "--name"
  genecaller_order:
    type: string?
    inputBinding:
      prefix: "--caller-priority"
baseCommand: [ fastp ]

arguments: [
  $(inputs.detect_adapter_for_pe),
  $(inputs.overrepresentation_analysis),
  $(inputs.merge),
  $(inputs.merged_out),
  $(inputs.cut_right),
  $(inputs.base_correction),
  $(inputs.overlap_len_require),
  $(inputs.force_polyg_tail_trimming),
  $(inputs.min_length_required),
  --thread=$(inputs.threads),
  --html, "fastp.html",
  --json, "fastp.json",
  -i, $(inputs.forward_reads),
  -I, $(inputs.reverse_reads),
  -o, $(inputs.forward_reads.nameroot).trimmed.fastq,
  -O, $(inputs.reverse_reads.nameroot).trimmed.fastq
]

inputs:
  detect_adapter_for_pe:
    type: boolean
    default: false
    inputBinding:
      valueFrom: ${ if (inputs.detect_adapter_for_pe == true){ return '--detect_adapter_for_pe'; } else { return ''; } }
  overrepresentation_analysis:
    type: boolean
    default: false
    inputBinding:
      valueFrom: ${ if (inputs.overrepresentation_analysis == true){ return '--overrepresentation_analysis'; } else { return ''; } }
  merge:
    type: boolean
    default: true
    inputBinding:
      valueFrom: ${ if (inputs.merge != false){ return '--merge'; } else { return ''; } }
  merged_out:
    type: boolean?
    default: true
    inputBinding:
      prefix: --merged_out
      valueFrom: ${ if (inputs.merge != false){ return inputs.forward_reads.nameroot.split(/_(.*)/s)[0] + '.merged.fastq'; } else { return ''; } }
  forward_reads:
    type: File
    format:
      - edam:format_1930  # FASTQ
      - edam:format_1929  # FASTA
  reverse_reads:
    type: File?
    format:
      - edam:format_1930  # FASTQ
      - edam:format_1929  # FASTA
  threads:
    type: int?
    default: 1
  qualified_phred_quality:
    type: int?
    default: 0
    inputBinding:
      valueFrom: ${ if (inputs.qualified_phred_quality > 0) { return '--qualified_quality_phred=' + inputs.qualified_phred_quality } else { return '' } }
  unqualified_percent_limit:
    type: int?
    default: 0
    inputBinding:
      valueFrom: ${ if (inputs.unqualified_percent_limit > 0) { return '--unqualified_percent_limit=' + inputs.unqualified_percent_limit } else { return '' } }
  min_length_required:
    type: int?
    default: 0
    inputBinding:
      valueFrom: ${ if (inputs.min_length_required > 0) { return '--length_required=' + inputs.min_length_required } else { return '' } }
  force_polyg_tail_trimming:
    type: boolean?
    default: false
    inputBinding:
      valueFrom: ${ if (inputs.force_polyg_tail_trimming != false){ return '--trim_poly_g'; } else { return ''; } }
  disable_trim_poly_g:
    type: boolean?
    default: false
    inputBinding:
      valueFrom: ${ if (inputs.disable_trim_poly_g == true){ return '--disable_trim_poly_g'; } else { return ''; } }
  base_correction:
    type: boolean?
    default: false
    inputBinding:
      valueFrom: ${ if (inputs.merge == true && inputs.base_correction == true){ return '--correction'; } else { return ''; } }
  overlap_len_require:
    type: int
    default: 0
    inputBinding:
      valueFrom: ${ if (inputs.merge == true){ return '--overlap_len_require=' + inputs.overlap_len_require; } else { return ''; } }
  cut_right:
    type: boolean
    default: true
    inputBinding:
      valueFrom: ${ if (inputs.cut_right == true){ return '--cut_right' } else { return '' } }

# overlap_diff_limit (default 5) and overlap_diff_limit_percent (default 20%).
# Please note that the reads should meet these three conditions simultaneously.
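A sketch of a matching job file that enables merging with base correction; the read paths are placeholders, and each value is turned into the corresponding fastp flag by the conditional bindings above:

forward_reads:
  class: File
  path: sample_R1.fastq.gz
reverse_reads:
  class: File
  path: sample_R2.fastq.gz
merge: true
base_correction: true        # becomes --correction, only because merge is true
overlap_len_require: 30      # becomes --overlap_len_require=30
min_length_required: 70      # becomes --length_required=70
qualified_phred_quality: 20  # becomes --qualified_quality_phred=20
threads: 4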
baseCommand: [ "go_summary_pipeline-1.0.py" ]

inputs:
  InterProScan_results:
    type: File
    format: edam:format_3475
    inputBinding:
      prefix: --input-file
  config:
    type: [string?, File?]
    inputBinding:
      prefix: --config
    default: "go_summary-config.json"
  output_name:
    type: string

arguments:
  - "--output-file"
  - $(inputs.output_name)
baseCommand: [ hmmscan_tab.py ]  # old was with sed

arguments:
  - valueFrom: $(inputs.input_table.nameroot).tsv
    prefix: -o
baseCommand: ["hmmsearch"]

inputs:
  omit_alignment:
    type: boolean?
    inputBinding:
      position: 1
      prefix: "--noali"
  gathering_bit_score:
    type: boolean?
    inputBinding:
      position: 4
      prefix: "--cut_ga"
  database:
    type: string
    doc: |
      "Database name or path, depending on how you're using it."
  database_directory:
    type: [string, Directory?]
    doc: |
      "Database path"
  seqfile:
    format: edam:format_1929  # FASTA
    type: File
    inputBinding:
      position: 6
      separate: true

arguments:
  - valueFrom: |
      ${
        if (inputs.database_directory && inputs.database_directory !== "") {
          var path = inputs.database_directory.path || inputs.database_directory;
          return path + "/" + inputs.database;
        } else {
          return inputs.database;
        }
      }
    position: 5
  - prefix: --domtblout
    valueFrom: $(inputs.seqfile.nameroot)_hmmsearch.tbl
    position: 2
  - prefix: --cpu
    valueFrom: '4'
  # hmmer is too verbose
  # discard all the std output and error
  - prefix: -o
    valueFrom: '/dev/null'
  - valueFrom: '> /dev/null'
    shellQuote: false
    position: 10
  - valueFrom: '2> /dev/null'
    shellQuote: false
    position: 11
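For illustration, a hypothetical job file for an HMM search; the database name and paths are placeholders, and the argument expression above joins database_directory and database into one path:

seqfile:
  class: File
  path: predicted_proteins.faa   # placeholder protein FASTA
database: Pfam-A.hmm             # placeholder HMM database name
database_directory: /data/pfam   # placeholder; string branch of the union type
gathering_bit_score: true        # adds --cut_ga
omit_alignment: true             # adds --noali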
baseCommand: [ interproscan.sh ]

inputs:
  inputFile:
    type: File
    format: edam:format_1929
    inputBinding:
      position: 8
      prefix: '--input'
    label: Input file path
    doc: >-
      Optional, path to fasta file that should be loaded on Master startup.
      Alternatively, in CONVERT mode, the InterProScan 5 XML file to convert.
  applications:
    type: string[]?
    inputBinding:
      position: 9
      itemSeparator: ','
      prefix: '--applications'
    label: Analysis
    doc: >-
      Optional, comma separated list of analyses. If this option is not set,
      ALL analyses will be run.
  databases:
    type: [string?, Directory]
  cpu:
    type: int
    default: 8
    inputBinding:
      position: 2
      prefix: '--cpu'
    label: Number of CPUs
    doc: >-
      Optional, number of CPUs to use. If not set, the number of CPUs
      available on the machine will be used.
  disableResidueAnnotation:
    type: boolean?
    inputBinding:
      position: 11
      prefix: '--disable-residue-annot'
    label: Disables residue annotation
    doc: 'Optional, excludes sites from the XML, JSON output.'

arguments:
  - position: 0
    valueFrom: '--disable-precalc'
  - position: 1
    valueFrom: '--goterms'
  - position: 2
    valueFrom: '--pathways'
  - position: 3
    prefix: '--tempdir'
    valueFrom: $(runtime.tmpdir)
  - position: 7
    valueFrom: 'TSV'
    prefix: '-f'
  - position: 8
    valueFrom: $(runtime.outdir)/$(inputs.inputFile.nameroot).IPS.tsv
    prefix: '-o'
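An illustrative job file that limits InterProScan to a few member databases; the input path is a placeholder and the application names are just examples of valid InterProScan 5 analyses, joined with ',' via the itemSeparator above:

inputFile:
  class: File
  path: predicted_proteins.faa
applications:
  - Pfam
  - TIGRFAM
  - PRINTS
cpu: 8
disableResidueAnnotation: true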
baseCommand: [ "run_quality_filtering.py" ]

inputs:
  seq_file:
    type: File
    # format: edam:format_1929  # FASTA
    inputBinding:
      position: 1
    label: 'Trimmed sequence file'
    doc: >
      Trimmed and FASTQ to FASTA converted sequences file.
  submitted_seq_count:
    type: int
    label: 'Number of submitted sequences'
    doc: >
      Number of originally submitted sequences as in the user submitted
      FASTQ file - single end FASTQ or pair end merged FASTQ file.
  # stats_file_name:
  #   type: string
  #   default: stats_summary
  #   label: 'Post QC stats output file name'
  #   doc: >
  #     Give a name for the file which will hold the stats after QC.
  min_length:
    type: int
    default: 100  # For assemblies we need to set this in the input YAML to 500
    label: 'Minimum read or contig length'
    doc: >
      Specify the minimum read or contig length for sequences to pass QC
      filtering.
  input_file_format: string

outputs:
  filtered_file:
    label: Filtered output file
    format: edam:format_1929  # FASTA
    type: File
    outputBinding:
      glob: $(inputs.seq_file.nameroot).fasta
  stats_summary_file:
    label: Stats summary output file
    type: File
    outputBinding:
      glob: $(inputs.seq_file.nameroot).qc_summary

arguments:
  - position: 2
    valueFrom: $(inputs.seq_file.nameroot).fasta
  - position: 3
    valueFrom: $(inputs.seq_file.nameroot).qc_summary
  - position: 4
    valueFrom: $(inputs.submitted_seq_count)
  - position: 5
    prefix: '--min_length'
    valueFrom: $(inputs.min_length)
  - position: 6
    prefix: '--extension'
    valueFrom: $(inputs.input_file_format)
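A hypothetical job file for this step; min_length is the knob the inline comment above says to raise to 500 when filtering assemblies:

seq_file:
  class: File
  path: sample.merged.fasta   # placeholder
submitted_seq_count: 1000000  # read count of the originally submitted FASTQ
min_length: 100               # use 500 for assembled contigs
input_file_format: fasta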
baseCommand: [ "MGRAST_base.py" ]

inputs:
  QCed_reads:
    type: File
    format: edam:format_1929  # FASTA
    inputBinding:
      prefix: -i
  length_sum:
    label: Prefix for the files associated with sequence length distribution
    type: string
    default: seq-length.out
  gc_sum:
    label: Prefix for the files associated with GC distribution
    type: string
    default: GC-distribution.out
  nucleotide_distribution:
    label: Prefix for the files associated with nucleotide distribution
    type: string
    default: nucleotide-distribution.out
  summary:
    label: File names for summary of sequences, e.g. number, min/max length etc.
    type: string
    default: summary.out
  max_seq:
    label: Maximum number of sequences to sub-sample
    type: int?
    default: 2000000
  out_dir_name:
    label: Specifies output subdirectory
    type: string
    default: qc-statistics
  sequence_count:
    label: Specifies the number of sequences in the input read file (FASTA formatted)
    type: int

outputs:
  output_dir:
    label: Contains all stats output files
    type: Directory
    outputBinding:
      glob: $(inputs.out_dir_name)
  summary_out:
    label: Contains the summary statistics for the input sequence file
    type: File
    format: iana:text/plain
    outputBinding:
      glob: $(inputs.out_dir_name)/$(inputs.summary)

arguments:
  - position: 1
    prefix: '-o'
    valueFrom: $(inputs.out_dir_name)/$(inputs.summary)
  - position: 2
    prefix: '-d'
    valueFrom: |
      ${
        var suffix = '.full';
        if (inputs.sequence_count > inputs.max_seq) {
          suffix = '.sub-set';
        }
        return "".concat(inputs.out_dir_name, '/', inputs.nucleotide_distribution, suffix);
      }
  - position: 3
    prefix: '-g'
    valueFrom: |
      ${
        var suffix = '.full';
        if (inputs.sequence_count > inputs.max_seq) {
          suffix = '.sub-set';
        }
        return "".concat(inputs.out_dir_name, '/', inputs.gc_sum, suffix);
      }
  - position: 4
    prefix: '-l'
    valueFrom: |
      ${
        var suffix = '.full';
        if (inputs.sequence_count > inputs.max_seq) {
          suffix = '.sub-set';
        }
        return "".concat(inputs.out_dir_name, '/', inputs.length_sum, suffix);
      }
  - position: 5
    valueFrom: ${ if (inputs.sequence_count > inputs.max_seq) { return '-m '.concat(inputs.max_seq)} else { return ''} }
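Whether the distribution files carry the .full or .sub-set suffix (and whether -m is appended) depends only on sequence_count versus max_seq, as in this illustrative job file:

QCed_reads:
  class: File
  path: sample.qc.fasta   # placeholder
sequence_count: 3500000   # above the 2000000 default, so stats use a sub-sample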
baseCommand: [ clean_motus_output.sh ]
baseCommand: [ motus ]

inputs:
  reads:
    type: File
    inputBinding:
      position: 1
      prefix: -s
    label: merged and QC reads in fastq
    # format: edam:format_1930  # FASTQ
  threads:
    type: int
    inputBinding:
      prefix: -t
    default: 4

arguments: [ profile, -c, -q ]
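With the profile, -c, -q arguments above, the effective command is motus profile -c -q -s <reads> -t <threads>. A minimal illustrative job file:

reads:
  class: File
  path: sample.merged.fastq   # placeholder merged, QC-ed reads
threads: 4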
baseCommand: [ "biom-convert.sh" ]

inputs:
  biom:
    type: File?
    format: edam:format_3746  # BIOM
    inputBinding:
      prefix: --input-fp
  table_type:
    type: string?  # biom-convert-table.yaml#table_type?
    inputBinding:
      prefix: --table-type  # --table-type= <- worked for cwlexec
      separate: true        # false <- worked for cwlexec
      valueFrom: $(inputs.table_type)  # $('"' + inputs.table_type + '"') <- worked for cwlexec
  json:
    type: boolean?
    label: Output as JSON-formatted table.
    inputBinding:
      prefix: --to-json
  hdf5:
    type: boolean?
    label: Output as HDF5-formatted table.
    inputBinding:
      prefix: --to-hdf5
  tsv:
    type: boolean?
    label: Output as TSV-formatted (classic) table.
    inputBinding:
      prefix: --to-tsv
  header_key:
    type: string?
    doc: |
      The observation metadata to include from the input BIOM table file when
      creating a tsv table file. By default no observation metadata will be
      included.
    inputBinding:
      prefix: --header-key

arguments:
  - valueFrom: |
      ${
        var ext = "";
        if (inputs.json) { ext = "_json.biom"; }
        if (inputs.hdf5) { ext = "_hdf5.biom"; }
        if (inputs.tsv) { ext = "_tsv.biom"; }
        var pre = inputs.biom.nameroot.split('.');
        pre.pop()
        return pre.join('.') + ext;
      }
    prefix: --output-fp
  - valueFrom: "--collapsed-observations"
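Exactly one of json/hdf5/tsv should be set, since the expression above derives the output suffix from whichever flag is on. An illustrative job file for a BIOM-to-TSV conversion (the input path is a placeholder):

biom:
  class: File
  path: sample.taxonomy.biom   # placeholder
tsv: true                      # output becomes <nameroot>_tsv.biom
header_key: taxonomy           # carry the taxonomy metadata into the TSV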
baseCommand: [ cmsearch-deoverlap.pl ]

inputs:
  - id: clan_information
    type: [string?, File?]
    inputBinding:
      position: 0
      prefix: '--clanin'
    label: clan information on the models provided
    doc: Not all models provided need to be a member of a clan
  - id: cmsearch_matches
    type: File
    format: edam:format_3475
    inputBinding:
      position: 1
      valueFrom: $(self.basename)
baseCommand: [ cmsearch ]

inputs:
  - id: covariance_model_database
    type: [string, File]
    inputBinding:
      position: 1
  - id: cpu
    type: int?
    inputBinding:
      position: 0
      prefix: '--cpu'
    label: Number of parallel CPU workers to use for multithreads
  - default: false
    id: cut_ga
    type: boolean?
    inputBinding:
      position: 0
      prefix: '--cut_ga'
    label: use CM's GA gathering cutoffs as reporting thresholds
  - id: omit_alignment_section
    type: boolean?
    inputBinding:
      position: 0
      prefix: '--noali'
    label: Omit the alignment section from the main output.
    doc: This can greatly reduce the output volume.
  - default: false
    id: only_hmm
    type: boolean?
    inputBinding:
      position: 0
      prefix: '--hmmonly'
    label: 'Only use the filter profile HMM for searches, do not use the CM'
    doc: |
      Only filter stages F1 through F3 will be executed, using strict P-value
      thresholds (0.02 for F1, 0.001 for F2 and 0.00001 for F3). Additionally
      a bias composition filter is used after the F1 stage (with P=0.02
      survival threshold). Any hit that survives all stages and has an HMM
      E-value or bit score above the reporting threshold will be output.
  - id: query_sequences
    type: File
    format: edam:format_1929  # FASTA
    inputBinding:
      position: 2
    # streamable: true
  - id: search_space_size
    type: int
    inputBinding:
      position: 0
      prefix: '-Z'
    label: search space size in *Mb* to <x> for E-value calculations

arguments:
  - position: 0
    prefix: '--tblout'
    valueFrom: |
      ${
        var name = "";
        if (typeof inputs.covariance_model_database === "string") {
          name = inputs.query_sequences.basename + "." + inputs.covariance_model_database.split("/").slice(-1)[0] + ".cmsearch_matches.tbl";
        } else {
          name = inputs.query_sequences.basename + "." + inputs.covariance_model_database.nameroot + ".cmsearch_matches.tbl";
        }
        return name;
      }
  - position: 0
    prefix: '-o'
    valueFrom: |
      ${
        var name = "";
        if (typeof inputs.covariance_model_database == "string") {
          name = inputs.query_sequences.basename + "." + inputs.covariance_model_database.split("/").slice(-1)[0] + ".cmsearch.out";
        } else {
          name = inputs.query_sequences.basename + "." + inputs.covariance_model_database.nameroot + ".cmsearch.out";
        }
        return name;
      }
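An illustrative job file for one covariance-model search; the paths are placeholders, and -Z is mandatory here because search_space_size is non-optional:

query_sequences:
  class: File
  path: sample.fasta
covariance_model_database: /data/rfam/ribosomal.cm   # string branch of the union type
search_space_size: 1000                              # -Z 1000, in Mb
cut_ga: true
omit_alignment_section: true
cpu: 4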
baseCommand: [ esl-index.sh ]

baseCommand: [ esl-sfetch ]

baseCommand: get_subunits_coords.py

baseCommand: get_subunits.py
baseCommand: ktImportText

arguments:
  - valueFrom: "krona.html"
    prefix: -o
baseCommand: [ 'mapseq2biom.pl' ]

inputs:
  otu_table:
    type: [string, File]
    doc: |
      the OTU table produced for the taxonomies found in the reference
      databases that was used with MAPseq
    inputBinding:
      prefix: --otuTable
  query:
    type: File
    label: the output from the MAPseq that assigns a taxonomy to a sequence
    format: iana:text/tab-separated-values
    inputBinding:
      prefix: --query
  label:
    type: string
    label: label to add to the top of the outfile OTU table
    inputBinding:
      prefix: --label
  taxid_flag:
    type: boolean?
    label: output NCBI taxids for all databases bar UNITE
    inputBinding:
      prefix: --taxid

arguments:
  - valueFrom: $(inputs.query.basename).tsv
    prefix: --outfile
  - valueFrom: $(inputs.query.basename).txt
    prefix: --krona
  - valueFrom: $(inputs.query.basename).notaxid.tsv
    prefix: --notaxidfile
baseCommand: mapseq

inputs:
  prefix: File
  sequences:
    type: File
    inputBinding:
      position: 1
    format: edam:format_1929  # FASTA
  database:
    type: File
    inputBinding:
      position: 2
    secondaryFiles: .mscluster
    format: edam:format_1929
  taxonomy:
    type: [string, File]
    inputBinding:
      position: 4
  threads:
    type: int?
    default: 8
    inputBinding:
      prefix: "-nthreads"
      position: 5

arguments: ['-tophits', '80', '-topotus', '40', '-outfmt', 'simple']
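An illustrative job file; all reference paths are placeholders. Note that database expects its .mscluster sidecar alongside (per secondaryFiles), and prefix is declared as a required File even though it has no command-line binding:

sequences:
  class: File
  path: sample_SSU.fasta
database:
  class: File
  path: refdb/SSU.fasta        # SSU.fasta.mscluster must sit next to it
taxonomy: refdb/SSU.taxonomy   # string branch of the union type
prefix:
  class: File
  path: refdb/SSU.otu          # placeholder; required but unbound input
threads: 8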
baseCommand: [ pull_ncrnas.sh ]

baseCommand: [ functional_stats.py ]

baseCommand: [ write_summaries.py ]
baseCommand: [ add_header ]

inputs:
  input_table:
    # format: [edam:format_3475, edam:format_2333]
    type: File
    inputBinding:
      prefix: -i
  header:
    type: string
    inputBinding:
      prefix: -h
baseCommand: [ count_lines.py ]

inputs:
  sequences:
    type: File
    inputBinding:
      prefix: -f
  number:
    type: int
    inputBinding:
      prefix: -n
baseCommand: [ bash ]

arguments:
  - valueFrom: |
      expr \$(cat $(inputs.input_file.path) | wc -l)
    prefix: -c
arguments:
  - valueFrom: $(inputs.fastq.nameroot).unclean
    prefix: '-o'

baseCommand: [ fastq_to_fasta.py ]
baseCommand: [ generate_checksum.py ]
baseCommand: [ pigz ]

arguments: ["-p", "8", "-c"]
arguments:
  - prefix: -n
    valueFrom: |
      ${
        if (inputs.size_limit) { return inputs.size_limit }
        if (inputs.type_fasta == 'n') { return 1980 }
        if (inputs.type_fasta == 'p') { return 1442 }
      }

baseCommand: [ split_fasta_by_size.sh ]
baseCommand: [ megahit ]

inputs:
  memory:
    type: float?
    label: Memory to run assembly. When 0 < -m < 1, fraction of all available memory of the machine is used, otherwise it specifies the memory in BYTE.
    default: 0.9
    inputBinding:
      position: 4
      prefix: "--memory"
  min-contig-len:
    type: int?
    default: 500
    inputBinding:
      position: 3
      prefix: "--min-contig-len"
  forward_reads:
    type:
      - File?
      - type: array
        items: File
    inputBinding:
      position: 1
      prefix: "-1"
  reverse_reads:
    type:
      - File?
      - type: array
        items: File
    inputBinding:
      position: 2
      prefix: "-2"
  threads:
    type: int
    default: 1
    inputBinding:
      position: 5
      prefix: "--num-cpu-threads"
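Lastly, an illustrative job file for the assembly step (read paths are placeholders); per the label above, a memory value between 0 and 1 is read as a fraction of the machine's RAM:

forward_reads:
  - class: File
    path: sample_R1.fastq.gz
reverse_reads:
  - class: File
    path: sample_R2.fastq.gz
memory: 0.9          # use up to 90% of available memory
min-contig-len: 500
threads: 8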
Metadata
- URL: https://data.emobon.embrc.eu/MetaGOflow/
- Name: a-workflow-for-marine-genomic-observatories-data-a
- Version: eosc-life-gos @ deb5427
- Copyright: Public Domain
- License: Boost Software License 1.0