Setup
Create a conda environment for running the pipeline:
conda env create --name snakemake-tara-euk --file environment.yaml
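Once the environment is created, activate it and launch the workflow with Snakemake. A minimal sketch (the core count is illustrative, and --use-conda is only needed if the rules declare their own conda environments):

    conda activate snakemake-tara-euk
    # dry run first to check that the DAG resolves
    snakemake -n
    # real run; adjust --cores to your machine or scheduler
    snakemake --cores 16 --use-conda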
Code Snippets
wrapper: "0.27.1/bio/fastqc"

wrapper: "0.27.1/bio/trimmomatic/pe"

wrapper: "0.27.1/bio/fastqc"

shell:
    """
    multiqc -n multiqc.html {input.rawG}
    mv multiqc.html {output.html_rawG}
    mv multiqc_data/multiqc_general_stats.txt {output.stats_rawG}
    rm -rf multiqc_data
    multiqc -n multiqc.html {input.trimmedG}
    mv multiqc.html {output.html_trimmedG}
    mv multiqc_data/multiqc_general_stats.txt {output.stats_trimmedG}
    rm -rf multiqc_data
    multiqc -n multiqc.html {input.trimmedT}
    mv multiqc.html {output.html_trimmedT}
    mv multiqc_data/multiqc_general_stats.txt {output.stats_trimmedT}
    rm -rf multiqc_data
    multiqc -n multiqc.html {input.trimmedT}
    mv multiqc.html {output.html_trimmedT}
    mv multiqc_data/multiqc_general_stats.txt {output.stats_trimmedT}
    rm -rf multiqc_data
    """

shell:
    """
    interleave-reads.py {input.r2} {input.r1} | trim-low-abund.py --gzip -C 3 -Z 18 -M 30e9 -V - -o {output} 2> {log}
    """

shell:
    """
    zcat {input.r1} {input.r2} | sourmash compute -k 21,31,51 \
        --scaled 10000 --track-abundance \
        -o {output} - 2> {log}
    """

shell:
    """
    megahit -1 {params.inputr1} -2 {params.inputr2} --min-contig-len {params.min_contig_len} --memory {params.memory} -t {params.cpu_threads} --out-dir {params.megahit_output_name} {params.other_options} >> {log} 2>&1
    """

shell:
    """
    bwa index {input} 2> {log}
    """

shell:
    """
    cp {input} {output}
    """

shell:
    """
    bwa mem -t {params.threads} {params.extra} {input.reference} {input.r1} {input.r2} | samtools sort -o {output} - >> {log} 2>&1
    """

shell:
    """
    jgi_summarize_bam_contig_depths --outputDepth {output} {input} > {log} 2>&1
    """

shell:
    """
    metabat2 {params.other} --numThreads {params.threads} -i {input.assembly} -a {input.depth} -o {output} > {log} 2>&1
    """

shell:
    '''
    concoct --coverage_file {input.depth} \
        --composition_file {input.assembly} \
        --basename {params.outdir} \
        --length_threshold {params.length} \
        --converge_out -t 12
    '''

shell:
    '''
    merge_cutup_clustering.py {input} > {output}
    '''

shell:
    '''
    mkdir -p {output}
    extract_fasta_bins.py {input.assembly} {input.csv} --output_path {params.outdir}
    touch {output.done}
    '''

shell:
    """
    ulimit -s 65536
    filter_euk_bins.py {input.fastas} --minbpeuks {params.minbpeuks} --eukratio {params.eukratio} --output {output} --tempdir {params.tempdir}
    """

shell:
    """
    EukRep -i {input} -o {output} --prokarya {params.prok} --min {params.min_contig} > {log} 2>&1
    """

shell:
    """
    metabat2 {params.other} --numThreads {params.threads} -i {input.assembly} -a {input.depth} -o {output} > {log} 2>&1
    """

shell:
    """
    prodigal -i {input.assembly} -f gff -o {output.genes} -a {output.proteins} -p meta
    """

shell:
    """
    bwa index {input} 2> {log}
    """

shell:
    """
    bwa mem -t {params.threads} {params.extra} {input.reference} {input.r1} {input.r2} | samtools sort -o {output} - >> {log} 2>&1
    """

shell:
    """
    coverm genome --bam-files {input.mapping} --genome-fasta-directory {input.genome_dir} --genome-fasta-extension "fa" \
        --min-read-percent-identity 0.95 --min-read-aligned-percent 0.75 --proper-pairs-only \
        --methods count length covered_bases covered_fraction reads_per_base mean variance trimmed_mean rpkm relative_abundance \
        --output-format dense --min-covered-fraction 0 --contig-end-exclusion 75 --trim-min 0.05 --trim-max 0.95 --quiet > {output}
    """

shell:
    """
    coverm genome --bam-files {input.mapping} --genome-fasta-directory {input.genome_dir} --genome-fasta-extension "fa" \
        --min-read-percent-identity 0.95 --min-read-aligned-percent 0.75 --proper-pairs-only \
        --methods coverage_histogram \
        --output-format dense --min-covered-fraction 0 --contig-end-exclusion 75 --trim-min 0.05 --trim-max 0.95 --quiet > {output}
    """

__author__ = "Julian de Ruiter"
__copyright__ = "Copyright 2017, Julian de Ruiter"
__email__ = "julianderuiter@gmail.com"
__license__ = "MIT"


from os import path
from tempfile import TemporaryDirectory

from snakemake.shell import shell

log = snakemake.log_fmt_shell(stdout=False, stderr=True)


def basename_without_ext(file_path):
    """Returns basename of file path, without the file extension."""
    base = path.basename(file_path)
    split_ind = 2 if base.endswith(".gz") else 1
    base = ".".join(base.split(".")[:-split_ind])
    return base


# Run fastqc, since there can be race conditions if multiple jobs
# use the same fastqc dir, we create a temp dir.
with TemporaryDirectory() as tempdir:
    shell("fastqc {snakemake.params} --quiet "
          "--outdir {tempdir} {snakemake.input[0]}"
          " {log}")

    # Move outputs into proper position.
    output_base = basename_without_ext(snakemake.input[0])
    html_path = path.join(tempdir, output_base + "_fastqc.html")
    zip_path = path.join(tempdir, output_base + "_fastqc.zip")

    if snakemake.output.html != html_path:
        shell("mv {html_path} {snakemake.output.html}")

    if snakemake.output.zip != zip_path:
        shell("mv {zip_path} {snakemake.output.zip}")

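For orientation, the wrapper above takes a single FASTQ file as input and expects named html and zip outputs, which it moves out of a temporary directory. A hypothetical rule using it could look like this (paths and the sample wildcard are illustrative, not taken from the workflow):

    rule fastqc_example:
        input:
            "trimmed/{sample}_R1.fastq.gz"          # hypothetical path
        output:
            html="qc/fastqc/{sample}_R1.html",
            zip="qc/fastqc/{sample}_R1_fastqc.zip",
        params: ""                                  # passed verbatim to fastqc; empty means defaults
        wrapper:
            "0.27.1/bio/fastqc"
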
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"


from snakemake.shell import shell

extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

trimmer = " ".join(snakemake.params.trimmer)

shell("trimmomatic PE {snakemake.params.extra} "
      "{snakemake.input.r1} {snakemake.input.r2} "
      "{snakemake.output.r1} {snakemake.output.r1_unpaired} "
      "{snakemake.output.r2} {snakemake.output.r2_unpaired} "
      "{trimmer} "
      "{log}")

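Similarly, the trimmomatic PE wrapper above expects paired r1/r2 inputs, paired and unpaired outputs, a params.trimmer list, and a params.extra string. A hypothetical rule (trimming settings and paths are illustrative only):

    rule trimmomatic_pe_example:
        input:
            r1="raw/{sample}_R1.fastq.gz",          # hypothetical paths
            r2="raw/{sample}_R2.fastq.gz",
        output:
            r1="trimmed/{sample}_R1.fastq.gz",
            r1_unpaired="trimmed/{sample}_R1.unpaired.fastq.gz",
            r2="trimmed/{sample}_R2.fastq.gz",
            r2_unpaired="trimmed/{sample}_R2.unpaired.fastq.gz",
        params:
            trimmer=["SLIDINGWINDOW:4:2", "MINLEN:50"],   # illustrative settings
            extra="",
        log:
            "logs/trimmomatic/{sample}.log"
        wrapper:
            "0.27.1/bio/trimmomatic/pe"
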
Created: 1yr ago
Updated: 1yr ago
Maintainers: public
URL: https://github.com/AlexanderLabWHOI/tara-euk-metaG
Name: tara-euk-metag
Version: 1
Downloaded: 0
Copyright: Public Domain
License: MIT License