Second workflow: takes the output of the Guppy workflow, runs FastQC, creates a MultiQC report, and then runs mothur for analysis.

public 1yr ago 0 bookmarks

View Workflow

This workflow takes the output from the GUPPY workflow and runs fastqc, multiqc, and mothur.

Authors

Hans Vasquez-Gross
Lucas Bishop

Usage

Simple

Step 1: Install workflow

Clone this workflow to your local computer.

Step 2: Configure workflow

Configure the workflow according to your needs by editing config.yaml to set your input BaseSpace PROJECT directory.

Step 3: Execute workflow

Test your configuration by performing a dry-run via

snakemake --use-conda -n

Code Snippets

shell:
    "ln -s {input} {output}"

SnakeMake From line 51 of main/Snakefile

wrapper:
    "v1.3.2/bio/fastqc"

SnakeMake FastQC From line 65 of main/Snakefile

wrapper:
    "v1.3.2/bio/multiqc"

SnakeMake MultiQC From line 79 of main/Snakefile

shell:
    """
    cd {params.indir}
    mothur "#set.dir(output={params.outdir}); fastq.info(fastq={params.fq})"
    """

SnakeMake mothur From line 99 of main/Snakefile

shell:
    """
    cd {params.indir}
    mothur "#trim.seqs(fasta={params.fasta}, qfile={params.qual}, qaverage=10, processors=16)"
    """

SnakeMake mothur From line 122 of main/Snakefile

    shell:
        """
        cd {params.mothurdir}
	touch {output.finished} 
        mothur "#set.dir(output={params.workingdir});
	merge.files(input={params.fasta}, output=merged_results.fasta);
	make.group(fasta={params.fasta}, groups={params.groups})" || true
        """

SnakeMake mothur From line 145 of main/Snakefile

    shell:
        """
        cd {params.mothurdir}
        touch {output.finished}
        mothur "#set.dir(output={params.workingdir});
	merge.files(input={params.fasta}, output=merged_results.fasta);
	make.group(fasta={params.fasta}, groups={params.groups});
	screen.seqs(fasta=merged_results.fasta, group=current, maxambig=0, maxlength=1700, maxhomop=8);
	unique.seqs(fasta=current);
	count.seqs(name=current, group=current);
	align.seqs(fasta=current, reference={input.refbac});
	filter.seqs(fasta=current, vertical=T);
	unique.seqs(fasta=current, count=current);
	pre.cluster(fasta=current, count=current, diffs=2);
	chimera.vsearch(fasta=current, count=current, dereplicate=T);
	remove.seqs(fasta=current, accnos=current);
	classify.seqs(fasta=current, count=current, reference={input.trainsetfasta}, taxonomy={input.trainsettax}, cutoff=80);
	remove.lineage(fasta=current, count=current, taxonomy=current, taxon={params.lineageremove});
	phylotype(taxonomy=current);
	make.shared(list=current, count=current, label=1);
	classify.otu(list=current, count=current, taxonomy=current, label=1)" || true
        """

SnakeMake mothur From line 179 of main/Snakefile

__author__ = "Julian de Ruiter"
__copyright__ = "Copyright 2017, Julian de Ruiter"
__email__ = "julianderuiter@gmail.com"
__license__ = "MIT"


from os import path
import re
from tempfile import TemporaryDirectory

from snakemake.shell import shell

# Shell fragment requested from Snakemake for both stdout and stderr; it is
# appended verbatim to the fastqc command line further down via "{log}".
# NOTE(review): presumably a redirection into the rule's log file -- confirm
# against the Snakemake documentation for log_fmt_shell.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


def basename_without_ext(file_path):
    """Return the file name of ``file_path`` with known extensions removed.

    The directory part is dropped first. Each pattern below is then applied
    once, in the listed order, and only strips a suffix sitting at the very
    end of the current name. As a result ``sample.fastq.gz`` collapses to
    ``sample`` while ``sample.gz.fastq`` only loses ``.fastq``.
    """
    stripped = path.basename(file_path)

    # Same sequence as the original chain of substitutions (modelled on what
    # fastqc does internally); the order is significant for stacked suffixes.
    for suffix_pattern in (
        "\\.gz$",
        "\\.bz2$",
        "\\.txt$",
        "\\.fastq$",
        "\\.fq$",
        "\\.sam$",
        "\\.bam$",
    ):
        stripped = re.sub(suffix_pattern, "", stripped)

    return stripped


# Run fastqc, since there can be race conditions if multiple jobs
# use the same fastqc dir, we create a temp dir.
#
# Only the first input file (snakemake.input[0]) is passed to fastqc.
# NOTE: the {...} placeholders in the command strings name variables of this
# script (tempdir, log, html_path, zip_path), so renaming any of them means
# editing the command strings as well.
with TemporaryDirectory() as tempdir:
    shell(
        "fastqc {snakemake.params} -t {snakemake.threads} "
        "--outdir {tempdir:q} {snakemake.input[0]:q}"
        " {log}"
    )

    # Move outputs into proper position.
    # The reports are expected in the temp dir as <base>_fastqc.html and
    # <base>_fastqc.zip, where <base> is the input file name with its known
    # extensions stripped. They must be moved before the `with` block ends,
    # because the temporary directory is removed on exit.
    output_base = basename_without_ext(snakemake.input[0])
    html_path = path.join(tempdir, output_base + "_fastqc.html")
    zip_path = path.join(tempdir, output_base + "_fastqc.zip")

    # Skip the move when the declared output already is the generated path.
    if snakemake.output.html != html_path:
        shell("mv {html_path:q} {snakemake.output.html:q}")

    if snakemake.output.zip != zip_path:
        shell("mv {zip_path:q} {snakemake.output.zip:q}")

Python Snakemake FastQC From line 3 of fastqc/wrapper.py

__author__ = "Julian de Ruiter"
__copyright__ = "Copyright 2017, Julian de Ruiter"
__email__ = "julianderuiter@gmail.com"
__license__ = "MIT"


from os import path

from snakemake.shell import shell


# MultiQC is pointed at the distinct parent directories of the inputs rather
# than at the individual files. path.dirname() yields "" for a bare file name
# in the working directory; an empty string would vanish from the command
# line, so it is mapped to ".". Sorting keeps the command line reproducible.
input_dirs = sorted({path.dirname(fp) or "." for fp in snakemake.input})

# The report location is split into directory (-o) and file name (-n). The
# same "" -> "." mapping applies, otherwise "-o" would swallow the next
# option when the report is written to the working directory.
output_dir = path.dirname(snakemake.output[0]) or "."
output_name = path.basename(snakemake.output[0])

# Shell fragment supplied by Snakemake for stdout and stderr, appended to the
# command below.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# NOTE: the {...} placeholders name the variables defined above, so renaming
# one of them means editing the command string too. Paths are quoted with
# ":q" so that directories containing spaces or shell metacharacters survive.
shell(
    "multiqc"
    " {snakemake.params}"
    " --force"
    " -o {output_dir:q}"
    " -n {output_name:q}"
    " {input_dirs:q}"
    " {log}"
)