Robust Optogenetic Inhibition with Red-light-sensitive Anion-conducting Channelrh
Help improve this workflow!
This workflow has been published but could be further improved with some additional meta data:- Keyword(s) in categories input, output, operation, topic
You can help improve this workflow by suggesting the addition or removal of keywords, suggest changes and report issues, or request to become a maintainer of the Workflow .
ACRs
This repository hosts the bioinformatic analysis worklow used in Oppermann et al (2023) Robust Optogenetic Inhibition with Red-light-sensitive Anion-conducting Channelrhodopsins, bioRxiv .
The
snakemake
-based code for the workflow is under
workflow/
. The dependencies are under conda control (
snakemake --use-conda
), see
workflow/envs
. The analysis files are under
analysis/
, in particular
ACRs/analysis/pdb
contains the output files for the structural alignment (
sequences.aln
is the main output) and
analysis/all
hosts the various files generated for the phylogenetic analysis:
-
sequences.fasta
: unaligned sequences -
cdhit.fasta
: non-redundant set of sequences -
mafft.fasta
: mafft alignment -
trimal.fasta
: trimal trimmed alignment -
iqtree.treefile
: the phylogenetic tree from iqtree with ultrafasta bootstrap support values
The metadata for the sequences can be found in
metadata/Channelrhodopsins_Updated_List.xlsx
which is a snapshot of the
Catalogue of Natural Channelrhodopsins
.
Code Snippets
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | library(dplyr) library(tidyr) library(bio3d) template_file <- unlist(snakemake@input) output_file <- unlist(snakemake@output) templates <- read.table("pdb/template.txt", col.names = c("ID", "_P_", "fname")) %>% mutate(ID = substring(ID, 2)) data <- lapply(templates$fname, read.pdb) %>% setNames(templates$ID) sheets <- lapply(data, `[[`, "sheet") %>% lapply(data.frame) %>% setNames(templates$ID) %>% bind_rows(.id = "ID") %>% mutate(sense = ifelse("sense" %in% names(.), as.numeric(sense), NA), sense = ifelse(sense < 0, "-", "+")) helices <- lapply(data, `[[`, "helix") %>% lapply(data.frame) %>% setNames(templates$ID) %>% bind_rows(.id = "ID") list(sheet = sheets, helix = helices) %>% bind_rows(.id = "ss") %>% filter(chain == "A") %>% replace_na(list(sense = ".")) %>% mutate(source = "SS", score = ".", frame = ".", attrib = ".") %>% select(ID, source, ss, start, end, score, sense, frame, attrib) %>% write.table(output_file, quote = F, sep = "\t", col.names = F, row.names = F) |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | from Bio import SeqIO from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord template_txt = str(snakemake.input) fasta_file = str(snakemake.output) aminoacids = { "ALA": "A", "ARG": "R", "ASN": "N", "ASP": "D", "CYS": "C", "GLN": "Q", "GLU": "E", "GLY": "G", "HIS": "H", "ILE": "I", "LEU": "L", "LYS": "K", "MET": "M", "PHE": "F", "PRO": "P", "SER": "S", "THR": "T", "TRP": "W", "TYR": "Y", "VAL": "V", "LYR": "K" } def to_fasta(fname): has_seqres = False seq = [] with open(fname) as fh: for line in fh: if line.startswith('SEQRES'): has_seqres = True tag, row_num, chain, length, *residues = line.split() if chain == 'A': seq += [ aminoacids[x] for x in residues ] elif not has_seqres and line.startswith('ATOM'): tag, atom_num, atom, residue, chain, residue_num, *rest = line.split() if chain == 'A' and atom == 'N': seq.append(aminoacids[residue]) return ''.join(seq) with open(fasta_file, 'w') as out: with open(template_txt) as fh: for line in fh: name, tag, pdb_file = line.split() seq = to_fasta(pdb_file) record = SeqRecord(Seq(seq), name[1:], description = '') SeqIO.write(record, out, 'fasta') |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | library(dplyr) library(tidyr) library(treeio) library(ggtree) library(readxl) library(photobiology) library(ggplot2) library(ggnewscale) library(castor) library(ape) with(snakemake@input, { tree_file <<- tree metadata_file <<- metadata }) output_file <- unlist(snakemake@output) to_treedata <- function(tree) { class(tree) <- c("tbl_tree", "tbl_df", "tbl", "data.frame") as.treedata(tree) } add_hsp <- function(tree, colname) { colname <- deparse(substitute(colname)) treedata <- to_treedata(tree) categories <- setNames(tree[[colname]], tree[["label"]]) %>% `[`(treedata@phylo$tip.label) %>% as.factor hsp <- hsp_max_parsimony(treedata@phylo, as.numeric(categories), edge_exponent = 0.1) %>% `$`("likelihoods") %>% as.data.frame %>% setNames(levels(categories)) %>% mutate(node = 1:n()) %>% gather(value, likelihood, -node) %>% filter(likelihood > 0.99) %>% setNames(c("node", paste0(colname, "_hsp"), paste0(colname, "_hsp_lh"))) left_join(tree, hsp, by = "node") } get_mrca <- function(phylo, tips) { getMRCA(phylo, tips) %>% replace(is.null(.), NA) } add_mrca <- function(tree, colname) { colname <- deparse(substitute(colname)) treedata <- to_treedata(tree) mrca <- mutate(tree, my_column = !!as.name(colname)) %>% group_by(my_column) %>% mutate(is.tip = label %in% treedata@phylo$tip.label) %>% mutate(no_data = all(is.na(my_column))) %>% mutate(mrca = get_mrca(treedata@phylo, node[is.tip])) %>% mutate(mrca = ifelse(no_data | is.na(mrca), node, mrca)) %>% group_by(mrca) %>% mutate(enough_tips = sum(is.tip) > 1) %>% mutate(ifelse(node == mrca & enough_tips, first(na.omit(my_column)), NA)) %>% pull tree[[paste0(colname, "_mrca")]] <- mrca return(tree) } metadata <- read_xlsx(metadata_file, .name_repair = "universal") %>% mutate(Sequence.name = sub(",.+", "", Sequence.name)) %>% mutate(Sequence.name = gsub("@", "_", Sequence.name)) %>% mutate(Maximum..nm = ifelse(is.na(Action.maximum..nm), Absorption.maximum..nm, Action.maximum..nm)) tree <- read.iqtree(tree_file) %>% as_tibble %>% left_join(metadata, by = c(label = "Sequence.name")) %>% mutate(Color = unname(w_length2rgb(Maximum..nm))) %>% mutate(Category = case_when(Currents %in% c("no photocurrents", "channel") ~ NA_character_, T ~ gsub("[][]", "", Currents))) %>% mutate(Symbol = gsub(",.+", "", Symbol)) %>% mutate(Symbol_show = ifelse(Currents.reference == "[Oppermann23](in_prep)", Symbol, NA)) %>% add_mrca(ChR.group) %>% add_hsp(Category) cat_colors <- list( "anion channel" = "indianred", "cation channel" = "deepskyblue", "potassium channel" = "purple", "channel" = "yellow4" ) p <- ggtree(to_treedata(tree), aes(color = Category_hsp), layout = "ape") + scale_color_manual(values = cat_colors) + new_scale_color() + geom_nodepoint(aes(x = branch.x, y = branch.y, subset = !is.na(UFboot) & UFboot >= 95), size = 0.2, color = "#4d4d4dff") + geom_nodepoint(aes(x = branch.x, y = branch.y, subset = !is.na(UFboot) & UFboot >= 90 & UFboot < 95), size = 0.2, color = "#b3b3b3ff") + geom_tippoint(aes(subset = !is.na(Category) & is.na(Color)), color = "darkgray") + geom_tippoint(aes(subset = !is.na(Color), color = Color)) + scale_colour_identity() + new_scale_color() + geom_tiplab2(aes(label = Symbol_show), hjust = -0.2) + geom_treescale(width = 0.5) + geom_cladelab(mapping = aes(subset = !is.na(ChR.group_mrca), node = node, label = ChR.group_mrca), offset = -0.1) + xlim(-10, 10) ggsave(output_file, p, width = 7, height = 7) |
1
of
scripts/plot_tree.R
2 3 4 5 6 7 8 9 | from Bio import SeqIO from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord with open(str(snakemake.output), 'w') as file: for name, seq in snakemake.params['seq'].items(): record = SeqRecord(Seq(seq), id = name) SeqIO.write(record, file, 'fasta') |
28 29 | script: "scripts/write_fasta.py" |
40 41 | shell: "mafft --thread {threads} --auto {input} > {output}" |
52 53 | shell: "cdhit -i {input} -o {output} -c {params.c} -d 0" |
64 65 | shell: "trimal -in {input} -out {output} -gt {params.gt}" |
74 75 | shell: "gotree reroot midpoint -i {input} -o {output}" |
92 93 | shell: "iqtree2 -seed {params.seed} -s {input} -B {params.B} -T {threads} -pers {params.pers} -nstop {params.nstop} --prefix {params.prefix} -redo" |
102 103 | script: "scripts/pdb2fasta.py" |
120 121 | shell: "t_coffee {input.sequences} -outfile {output.aln} -newtree {output.dnd} -method {params.method} -template_file {input.template} -pdb_min_sim {params.pdb_min_sim} -pdb_min_cov {params.pdb_min_cov} -n_core {threads}" |
131 132 | script: "scripts/plot_tree.R" |
141 142 | script: "scripts/features.R" |
Support
- Future updates
Related Workflows





