feat: allow multiple target_regions BED files defined in config.yaml,…

… merging them into one (#161) * feat: allow multiple target_regions BED files defined in config.yaml, merging them into one * ensure that rule get_target_regions is also well-defined in absence of config["target_regions"] * dummy commit to retrigger GitHub Actions * GitHub Actions main.yml indentation fix * snakefmt * complete naming switch to `expanded_regions` (to differentiate from `target_regions`, which are regions from a panel definition file) * fix wildcard constraints * ensure bedtools is available for rule filter_group_regions * try sorting the filter_group_regions output * correctly subset groups to currently handled batch in oncoprint.py * extend target-regions for the respective tests, so that delly has enough reads to estimate library parameters * fix target-regions BED files by adding final newline * more concise config.get() syntax in workflow/rules/regions.smk Co-authored-by: Johannes Köster <johannes.koester@tu-dortmund.de> * update datavzrd and its config syntax to 2.1 * enable heatmaps for header rows, adapt to new datavzrd syntax for column rendering * fix * fix Co-authored-by: Johannes Köster <johannes.koester@tu-dortmund.de> Co-authored-by: Johannes Köster <johannes.koester@uni-due.de>
snakemake-workflows · Sep 23, 2022 · 84064ef · 84064ef
1 parent 4b99df9
commit 84064ef
Show file tree

Hide file tree

Showing 13 changed files with 341 additions and 169 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -65,6 +65,15 @@ jobs:
         stagein: |
           rm -rf .test/results
 
+    - name: Test workflow (local FASTQs, multiple target regions BED files)
+      uses: snakemake/snakemake-github-action@v1
+      with:
+        directory: .test
+        snakefile: workflow/Snakefile
+        args: "--configfile .test/config-target-regions/config_multiple_beds.yaml --use-conda --show-failed-logs -j 10 --conda-cleanup-pkgs cache --conda-frontend mamba"
+        stagein: |
+          rm -rf .test/results
+
     - name: Test workflow (local FASTQs, no candidate filtering)
       uses: snakemake/snakemake-github-action@v1
       with:

diff --git a/.test/config-target-regions/config_multiple_beds.yaml b/.test/config-target-regions/config_multiple_beds.yaml
@@ -0,0 +1,111 @@
+samples: config-target-regions/samples.tsv
+
+units: config-target-regions/units.tsv
+
+target_regions:
+  - config-target-regions/target-regions.bed
+  - config-target-regions/target-regions_extra.bed
+
+ref:
+  # Number of chromosomes to consider for calling.
+  # The first n entries of the FASTA will be considered.
+  n_chromosomes: 17
+  # Ensembl species name
+  species: saccharomyces_cerevisiae
+  # Ensembl release
+  release: 100
+  # Genome build
+  build: R64-1-1
+
+primers:
+  trimming:
+    activate: false
+    primers_fa1: ""
+    primers_fa2: ""
+    library_length: 0
+
+
+# Estimation of mutational burden.
+mutational_burden:
+  activate: true
+  events:
+    - present
+
+calling:
+  delly:
+    activate: true
+  freebayes:
+    activate: true
+  # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling
+  scenario: config-target-regions/scenario.yaml
+  # See http://snpeff.sourceforge.net/SnpSift.html#filter
+  filter:
+    candidates: "ANN['IMPACT'] != 'LOW'"
+    moderate: "ANN['IMPACT'] == 'MODERATE'"
+  fdr-control:
+    threshold: 0.05
+    local: true
+    events: 
+      present:
+        varlociraptor: 
+          - present
+        filter:
+          - moderate
+        desc: Variants with moderate impact
+        # Optional column names for sorting
+        sort:
+          - impact
+
+annotations:
+  vcfs:
+    activate: true
+    known: resources/variation.vcf.gz
+  dgidb:
+    activate: false
+    datasources:
+      - DrugBank
+  vep:
+    params: --everything
+    plugins:
+      # Add any plugin from https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
+      # Plugin args can be passed as well, e.g. "LoFtool,path/to/custom/scores.txt".
+      - LoFtool
+
+
+params:
+  cutadapt: ""
+  picard:
+    MarkDuplicates: ""
+  gatk:
+    BaseRecalibrator: ""
+    applyBQSR: ""
+  varlociraptor:
+    # Add extra arguments for varlociraptor call.
+    # For example, in case of panel data consider omitting certain bias estimations
+    # which might be misleading because all reads of an amplicon have the same start
+    # position, strand etc. (--omit-strand-bias, --omit-read-position-bias,
+    # --omit-softclip-bias, --omit-read-orientation-bias).
+    call: ""
+    # Add extra arguments for varlociraptor preprocess. By default, we limit the depth to 200.
+    # Increase this value for panel sequencing!
+    preprocess: "--max-depth 200"
+  freebayes:
+    min_alternate_fraction: 0.05 # Reduce for calling variants with lower VAFs
+
+report:
+  activate: true
+  max_read_depth: 250
+  stratify:
+    activate: false
+    by-column: condition
+
+# printing of variants in a table format
+tables:
+  activate: true
+  # vembrane expression to generate the table
+  output:
+    expression: "INDEX, CHROM, POS, REF, ALT[0], ANN['Consequence'], ANN['IMPACT'], ANN['SYMBOL'], ANN['Feature']"
+    genotype: true
+    coverage: true
+    event_prob: true
+  generate_excel: true
diff --git a/.test/config-target-regions/target-regions.bed b/.test/config-target-regions/target-regions.bed
@@ -1 +1,17 @@
-I	1	10000
+I	500	230218
+II	500	813184
+III	0	316620
+IV	0	1531933
+V	0	576874
+VI	0	270161
+VII	0	1090940
+VIII	0	562643
+IX	0	439888
+X	0	745751
+XI	0	666816
+XII	0	1078177
+XIII	0	924431
+XIV	0	784333
+XV	0	1091291
+XVI	0	948066
+Mito	0	85779
diff --git a/.test/config-target-regions/target-regions_extra.bed b/.test/config-target-regions/target-regions_extra.bed
@@ -0,0 +1,2 @@
+I	0	100
+I	200	500
diff --git a/workflow/envs/bcftools.yaml b/workflow/envs/bcftools.yaml
@@ -2,4 +2,4 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - bcftools =1.12
+  - bcftools =1.14
diff --git a/workflow/envs/datavzrd.yaml b/workflow/envs/datavzrd.yaml
@@ -1,4 +1,4 @@
 channels:
   - conda-forge
 dependencies:
-  - datavzrd =1.17
+  - datavzrd =2.1