formatting

snakemake-workflows · May 6, 2021 · e76fcfb
1 parent e13c52c
commit e76fcfb
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 30 deletions.
diff --git a/workflow/Snakefile b/workflow/Snakefile
@@ -23,17 +23,9 @@ container: "docker://continuumio/miniconda3"
 
 
 include: "rules/common.smk"
-
-
 include: "rules/ref.smk"
-
-
 include: "rules/trim.smk"
-
-
 include: "rules/align.smk"
-
-
 include: "rules/diffexp.smk"
 
 

diff --git a/workflow/scripts/count-matrix.py b/workflow/scripts/count-matrix.py
@@ -1,24 +1,34 @@
 import sys
+
 # logging
 sys.stderr = open(snakemake.log[0], "w")
 
 import pandas as pd
 
+
 def get_column(strandedness):
     if pd.isnull(strandedness) or strandedness == "none":
-        return 1 #non stranded protocol
+        return 1  # non stranded protocol
     elif strandedness == "yes":
-        return 2 #3rd column
+        return 2  # 3rd column
     elif strandedness == "reverse":
-        return 3 #4th column, usually for Illumina truseq
+        return 3  # 4th column, usually for Illumina truseq
     else:
-        raise ValueError(("'strandedness' column should be empty or have the " 
-                          "value 'none', 'yes' or 'reverse', instead has the " 
-                          "value {}").format(repr(strandedness)))
+        raise ValueError(
+            (
+                "'strandedness' column should be empty or have the "
+                "value 'none', 'yes' or 'reverse', instead has the "
+                "value {}"
+            ).format(repr(strandedness))
+        )
+
 
-counts = [pd.read_table(f, index_col=0, usecols=[0, get_column(strandedness)], 
-          header=None, skiprows=4) 
-          for f, strandedness in zip(snakemake.input, snakemake.params.strand)]
+counts = [
+    pd.read_table(
+        f, index_col=0, usecols=[0, get_column(strandedness)], header=None, skiprows=4
+    )
+    for f, strandedness in zip(snakemake.input, snakemake.params.strand)
+]
 
 for t, sample in zip(counts, snakemake.params.samples):
     t.columns = [sample]
@@ -27,4 +37,4 @@ def get_column(strandedness):
 matrix.index.name = "gene"
 # collapse technical replicates
 matrix = matrix.groupby(matrix.columns, axis=1).sum()
-matrix.to_csv(snakemake.output[0], sep="\t")
\ No newline at end of file
+matrix.to_csv(snakemake.output[0], sep="\t")
diff --git a/workflow/scripts/gtf2bed.py b/workflow/scripts/gtf2bed.py
@@ -1,16 +1,18 @@
 import gffutils
 
-db = gffutils.create_db(snakemake.input[0],
-                        dbfn=snakemake.output.db,
-                        force=True,
-                        keep_order=True,
-                        merge_strategy='merge',
-                        sort_attribute_values=True,
-                        disable_infer_genes=True,
-                        disable_infer_transcripts=True)
+db = gffutils.create_db(
+    snakemake.input[0],
+    dbfn=snakemake.output.db,
+    force=True,
+    keep_order=True,
+    merge_strategy="merge",
+    sort_attribute_values=True,
+    disable_infer_genes=True,
+    disable_infer_transcripts=True,
+)
 
-with open(snakemake.output.bed, 'w') as outfileobj:
-    for tx in db.features_of_type('transcript', order_by='start'):
-        bed = [s.strip() for s in db.bed12(tx).split('\t')]
+with open(snakemake.output.bed, "w") as outfileobj:
+    for tx in db.features_of_type("transcript", order_by="start"):
+        bed = [s.strip() for s in db.bed12(tx).split("\t")]
         bed[3] = tx.id
-        outfileobj.write('{}\n'.format('\t'.join(bed)))
+        outfileobj.write("{}\n".format("\t".join(bed)))