Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
johanneskoester committed May 6, 2021
1 parent e13c52c commit e76fcfb
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 30 deletions.
8 changes: 0 additions & 8 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,9 @@ container: "docker://continuumio/miniconda3"


include: "rules/common.smk"


include: "rules/ref.smk"


include: "rules/trim.smk"


include: "rules/align.smk"


include: "rules/diffexp.smk"


Expand Down
30 changes: 20 additions & 10 deletions workflow/scripts/count-matrix.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,34 @@
import sys

# logging: send everything written to stderr from here on into the first
# log file that Snakemake declared for this rule.
sys.stderr = open(snakemake.log[0], "w")

import pandas as pd


def get_column(strandedness):
    """Map a sample's strandedness setting to the count column to read.

    The returned value is a zero-based column position; this script passes
    it to ``usecols`` when reading each per-sample count table.

    Args:
        strandedness: A missing value (anything ``pd.isnull`` reports as
            null) or one of the strings ``"none"``, ``"yes"`` or
            ``"reverse"``.

    Returns:
        ``1`` for a non-stranded protocol (missing or ``"none"``), ``2`` for
        ``"yes"`` and ``3`` for ``"reverse"``.

    Raises:
        ValueError: If ``strandedness`` is none of the accepted values.
    """
    if pd.isnull(strandedness) or strandedness == "none":
        return 1  # non stranded protocol
    elif strandedness == "yes":
        return 2  # 3rd column
    elif strandedness == "reverse":
        return 3  # 4th column, usually for Illumina truseq
    else:
        raise ValueError(
            (
                "'strandedness' column should be empty or have the "
                "value 'none', 'yes' or 'reverse', instead has the "
                "value {}"
            ).format(repr(strandedness))
        )


# Read one count table per input file. Each table is parsed without a header
# row, its first four rows are skipped, column 0 becomes the index, and the
# only other column kept is the one selected by the sample's strandedness.
counts = [
    pd.read_table(
        f, index_col=0, usecols=[0, get_column(strandedness)], header=None, skiprows=4
    )
    for f, strandedness in zip(snakemake.input, snakemake.params.strand)
]

# Label the single remaining count column of each table with its sample name.
for t, sample in zip(counts, snakemake.params.samples):
    t.columns = [sample]
Expand All @@ -27,4 +37,4 @@ def get_column(strandedness):
matrix.index.name = "gene"
# collapse technical replicates: columns sharing the same name are summed.
# Grouping the transposed frame by its index gives the same result as
# ``matrix.groupby(matrix.columns, axis=1).sum()`` without relying on the
# ``axis=1`` argument, which newer pandas releases deprecate.
matrix = matrix.T.groupby(level=0).sum().T
matrix.to_csv(snakemake.output[0], sep="\t")
26 changes: 14 additions & 12 deletions workflow/scripts/gtf2bed.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import gffutils

# Build the gffutils database from the first input file and store it at
# snakemake.output.db; force=True overwrites an existing database file.
db = gffutils.create_db(
    snakemake.input[0],
    dbfn=snakemake.output.db,
    force=True,
    keep_order=True,
    merge_strategy="merge",
    sort_attribute_values=True,
    disable_infer_genes=True,
    disable_infer_transcripts=True,
)

# Write one tab-separated BED12 line per "transcript" feature, iterating in
# order of start coordinate.
with open(snakemake.output.bed, "w") as outfileobj:
    for tx in db.features_of_type("transcript", order_by="start"):
        bed = [s.strip() for s in db.bed12(tx).split("\t")]
        # Replace the fourth field (index 3) with the transcript's id.
        bed[3] = tx.id
        outfileobj.write("{}\n".format("\t".join(bed)))

0 comments on commit e76fcfb

Please sign in to comment.