Local tests fully working for MP
iquasere committed Jan 3, 2024
1 parent 8860a1c commit 0d27e5e
Showing 5 changed files with 33 additions and 20 deletions.
4 changes: 2 additions & 2 deletions workflow/rules/de_analysis.smk
@@ -1,8 +1,8 @@
 rule de_analysis:
     input:
-        f"{OUTPUT}/Quantification/dea_input.tsv"
+        f"{OUTPUT}/Quantification/dea_input.tsv" if len(mt_exps) > 0 else f"{OUTPUT}/Metaproteomics/mp_normalized.tsv"
     output:
-        f"{OUTPUT}/DE_analysis/condition_treated_results.tsv",
+        f"{OUTPUT}/DE_analysis/condition_treated_results.tsv"
     threads:
         1
     params:
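Note: the rule now falls back to the metaproteomics quantification table when no metatranscriptomics experiments are present. A minimal sketch (not part of this commit) of how the same choice could be expressed as a Snakemake input function; `dea_input` is a hypothetical helper name, and `mt_exps`/`OUTPUT` are assumed to be the Snakefile-level globals used in the diff above:

    def dea_input(wildcards):
        # mt_exps and OUTPUT are assumed to be defined at Snakefile level, as in the diff above
        if len(mt_exps) > 0:
            return f"{OUTPUT}/Quantification/dea_input.tsv"
        return f"{OUTPUT}/Metaproteomics/mp_normalized.tsv"

    # inside the rule, `input: dea_input` would then resolve the path at run time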
35 changes: 24 additions & 11 deletions workflow/scripts/de_analysis.R
@@ -79,7 +79,6 @@ if(snakemake@params$datatype == "rna_seq") {
   library("ROTS")
   library("tibble")
   # Reproducibility-Optimized Test Statistics
-  total <- log2(total[!(rowSums(total == 0) > 0), ])
   de <- ROTS(data=total, groups=conditions, progress=TRUE)
   de_res <- cbind(de$logfc, de$pvalue, de$FDR)
   colnames(de_res) <- c("log2FoldChange", "pvalue", "FDR")
@@ -95,19 +94,33 @@ if(snakemake@params$datatype == "rna_seq") {
       file=paste0(snakemake@params$output, '/report.txt'), append=TRUE)
   }
 
+  sorted_data <- head(as.matrix(total[order(de$pvalue), ]), 30)
   #summary(de, fdr = 0.05)
-  for (ptype in c('volcano', 'heatmap', 'ma', 'reproducibility', 'pvalue', 'pca')){
+  for (ptype in c('volcano', 'heatmap', 'ma', 'reproducibility', 'pvalue', 'pca')) {
     print(paste0("Plotting ", ptype, " plot."))
-    jpeg(paste0(snakemake@params$output, "/", ptype, ".jpeg"), res=300)
+    jpeg_file <- paste0(ptype, ".jpeg")
 
-    if (ptype == 'heatmap') {
-      # heatmap with legend requires this call, also arranges the data better
-      sorted_data <- total[order(de$pvalue), ]  # total is always equal to de$data
-      heatmap(sorted_data, Rowv = NA, Colv = NA, col = colorRampPalette(c("blue", "white", "red"))(256), legend = TRUE)
-    } else {
-      plot(de, type = ptype, fdr = snakemake@params$fdr)
-    }
-    dev.off()
+    tryCatch({
+      if (ptype == 'heatmap') {
+        pheatmap(sorted_data, Rowv = NA, Colv = NA, col = colorRampPalette(c("blue", "white", "red"))(256), legend = TRUE)
+      } else {
+        jpeg(jpeg_file, res = 300)
+        plot(de, type = ptype, fdr = 0.05)
+        dev.off()
+      }
+    }, error = function(e) {
+      tryCatch({
+        if (ptype == 'heatmap') {
+          pheatmap(sorted_data, Rowv = NA, Colv = NA, col = colorRampPalette(c("blue", "white", "red"))(256), legend = TRUE)
+        } else {
+          jpeg(jpeg_file)  # If plot generation fails due to margin size, try without specifying res
+          plot(de, type = ptype, fdr = 0.05)
+          dev.off()
+        }
+      }, error = function(inner_e) {
+        cat("Error in plotting", ptype, "plot:", conditionMessage(inner_e), "\n")
+      })
+    })
   }
 }
 
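Note: for the non-heatmap figures the loop now retries each plot, first at res = 300, then without an explicit resolution, and finally logs the error instead of aborting. A rough Python sketch (not the project's code) of the same retry-then-log pattern, with a hypothetical `plot_fn` callable standing in for the R plotting calls:

    def save_plot_with_fallback(plot_fn, path, ptype):
        """Try a high-resolution render first, fall back to defaults, then log the failure."""
        try:
            plot_fn(path, dpi=300)      # first attempt, analogous to jpeg(..., res = 300)
        except Exception:
            try:
                plot_fn(path)           # retry without forcing resolution (e.g. for margin/size errors)
            except Exception as err:
                print(f"Error in plotting {ptype} plot: {err}")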
6 changes: 3 additions & 3 deletions workflow/scripts/general_report.py
@@ -52,7 +52,7 @@ def make_general_report(out, exps, sample, mg_preport, mt_preport, mp_preport, d
             de_input, pd.read_csv(f'{out}/Quantification/{sample}_mt.readcounts', sep='\t').rename(
                 columns={'Gene': 'Entry'}), on='Entry', how='outer')
     if len(mp_names) > 0:
-        spectracounts = pd.read_csv(f'{out}/Metaproteomics/{sample}/spectracounts.tsv', sep='\t')
+        spectracounts = pd.read_csv(f'{out}/Metaproteomics/{sample}_mp.spectracounts', sep='\t')
         spectracounts.rename(columns={'Main Accession': 'qseqid'}, inplace=True)
         report = pd.merge(report, spectracounts, on='qseqid', how='left')
         mp_preport = pd.merge(mp_preport, report[['Entry'] + mp_names], on='Entry', how='outer')
@@ -96,9 +96,9 @@ def make_general_reports(out, exps, max_lines=1000000):
         mp_report = mp_report.groupby('Entry')[mp_report.columns.tolist()[1:]].sum().reset_index()
         mp_report[mp_report[mp_report.columns.tolist()[1:]].isnull().sum(
             axis=1) < len(mp_report.columns.tolist()[1:])].drop_duplicates().to_csv(
-            f'{out}/Metaproteomics/mp_entry_quant', sep='\t', index=False)
+            f'{out}/Metaproteomics/mp_entry_quant.tsv', sep='\t', index=False)
         # in the case of MP, there is no normalization by protein size. So it can be done this way
-        shutil.copyfile(f'{out}/Metaproteomics/mp_entry_quant', f'{out}/Quantification/dea_input.tsv')
+        shutil.copyfile(f'{out}/Metaproteomics/mp_entry_quant.tsv', f'{out}/Quantification/dea_input.tsv')
 
 
 def run():
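Note: make_general_report now reads the renamed `{sample}_mp.spectracounts` file, and make_general_reports writes `mp_entry_quant.tsv`, which is copied unchanged as `dea_input.tsv` (as the in-line comment notes, spectral counts need no normalization by protein size). A rough pandas sketch with toy frames (the paths and data are placeholders, not the pipeline's real inputs) of the merge and per-Entry aggregation:

    import pandas as pd

    report = pd.DataFrame({"qseqid": ["g1", "g2", "g3"], "Entry": ["P1", "P1", "P2"]})
    spectracounts = pd.DataFrame({"Main Accession": ["g1", "g2"], "mp_sample": [10, 3]})

    # merge spectral counts into the annotation report on the query sequence id
    spectracounts = spectracounts.rename(columns={"Main Accession": "qseqid"})
    report = report.merge(spectracounts, on="qseqid", how="left")

    # aggregate to one row per Entry before writing the DEA input
    entry_quant = report.groupby("Entry")[["mp_sample"]].sum().reset_index()
    entry_quant.to_csv("mp_entry_quant.tsv", sep="\t", index=False)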
7 changes: 3 additions & 4 deletions workflow/scripts/metaproteomics.py
@@ -300,7 +300,6 @@ def select_proteins_for_second_search(self, original_db, output, results_files,
             output=f'{output}/2nd_search_database.fasta')
 
     def run(self):
-
         Path(snakemake.params.output).mkdir(parents=True, exist_ok=True)
         # 1st database construction
         self.database_generation(
@@ -355,14 +354,14 @@ def run(self):
         for name in snakemake.params.names:
             out = f'{snakemake.params.output}/{name}'
             self.compomics_run(
-                f'{snakemake.params.output}/2nd_search_database_target_decoy.fasta', f'{out}/2nd_search',
-                f'{out}/spectra', name, f'{snakemake.params.output}/2nd_params.par',
+                f'{snakemake.params.output}/2nd_search_database_concatenated_target_decoy.fasta',
+                f'{out}/2nd_search', f'{out}/spectra', name, f'{snakemake.params.output}/2nd_params.par',
                 threads=snakemake.threads, max_memory=snakemake.params.max_memory, reports=['9', '10'])
             spectracounts = pd.merge(spectracounts, pd.read_csv(
                 f'{out}/2nd_search/reports/{name}_Default_Protein_Report.txt', sep='\t', index_col=0
             )[['Main Accession', '#PSMs']].rename(columns={'#PSMs': name}), how='outer', on='Main Accession')
         spectracounts.groupby('Main Accession')[snakemake.params.names].sum().reset_index().fillna(value=0.0).to_csv(
-            f'{snakemake.params.output}/{snakemake.wildcards.sample}_mp_spectracounts.tsv', sep='\t', index=False)
+            f'{snakemake.params.output}_mp.spectracounts', sep='\t', index=False)
 
 
 if __name__ == '__main__':
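Note: the second search now points compomics_run at the concatenated target-decoy FASTA and writes the merged spectral counts to `{snakemake.params.output}_mp.spectracounts`. A rough sketch with toy in-memory Protein Reports (column names follow the diff; the data and file names are invented) of how the per-sample reports are folded into one spectral-counts table:

    import pandas as pd

    # toy stand-ins for '{out}/2nd_search/reports/{name}_Default_Protein_Report.txt'
    reports = {
        "sample_a": pd.DataFrame({"Main Accession": ["P1", "P2"], "#PSMs": [12, 3]}),
        "sample_b": pd.DataFrame({"Main Accession": ["P2", "P3"], "#PSMs": [7, 1]}),
    }
    names = list(reports)

    spectracounts = pd.DataFrame(columns=["Main Accession"])
    for name, rep in reports.items():
        # keep one spectral-count column per sample, outer-joined on the accession
        spectracounts = spectracounts.merge(
            rep[["Main Accession", "#PSMs"]].rename(columns={"#PSMs": name}),
            how="outer", on="Main Accession")

    spectracounts = spectracounts.groupby("Main Accession")[names].sum().reset_index().fillna(0.0)
    spectracounts.to_csv("mp.spectracounts", sep="\t", index=False)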
1 change: 1 addition & 0 deletions workflow/scripts/normalization.R
@@ -17,6 +17,7 @@ normalize_gene_expression <- function(df, output_file, norm_method, imput_method
     library("vsn")
     library("pcaMethods")
     df[df == 0] <- NA
+    df <- df[rowSums(is.na(df)) < ncol(df), ]
     norm <- exprs(justvsn(ExpressionSet(df)))
     if (imput_method == "LLS") {
       print("Performing Local Least Squares Imputation.")
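Note: the added line drops rows that became entirely NA once zeros were recoded as missing, presumably so that `justvsn` is not handed features with no observations. A small pandas analogue with toy data (not the R script itself) of the same two steps:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"s1": [0, 5, 0], "s2": [0, 2, 3]}, index=["g1", "g2", "g3"])
    df = df.replace(0, np.nan)     # df[df == 0] <- NA
    df = df.dropna(how="all")      # df <- df[rowSums(is.na(df)) < ncol(df), ]
    print(df)                      # g1 (all zeros) is removed; g2 and g3 are kept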
