-
Notifications
You must be signed in to change notification settings - Fork 404
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Control-FREEC improvements #196
Changes from all commits
8518d36
627019b
10cefaf
6d213a4
8148546
e3453c6
6aa1091
33ff41f
a7b0f00
a3e45c6
d00a8f7
8dfc48a
db4775c
642ddfb
94c82e2
c650e08
424a4f5
8ba3e47
84f039c
b238dd6
83e5348
114e2c0
b76f217
67bc341
9019dc3
6e7262f
9893a3b
05c6e0b
bc6155f
4bdff8e
eb22a0c
5c30fd8
11c3ad3
a722f3e
f4e9883
e71b7bc
70f34d6
d83a244
cb76fef
72d37b7
f202e18
42b19ff
f523aa8
e8c0bf1
bb7aa1f
7b4dda8
9be155c
37573e1
261aa4e
a94696d
0639d72
aae1015
6d2efd0
e03b7a4
20a3556
65a1069
34e9469
b72e27a
0ab51e5
0ae70ed
d1ebfb1
cf620c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,6 +109,7 @@ def helpMessage() { | |
--known_indels [file] knownIndels file | ||
--known_indels_index [file] knownIndels index | ||
If none provided, will be generated automatically if a knownIndels file is provided | ||
--mappability [file] Mappability file for Control-FREEC | ||
--species [str] Species for VEP | ||
--snpeff_db [str] snpEff Database version | ||
--vep_cache_version [int] VEP Cache version | ||
|
@@ -125,6 +126,13 @@ def helpMessage() { | |
--max_multiqc_email_size [str] Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) | ||
-name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic | ||
|
||
|
||
--normal_pileup [file] Normal pileup file for Control-FREEC | ||
--tumor_pileup [file] Tumor pileup file for Control-FREEC | ||
--cf_coeff [str] Control-FREEC coefficientOfVariation [0.015] | ||
--cf_ploidy [str] Control-FREEC ploidy [2] | ||
--cf_window [str] Control-FREEC window size [not set, as using coefficientOfVariation by default] | ||
|
||
AWSBatch options: | ||
--awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch | ||
--awsregion [str] The AWS Region for your AWSBatch job to run on | ||
|
@@ -361,6 +369,10 @@ if (!checkParameterList(annotateTools,annoList)) exit 1, 'Unknown tool(s) to ann | |
|
||
// Check parameters | ||
if ((params.ascat_ploidy && !params.ascat_purity) || (!params.ascat_ploidy && params.ascat_purity)) exit 1, 'Please specify both --ascat_purity and --ascat_ploidy, or none of them' | ||
if ((params.normal_pileup && !params.tumor_pileup) || (!params.normal_pileup && params.tumor_pileup)) exit 1, 'Please specify both --normal_pileup and --tumor_pileup, or none of them' | ||
|
||
// for Control-FREEC window and coefficientOfVariation we need only one of them | ||
if (params.cf_window && params.cf_coeff) exit 1, 'Please specify either --cf_window OR --cf_coeff, but not both of them' | ||
|
||
// Has the run name been specified by the user? | ||
// This has the bonus effect of catching both -name and --name | ||
|
@@ -506,14 +518,18 @@ if (params.trim_fastq) { | |
summary['Saved Trimmed Fastq'] = params.save_trimmed ? 'Yes' : 'No' | ||
} | ||
|
||
if (params.no_intervals && step != 'annotate') summary['Intervals'] = 'Do not use' | ||
if ('haplotypecaller' in tools) summary['GVCF'] = params.no_gvcf ? 'No' : 'Yes' | ||
if ('strelka' in tools && 'manta' in tools ) summary['Strelka BP'] = params.no_strelka_bp ? 'No' : 'Yes' | ||
if (params.ascat_purity) summary['ASCAT purity'] = params.ascat_purity | ||
if (params.ascat_ploidy) summary['ASCAT ploidy'] = params.ascat_ploidy | ||
if (params.no_intervals && step != 'annotate') summary['Intervals'] = 'Do not use' | ||
if ('haplotypecaller' in tools) summary['GVCF'] = params.no_gvcf ? 'No' : 'Yes' | ||
if ('strelka' in tools && 'manta' in tools ) summary['Strelka BP'] = params.no_strelka_bp ? 'No' : 'Yes' | ||
if (params.ascat_purity) summary['ASCAT purity'] = params.ascat_purity | ||
if (params.ascat_ploidy) summary['ASCAT ploidy'] = params.ascat_ploidy | ||
if (params.no_gatk_spark) summary['MarkDuplicates GATK Spark'] = params.no_gatk_spark ? 'No' : 'Yes' | ||
if (params.sequencing_center) summary['Sequenced by'] = params.sequencing_center | ||
if (params.pon && 'mutect2' in tools) summary['Panel of normals'] = params.pon | ||
if (params.sequencing_center) summary['Sequenced by'] = params.sequencing_center | ||
if (params.pon && 'mutect2' in tools) summary['Panel of normals'] = params.pon | ||
if (params.mappability) summary['Mappability for Control-FREEC'] = params.mappability | ||
if (params.cf_window) summary['Window for Control-FREEC'] = params.cf_window | ||
if (params.cf_coeff) summary['coefficientOfVariation for Control-FREEC'] = params.cf_coeff | ||
if (params.cf_ploidy) summary['Ploidy for Control-FREEC'] = params.cf_ploidy | ||
|
||
summary['Save Reference'] = params.save_reference ? 'Yes' : 'No' | ||
summary['Nucleotides/s'] = params.nucleotides_per_second | ||
|
@@ -912,7 +928,7 @@ if (step in ['recalibrate', 'variantcalling', 'annotate']) { | |
|
||
(inputBam, inputBamFastQC) = inputBam.into(2) | ||
|
||
// Removing inputFile2 wich is null in case of uBAM | ||
// Removing inputFile2 which is null in case of uBAM | ||
inputBamFastQC = inputBamFastQC.map { | ||
idPatient, idSample, idRun, inputFile1, inputFile2 -> | ||
[idPatient, idSample, idRun, inputFile1] | ||
|
@@ -2194,8 +2210,7 @@ vcfFreebayesSingle = vcfFreebayesSingle.groupTuple(by: [0,1,2]) | |
SOMATIC VARIANT CALLING | ||
================================================================================ | ||
*/ | ||
|
||
// Ascat, Control-FREEC | ||
// Ascat, pileup, pileups with no intervals, recalibrated, and IDs when the pileup is provided by CLI for Control-FREEC | ||
(bamAscat, bamMpileup, bamMpileupNoInt, bamRecalAll) = bamRecalAll.into(4) | ||
|
||
// separate BAM by status | ||
|
@@ -2945,18 +2960,25 @@ process Mpileup { | |
file(fastaFai) from ch_fai | ||
|
||
output: | ||
set idPatient, idSample, file("${prefix}${idSample}.pileup.gz") into mpileupMerge | ||
set idPatient, idSample, file("${prefix}${idSample}.pileup") into mpileupMerge | ||
|
||
when: 'controlfreec' in tools || 'mpileup' in tools | ||
|
||
script: | ||
prefix = params.no_intervals ? "" : "${intervalBed.baseName}_" | ||
intervalsOptions = params.no_intervals ? "" : "-l ${intervalBed}" | ||
|
||
if(params.normal_pileup && params.tumor_pileup) // we already have pileups | ||
""" | ||
touch ${prefix}${idSample}.pileup | ||
""" | ||
else | ||
""" | ||
# Control-FREEC reads uncompresses the zipped file TWICE in single-threaded mode. | ||
# For the moment we are not using compressed pileups. | ||
samtools mpileup \ | ||
-f ${fasta} ${bam} \ | ||
${intervalsOptions} \ | ||
| bgzip --threads ${task.cpus} -c > ${prefix}${idSample}.pileup.gz | ||
${intervalsOptions} > ${prefix}${idSample}.pileup # | bgzip --threads ${task.cpus} -c > ${prefix}${idSample}.pileup.gz | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if the output is not with the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right, have to be fixed in the publishDir as I see |
||
""" | ||
} | ||
|
||
|
@@ -2969,8 +2991,9 @@ if (!params.no_intervals) { | |
} | ||
|
||
// STEP MPILEUP.2 - MERGE | ||
|
||
process MergeMpileup { | ||
label 'cpus_1' | ||
|
||
tag {idSample} | ||
|
||
publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { it == "${idSample}.pileup.gz" ? "VariantCalling/${idSample}/mpileup/${it}" : '' } | ||
|
@@ -2979,19 +3002,25 @@ process MergeMpileup { | |
set idPatient, idSample, file(mpileup) from mpileupMerge | ||
|
||
output: | ||
set idPatient, idSample, file("${idSample}.pileup.gz") into mpileupOut | ||
set idPatient, idSample, file("${idSample}.pileup") into mpileupOut | ||
|
||
when: !(params.no_intervals) && 'controlfreec' in tools || 'mpileup' in tools | ||
|
||
script: | ||
if(params.normal_pileup && params.tumor_pileup) | ||
""" | ||
if [[ ${params.normal_pileup} =~ $idSample ]] ; then ln -s ${params.normal_pileup} ${idSample}.pileup ; fi | ||
if [[ ${params.tumor_pileup} =~ $idSample ]] ; then ln -s ${params.tumor_pileup} ${idSample}.pileup ; fi | ||
""" | ||
else | ||
""" | ||
for i in `ls -1v *.pileup.gz`; | ||
do zcat \$i >> ${idSample}.pileup | ||
for i in `ls -1v *.pileup`; | ||
do cat \$i >> ${idSample}.pileup | ||
done | ||
|
||
bgzip --threads ${task.cpus} -c ${idSample}.pileup > ${idSample}.pileup.gz | ||
#bgzip --threads ${task.cpus} -c ${idSample}.pileup > ${idSample}.pileup.gz | ||
|
||
rm ${idSample}.pileup | ||
#rm ${idSample}.pileup | ||
""" | ||
} | ||
|
||
|
@@ -3015,7 +3044,8 @@ mpileupOut = mpileupOut.map { | |
// STEP CONTROLFREEC.1 - CONTROLFREEC | ||
|
||
process ControlFREEC { | ||
label 'memory_singleCPU_2_task' | ||
label 'cpus_max' | ||
//label 'memory_singleCPU_2_task' | ||
|
||
tag {idSampleTumor + "_vs_" + idSampleNormal} | ||
|
||
|
@@ -3031,28 +3061,42 @@ process ControlFREEC { | |
file(fastaFai) from ch_fai | ||
|
||
output: | ||
set idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}.pileup.gz_CNVs"), file("${idSampleTumor}.pileup.gz_ratio.txt"), file("${idSampleTumor}.pileup.gz_normal_CNVs"), file("${idSampleTumor}.pileup.gz_normal_ratio.txt"), file("${idSampleTumor}.pileup.gz_BAF.txt"), file("${idSampleNormal}.pileup.gz_BAF.txt") into controlFreecViz | ||
set file("*.pileup.gz*"), file("${idSampleTumor}_vs_${idSampleNormal}.config.txt") into controlFreecOut | ||
set idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}.pileup_CNVs"), file("${idSampleTumor}.pileup_ratio.txt"), file("${idSampleTumor}.pileup_normal_CNVs"), file("${idSampleTumor}.pileup_normal_ratio.txt"), file("${idSampleTumor}.pileup_BAF.txt"), file("${idSampleNormal}.pileup_BAF.txt") into controlFreecViz | ||
set file("*.pileup*"), file("${idSampleTumor}_vs_${idSampleNormal}.config.txt") into controlFreecOut | ||
|
||
when: 'controlfreec' in tools | ||
|
||
script: | ||
config = "${idSampleTumor}_vs_${idSampleNormal}.config.txt" | ||
gender = genderMap[idPatient] | ||
// if we are using coefficientOfVariation, we must delete the window parameter | ||
// it is "window = 20000" in the default settings, without coefficientOfVariation set, | ||
// but we do not like it. Note, it is not written in stone | ||
coeff = "# coefficientOfVariation is not used" | ||
window = "# window parameter is not used" | ||
if(params.cf_coeff) { | ||
coeff = "$params.cf_coeff" | ||
} else if(params.cf_window) { | ||
window = "window = ${params.cf_window}" | ||
} else { // default settings | ||
coeff = "coefficientOfVariation = 0.015" | ||
} | ||
mappability = params.mappability ? "gemMappabilityFile = ${params.mappability}" : "# gemMappabilityFile is not used " | ||
""" | ||
touch ${config} | ||
echo "[general]" >> ${config} | ||
echo "BedGraphOutput = TRUE" >> ${config} | ||
echo "chrFiles = \${PWD}/${chrDir.fileName}" >> ${config} | ||
echo "chrLenFile = \${PWD}/${chrLength.fileName}" >> ${config} | ||
echo "coefficientOfVariation = 0.05" >> ${config} | ||
echo "${coeff}" >> ${config} | ||
echo "contaminationAdjustment = TRUE" >> ${config} | ||
echo "forceGCcontentNormalization = 0" >> ${config} | ||
echo "forceGCcontentNormalization = 1" >> ${config} | ||
echo "maxThreads = ${task.cpus}" >> ${config} | ||
echo "minimalSubclonePresence = 20" >> ${config} | ||
echo "ploidy = 2,3,4" >> ${config} | ||
echo "ploidy = ${params.cf_ploidy}" >> ${config} | ||
echo "sex = ${gender}" >> ${config} | ||
echo "window = 50000" >> ${config} | ||
echo "${window}" >> ${config} | ||
echo "${mappability}" >> ${config} | ||
echo "" >> ${config} | ||
|
||
echo "[control]" >> ${config} | ||
|
@@ -3094,11 +3138,20 @@ process ControlFreecViz { | |
when: 'controlfreec' in tools | ||
|
||
""" | ||
cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/assess_significance.R | R --slave --args ${cnvTumor} ${ratioTumor} | ||
cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/assess_significance.R | R --slave --args ${cnvNormal} ${ratioNormal} | ||
echo "Shaping CNV files to make sure we can assess signifacance" | ||
awk 'NF==9{print}' ${cnvTumor} > TUMOR.CNVs | ||
awk 'NF==7{print}' ${cnvNormal} > NORMAL.CNVs | ||
echo "############### Calculating significance values for TUMOR CNVs #############" | ||
cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/assess_significance.R | R --slave --args TUMOR.CNVs ${ratioTumor} | ||
echo "############### Calculating significance values for NORMAL CNVs ############" | ||
cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/assess_significance.R | R --slave --args NORMAL.CNVs ${ratioNormal} | ||
echo "############### Creating graph for TUMOR ratios ###############" | ||
cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/makeGraph.R | R --slave --args 2 ${ratioTumor} ${bafTumor} | ||
echo "############### Creating graph for NORMAL ratios ##############" | ||
cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/makeGraph.R | R --slave --args 2 ${ratioNormal} ${bafNormal} | ||
echo "############### Creating BED files for TUMOR ##############" | ||
perl /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/freec2bed.pl -f ${ratioTumor} > ${idSampleTumor}.bed | ||
echo "############### Creating BED files for NORMAL #############" | ||
perl /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/freec2bed.pl -f ${ratioNormal} > ${idSampleNormal}.bed | ||
""" | ||
} | ||
|
@@ -3694,6 +3747,8 @@ def nfcoreHeader() { | |
c_reset = params.monochrome_logs ? '' : "\033[0m"; | ||
c_white = params.monochrome_logs ? '' : "\033[0;37m"; | ||
c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; | ||
|
||
return "--" | ||
Comment on lines
+3750
to
+3751
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no sure why you added that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because I could not see the debug messages. Will be restored eventually :) |
||
|
||
return """ -${c_dim}--------------------------------------------------${c_reset}- | ||
${c_green},--.${c_black}/${c_green},-.${c_reset} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure I understand the idea here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So instead of a useless process here and in the next one, I think we make a new channel that match the ControlFreec process with the input mpileups and make it conditional so we don't even use these two processes at all.
That would be better, and safer, and easier to understand