-
Notifications
You must be signed in to change notification settings - Fork 7
/
bqsr_recal_make_input.sh
executable file
·104 lines (86 loc) · 3.12 KB
/
bqsr_recal_make_input.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/bin/bash
##########################################################################
#
# Platform: NCI Gadi HPC
# Usage: bash bqsr_recal_make_input.sh <config prefix>
# Version: 2.0
#
# For more details see: https://github.com/Sydney-Informatics-Hub/Fastq-to-BAM
#
# If you use this script towards a publication, please acknowledge the
# Sydney Informatics Hub (or co-authorship, where appropriate).
#
# Suggested acknowledgement:
# The authors acknowledge the support provided by the Sydney Informatics Hub,
# a Core Research Facility of the University of Sydney. This research/project
# was undertaken with the assistance of resources and services from the National
# Computational Infrastructure (NCI), which is supported by the Australian
# Government, and the Australian BioCommons which is enabled by NCRIS via
# Bioplatforms Australia funding.
#
##########################################################################
# ---------------------------------------------------------------------------
# Setup: validate the command line, announce the run mode, reset the output
# files and collect the BQSR interval lists.
#
# Variables set here and used by the rest of the script:
#   cohort     path prefix of the cohort config (../<config prefix>)
#   group      "true" to write separate normal/tumour input files
#   t_input, n_input, input   output task-list paths under ./Inputs
#   intervals  array of interval list paths
# ---------------------------------------------------------------------------

# The config prefix is mandatory; without it the script would silently look
# for "../.config".
if [[ -z ${1:-} ]]; then
  printf 'Usage: bash %s <config prefix>\n' "${0##*/}" >&2
  exit 1
fi

cohort=../$1

if [[ ! -r ${cohort}.config ]]; then
  printf 'ERROR: cannot read config file %s\n' "${cohort}.config" >&2
  exit 1
fi

group=true

if [[ ${group} == true ]]; then
  printf 'Creating group-based BQSR recalibration inputs from %s.config\n' "${cohort}"
else
  printf 'Creating BQSR recalibration inputs from %s.config, no sample grouping applied\n' "${cohort}"
fi

t_input=./Inputs/bqsr_recal.inputs-tumour
n_input=./Inputs/bqsr_recal.inputs-normal
input=./Inputs/bqsr_recal.inputs

# The task lists are appended to later, so the directory must exist and any
# list left over from a previous run must be removed first.
mkdir -p ./Inputs || exit 1
rm -f -- "${t_input}" "${n_input}" "${input}"

# Directory of text files generated by GATK SplitIntervals.
# A glob (rather than parsing `ls`) keeps each path as one array element.
intervals=(../Reference/BQSR_intervals/*list)
if [[ ! -e ${intervals[0]} ]]; then
  # An unmatched glob is left as the literal pattern, which does not exist.
  printf 'ERROR: no interval lists (*list) found in ../Reference/BQSR_intervals\n' >&2
  exit 1
fi
#######################################
# Collect sample IDs from cohort.config.
#
# Each non-comment line is split on whitespace; the second field (lab ID) is
# the value stored. Lines whose first field starts with '#' are skipped, as
# are lines that have no second field (blank or incomplete lines).
#
# Globals:
#   group      (read)    "true": lab IDs matching -N or -B go to samples_n,
#                        all others to samples_t; otherwise every lab ID
#                        goes to samples
#   samples_n  (written) reset, then filled as described above
#   samples_t  (written) reset, then filled as described above
#   samples    (written) reset, then filled as described above
# Arguments:
#   $1 - path to the config file
# Returns:
#   0 on success, 1 if the config file cannot be read (lists are left empty)
#######################################
collect_samples() {
  local config=$1
  local sampleid labid

  samples_n=()
  samples_t=()
  samples=()

  if [[ ! -r ${config} ]]; then
    printf 'ERROR: cannot read config file %s\n' "${config}" >&2
    return 1
  fi

  # The '|| [[ -n ... ]]' clause keeps a final line that has no trailing
  # newline; '_' absorbs the remaining columns, which are not needed here.
  while read -r sampleid labid _ || [[ -n ${sampleid} ]]; do
    if [[ ${sampleid} =~ ^# ]]; then
      continue
    fi
    if [[ -z ${labid} ]]; then
      continue
    fi
    if [[ ${group} == true ]]; then
      if [[ ${labid} =~ -N|-B ]]; then
        samples_n+=("${labid}")
      else
        samples_t+=("${labid}")
      fi
    else
      samples+=("${labid}")
    fi
  done < "${config}"

  return 0
}

# A read failure is reported on stderr and leaves all sample lists empty, so
# no task files are written further down.
collect_samples "${cohort}.config"
#######################################
# Append one "sample,index,interval" line per (sample, interval) pair.
#
# The index is the zero-based position of the interval in the intervals
# array. Values are passed to printf as arguments, never as the format, so a
# '%' or backslash in a sample ID or path is written literally.
# The output file is only created when at least one line is written.
#
# Globals:
#   intervals (read) array of interval list paths
# Arguments:
#   $1   - output file to append to
#   $2.. - sample IDs (empty IDs are ignored)
# Outputs:
#   Appends task lines to the output file
#######################################
write_bqsr_tasks() {
  local out_file=$1
  shift
  local sample i

  if (( ${#intervals[@]} == 0 )); then
    return 0
  fi

  for sample in "$@"; do
    if [[ -z ${sample} ]]; then
      continue
    fi
    for ((i = 0; i < ${#intervals[@]}; i++)); do
      printf '%s,%s,%s\n' "${sample}" "${i}" "${intervals[i]}"
    done >> "${out_file}"
  done

  return 0
}

write_bqsr_tasks "${n_input}" "${samples_n[@]}"
write_bqsr_tasks "${t_input}" "${samples_t[@]}"
write_bqsr_tasks "${input}" "${samples[@]}"
#######################################
# Print "<label>: <line count>" for a task list, if that list exists.
#
# Nothing is printed when the path is empty or is not a regular file.
#
# Arguments:
#   $1 - label printed before the count
#   $2 - path to the task list
# Outputs:
#   Writes the summary line to stdout
#######################################
report_task_count() {
  local label=$1
  local file=$2
  local tasks

  if [[ ! -f ${file} ]]; then
    return 0
  fi

  tasks=$(wc -l < "${file}")
  # Some wc implementations pad the count with spaces; print the bare number.
  tasks=${tasks//[[:space:]]/}
  printf '%s: %s\n' "${label}" "${tasks}"
}

report_task_count "Number of BaseRecalibrator tasks to run" "${input}"
report_task_count "Number of BaseRecalibrator normal sample tasks to run" "${n_input}"
report_task_count "Number of BaseRecalibrator tumour sample tasks to run" "${t_input}"

# Reminder for the next step of the workflow.
printf '\n%s\n' '!!! Before running bqsr_recal_run_parallel.pbs, edit the script and unhash the correct task script for human/non-human samples'
printf '%s\n' "If you're using the non-human script, remember to fill in the \$dbsnp variable"