Skip to content

Commit

Permalink
fixed the parallel version of the script and updated read me about bug
Browse files Browse the repository at this point in the history
  • Loading branch information
unique379r committed May 3, 2021
1 parent 816c8f2 commit 95f52dd
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 35 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ XATLAS = ../user/path/xatlas

PARALLEL = ../user/path/parallel

## Note
One may encounter with a bug that using wrapper (`STRspy_run_v1.0.sh`), STRspy parallel version may not able to communicate properly with "gnu parallel" and exit the workfow without mapping or further steps of the analysis. Solution to this, the user may either run the script directly from scripts/STRspy_v1.0.sh in the STRspy dir or modify the STRspy_run_v1.0.sh script and allow the nested loop version of the workflow, but bear in mind that this is a little slower than the parallel version.

## Contacts
bioinforupesh200 DOT au AT gmail DOT com
Expand Down
14 changes: 11 additions & 3 deletions STRspy_run_v1.0.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ if [ ! -s $InputConfig ]; then
fi

## Extract parameters from the given file
clear
#clear
echo -e "\n"
echo -e "########################################################"
echo -e "################# Input Parameters #################"
Expand Down Expand Up @@ -127,12 +127,20 @@ echo -e "#PARALLEL :" $PARALLEL
#### Analysis begins ###
########################


if [[ -f "./scripts/STRspy_Parallel_v1.0.sh" ]]; then
echo -e "\n"
echo -e "\t\t ~ ~ ~ ~ Running STRspy_Parallel_v1.0.sh ~ ~ ~ ~ "
echo -e "\t\t ~ ~ ~ ~ Running STRspy ~ ~ ~ ~ "
#echo -e "\t\tAnalysis date:" `date`
echo -e "\n"
bash ./scripts/STRspy_Parallel_v0.1.sh $INPUT_DIR $INPUT_BAM $READ_TYPE $STR_FASTA $STR_BED $GENOME_FASTA $REGION_BED $NORM_CUTOFF $OUTPUT_DIR $ToolsConfig
##############################
## nested loop version STRspy
#bash scripts/STRspy_v1.0.sh "$INPUT_DIR" "$INPUT_BAM" "$READ_TYPE" "$STR_FASTA" "$STR_BED" "$GENOME_FASTA" "$REGION_BED" "$NORM_CUTOFF" "$OUTPUT_DIR" "$ToolsConfig"
#bash scripts/STRspy_v1.0.sh "$INPUT_DIR" "$INPUT_BAM" "$READ_TYPE" "$STR_FASTA" "$STR_BED" "$GENOME_FASTA" "$REGION_BED" "$NORM_CUTOFF" "$OUTPUT_DIR" "$ToolsConfig"
##############################
## parallel version of STRspy
#bash scripts/STRspy_Parallel_v1.0.sh "$INPUT_DIR" "$INPUT_BAM" "$READ_TYPE" "$STR_FASTA" "$STR_BED" "$GENOME_FASTA" "$REGION_BED" "$NORM_CUTOFF" "$OUTPUT_DIR" "$ToolsConfig"
bash scripts/STRspy_Parallel_v1.0.sh "$INPUT_DIR" "$INPUT_BAM" "$READ_TYPE" "$STR_FASTA" "$STR_BED" "$GENOME_FASTA" "$REGION_BED" "$NORM_CUTOFF" "$OUTPUT_DIR" "$ToolsConfig"
#echo -e "#Analysis finished.\n" `date`
else
echo -e "please make sure you are in the same directory of STRspy
Expand Down
38 changes: 25 additions & 13 deletions scripts/STRspy_Parallel_v1.0.sh
Original file line number Diff line number Diff line change
Expand Up @@ -234,26 +234,38 @@ if [[ $read_type == "fastq" ]]; then
mkdir -p $output_dir/GenomeMapping
echo -e "#fastq map to genome.."
if [[ "$readtype" == "ont" ]]; then
$parallel -X "$minimap --MD -L -t 1 -ax map-ont $genome {} -o $output_dir/GenomeMapping/{/.}.minimap.sam" ::: $input_reads_dir/*.fastq
$parallel -j5 "$minimap --MD -L -t 1 -ax map-ont $genome {} -o $output_dir/GenomeMapping/{/.}.minimap.sam" ::: $input_reads_dir/*.fastq
elif [[ "$readtype" == "pb" ]]; then
$parallel -X "$minimap --MD -L -t 1 -ax map-pb $genome {} -o $output_dir/GenomeMapping/{/.}.minimap.sam" ::: $input_reads_dir/*.fastq
$parallel -j5 "$minimap --MD -L -t 1 -ax map-pb $genome {} -o $output_dir/GenomeMapping/{/.}.minimap.sam" ::: $input_reads_dir/*.fastq
else
echo -e "#Provided Read type is not known"
exit 1;
fi
## sam2bam
echo -e "#sam to bam.."
$parallel -X "$samtools view -S -b {} -o $output_dir/GenomeMapping/{/.}.bam" ::: $output_dir/GenomeMapping/*.sam
## bam2sortedbam and index
echo -e "#bam sort and index.."
$parallel -X "$samtools sort -o $output_dir/GenomeMapping/{/.}.sorted.bam {}" ::: $output_dir/GenomeMapping/*.minimap.bam
## bam index
$parallel -X "$samtools index {}" ::: $output_dir/GenomeMapping/*.sorted.bam
## remove temp sam and bam
rm -rf $output_dir/GenomeMapping/*.sam $output_dir/GenomeMapping/*.minimap.bam
echo -e "#Done."
fi
#### checking sam files existence
if [[ "$is_input_bam" == "no" ]]; then
type="fastq"
sam=($output_dir/GenomeMapping/*.minimap.sam)
if [[ -e "${sam[0]}" ]]; then
## sam2bam
echo -e "#sam to bam.."
$parallel -j5 "$samtools view -S -b {} -o $output_dir/GenomeMapping/{/.}.bam" ::: $output_dir/GenomeMapping/*.sam
## bam2sortedbam and index
echo -e "#bam sort and index.."
$parallel -j5 "$samtools sort -o $output_dir/GenomeMapping/{/.}.sorted.bam {}" ::: $output_dir/GenomeMapping/*.minimap.bam
## bam index
$parallel -j5 "$samtools index {}" ::: $output_dir/GenomeMapping/*.sorted.bam
## remove temp sam and bam
rm -rf $output_dir/GenomeMapping/*.sam $output_dir/GenomeMapping/*.minimap.bam
echo -e "#Done."
else
echo -e "\n#ERROR: Minimap sams from fastq inputs are not found !!\n"
exit 1;
fi
fi
#### checking bam files existence
if [[ "$is_input_bam" == "no" ]]; then
type="fastq"
Expand Down
38 changes: 19 additions & 19 deletions scripts/STRspy_v1.0.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ tools_config="${10}" ## tools config

print_USAGE()
{
echo -e "USAGE: bash ./STRspy_v1.0.sh <input_reads_dir(fastq/bam dir)> <is_input_bam(yes/no)> <ReadType(ont/pb)> <motif_fasta_dir> <motif_bed_dir> <region_bed> <filter_threshold> <output_dir> <tools_config>\n"
echo -e "USAGE: bash ./STRspy_v1.0.sh <input_reads_dir(fastq/bam dir)> <is_input_bam(yes/no)> <ReadType(ont/pb)> <motif_fasta_dir> <motif_bed_dir> <genome_fa> <region_bed> <filter_threshold> <output_dir> <tools_config>\n"
echo "EXAMPLE (positional arguments = counts => 10):"
echo "#In case of bam input dir"
echo "bash ./STRspy_v1.0.sh example/test_dir yes pb example/str_fa example/str_bed NULL region_bed 0.4 output_dir tools_config.tx"
echo "bash ./STRspy_v1.0.sh example/test_dir yes pb example/str_fa example/str_bed NULL region_bed 0.4 output_dir tools_config.txt"
echo "#In case of fastq input dir"
echo "bash ./STRspy_v1.0.sh example/test_dir no ont example/str_fa example/str_bed example/ref_fasta/hg19.fa region_bed 0.4 output_dir tools_config.tx"
echo "bash ./STRspy_v1.0.sh example/test_dir no ont str_fa str_bed ref_fasta/hg19.fa region_bed 0.4 output_dir tools_config.txt"
echo -e "\n"
}

Expand Down Expand Up @@ -268,29 +268,29 @@ if [[ "$is_input_bam" == "no" ]]; then
fi
fi
## get cov info in case of bam provided
#### checking bam files existence
if [[ "$is_input_bam" == "yes" ]]; then
bam=($input_reads_dir/*.bam)
type="bam"
mkdir -p $output_dir/GenomicMappingStats
if [[ -e "${bam[0]}" ]]; then
echo -e "#Summerizing the mapped and unmapped reads and their percentage"
get_cov $input_reads_dir $output_dir/GenomicMappingStats $region_bed
echo -e "#Done"
else
echo -e "\n#ERROR: Provided Genomic bams are not found !!\n"
exit 1;
fi
fi
# ## get cov info in case of bam provided
# #### checking bam files existence
# if [[ "$is_input_bam" == "yes" ]]; then
# bam=($input_reads_dir/*.bam)
# type="bam"
# mkdir -p $output_dir/GenomicMappingStats
# if [[ -e "${bam[0]}" ]]; then
# echo -e "#Summerizing the mapped and unmapped reads and their percentage"
# get_cov $input_reads_dir $output_dir/GenomicMappingStats $region_bed
# echo -e "#Done"
# else
# echo -e "\n#ERROR: Provided Genomic bams are not found !!\n"
# exit 1;
# fi
# fi
# #"==================================================================="
# ## step2 : mapping to STR.fa + conting + normalization + SNV calling
# #"==================================================================="
if [[ $read_type == "$type" ]]; then
echo -e "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
echo -e "#Spying on STR for a given samples...."
echo -e "#Spying on STR for a given samples(skipped mapping since bam was provided)...."
## create a inner and outer loop
for bamfile in "${bam[@]}"; do
bam_name="${bamfile##*/}"
Expand Down

0 comments on commit 95f52dd

Please sign in to comment.