1
+ package org .labkey .sequenceanalysis .run .analysis ;
2
+
3
+ import org .apache .logging .log4j .Logger ;
4
+ import org .jetbrains .annotations .Nullable ;
5
+ import org .labkey .api .pipeline .PipelineJobException ;
6
+ import org .labkey .api .sequenceanalysis .model .AnalysisModel ;
7
+ import org .labkey .api .sequenceanalysis .model .Readset ;
8
+ import org .labkey .api .sequenceanalysis .pipeline .AbstractAnalysisStepProvider ;
9
+ import org .labkey .api .sequenceanalysis .pipeline .AbstractPipelineStep ;
10
+ import org .labkey .api .sequenceanalysis .pipeline .AnalysisOutputImpl ;
11
+ import org .labkey .api .sequenceanalysis .pipeline .AnalysisStep ;
12
+ import org .labkey .api .sequenceanalysis .pipeline .PipelineContext ;
13
+ import org .labkey .api .sequenceanalysis .pipeline .PipelineStepProvider ;
14
+ import org .labkey .api .sequenceanalysis .pipeline .ReferenceGenome ;
15
+ import org .labkey .api .sequenceanalysis .pipeline .SamtoolsIndexer ;
16
+ import org .labkey .api .sequenceanalysis .pipeline .SamtoolsRunner ;
17
+ import org .labkey .api .sequenceanalysis .pipeline .SequencePipelineService ;
18
+ import org .labkey .api .sequenceanalysis .run .SimpleScriptWrapper ;
19
+ import org .labkey .sequenceanalysis .util .SequenceUtil ;
20
+
21
+ import java .io .File ;
22
+ import java .util .ArrayList ;
23
+ import java .util .List ;
24
+
25
+ public class SawfishAnalysis extends AbstractPipelineStep implements AnalysisStep
26
+ {
27
+ public SawfishAnalysis (PipelineStepProvider <?> provider , PipelineContext ctx )
28
+ {
29
+ super (provider , ctx );
30
+ }
31
+
32
+ public static class Provider extends AbstractAnalysisStepProvider <SawfishAnalysis >
33
+ {
34
+ public Provider ()
35
+ {
36
+ super ("sawfish" , "Sawfish Analysis" , null , "This will run sawfish SV dicvoery and calling on the selected BAMs" , List .of (), null , null );
37
+ }
38
+
39
+
40
+ @ Override
41
+ public SawfishAnalysis create (PipelineContext ctx )
42
+ {
43
+ return new SawfishAnalysis (this , ctx );
44
+ }
45
+ }
46
+
47
+ @ Override
48
+ public Output performAnalysisPerSampleRemote (Readset rs , File inputBam , ReferenceGenome referenceGenome , File outputDir ) throws PipelineJobException
49
+ {
50
+ AnalysisOutputImpl output = new AnalysisOutputImpl ();
51
+
52
+ File inputFile = inputBam ;
53
+ if (SequenceUtil .FILETYPE .cram .getFileType ().isType (inputFile ))
54
+ {
55
+ CramToBam samtoolsRunner = new CramToBam (getPipelineCtx ().getLogger ());
56
+ File bam = new File (getPipelineCtx ().getWorkingDirectory (), inputFile .getName ().replaceAll (".cram$" , ".bam" ));
57
+ File bamIdx = new File (bam .getPath () + ".bai" );
58
+ if (!bamIdx .exists ())
59
+ {
60
+ samtoolsRunner .convert (inputFile , bam , referenceGenome .getWorkingFastaFile (), SequencePipelineService .get ().getMaxThreads (getPipelineCtx ().getLogger ()));
61
+ new SamtoolsIndexer (getPipelineCtx ().getLogger ()).execute (bam );
62
+ }
63
+ else
64
+ {
65
+ getPipelineCtx ().getLogger ().debug ("BAM index exists, will not re-convert CRAM" );
66
+ }
67
+
68
+ inputFile = bam ;
69
+
70
+ output .addIntermediateFile (bam );
71
+ output .addIntermediateFile (bamIdx );
72
+ }
73
+
74
+ List <String > args = new ArrayList <>();
75
+ args .add (getExe ().getPath ());
76
+ args .add ("discover" );
77
+
78
+ args .add ("--bam" );
79
+ args .add (inputFile .getPath ());
80
+
81
+ args .add ("--ref" );
82
+ args .add (referenceGenome .getWorkingFastaFile ().getPath ());
83
+
84
+ File svOutDir = new File (outputDir , "sawfish" );
85
+ args .add ("--output-dir" );
86
+ args .add (svOutDir .getPath ());
87
+
88
+ Integer maxThreads = SequencePipelineService .get ().getMaxThreads (getPipelineCtx ().getLogger ());
89
+ if (maxThreads != null )
90
+ {
91
+ args .add ("--threads" );
92
+ args .add (String .valueOf (maxThreads ));
93
+ }
94
+
95
+ File bcf = new File (svOutDir , "candidate.sv.bcf" );
96
+ File bcfIdx = new File (bcf .getPath () + ".csi" );
97
+ if (bcfIdx .exists ())
98
+ {
99
+ getPipelineCtx ().getLogger ().debug ("BCF index already exists, reusing output" );
100
+ }
101
+ else
102
+ {
103
+ new SimpleScriptWrapper (getPipelineCtx ().getLogger ()).execute (args );
104
+ }
105
+
106
+ if (!bcf .exists ())
107
+ {
108
+ throw new PipelineJobException ("Unable to find file: " + bcf .getPath ());
109
+ }
110
+
111
+ output .addSequenceOutput (bcf , rs .getName () + ": sawfish" , "Sawfish SV Discovery" , rs .getReadsetId (), null , referenceGenome .getGenomeId (), null );
112
+
113
+ return output ;
114
+ }
115
+
116
+ @ Override
117
+ public Output performAnalysisPerSampleLocal (AnalysisModel model , File inputBam , File referenceFasta , File outDir ) throws PipelineJobException
118
+ {
119
+ return null ;
120
+ }
121
+
122
+ private File getExe ()
123
+ {
124
+ return SequencePipelineService .get ().getExeForPackage ("SAWFISHPATH" , "sawfish" );
125
+ }
126
+
127
+ private static class CramToBam extends SamtoolsRunner
128
+ {
129
+ public CramToBam (Logger log )
130
+ {
131
+ super (log );
132
+ }
133
+
134
+ public void convert (File inputCram , File outputBam , File fasta , @ Nullable Integer threads ) throws PipelineJobException
135
+ {
136
+ getLogger ().info ("Converting CRAM to BAM" );
137
+
138
+ execute (getParams (inputCram , outputBam , fasta , threads ));
139
+ }
140
+
141
+ private List <String > getParams (File inputCram , File outputBam , File fasta , @ Nullable Integer threads )
142
+ {
143
+ List <String > params = new ArrayList <>();
144
+ params .add (getSamtoolsPath ().getPath ());
145
+ params .add ("view" );
146
+ params .add ("-b" );
147
+ params .add ("-T" );
148
+ params .add (fasta .getPath ());
149
+ params .add ("-o" );
150
+ params .add (outputBam .getPath ());
151
+
152
+ if (threads != null )
153
+ {
154
+ params .add ("-@" );
155
+ params .add (String .valueOf (threads ));
156
+ }
157
+
158
+ params .add (inputCram .getPath ());
159
+
160
+ return params ;
161
+ }
162
+ }
163
+ }
0 commit comments