diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java index dfc4f0232..27152ed45 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java @@ -44,6 +44,7 @@ abstract public class AbstractCommandWrapper implements CommandWrapper private File _outputDir = null; private File _workingDir = null; private Logger _log; + private boolean _logPath = false; private Level _logLevel = Level.DEBUG; private boolean _warnNonZeroExits = true; private boolean _throwNonZeroExits = true; @@ -205,9 +206,11 @@ private void setPath(ProcessBuilder pb) { String path = System.getenv("PATH"); - getLogger().debug("Existing PATH: " + path); - getLogger().debug("toolDir: " + toolDir); - + if (_logPath) + { + getLogger().debug("Existing PATH: " + path); + getLogger().debug("toolDir: " + toolDir); + } if (path == null) { @@ -229,11 +232,19 @@ private void setPath(ProcessBuilder pb) path = fileExe.getParent() + File.pathSeparatorChar + path; } - getLogger().debug("using path: " + path); + if (_logPath) + { + getLogger().debug("using path: " + path); + } pb.environment().put("PATH", path); } } + public void setLogPath(boolean logPath) + { + _logPath = logPath; + } + public void setOutputDir(File outputDir) { _outputDir = outputDir; diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index e3cce3c55..8ecd60f37 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -319,3 +319,17 @@ then else echo "Already installed" fi + +if [[ ! -e ${LKTOOLS_DIR}/sawfish || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf $LKTOOLS_DIR/sawfish* + + wget https://github.com/PacificBiosciences/sawfish/releases/download/v2.0.0/sawfish-v2.0.0-x86_64-unknown-linux-gnu.tar.gz + tar -xzf sawfish-v2.0.0-x86_64-unknown-linux-gnu.tar.gz + + mv sawfish-v2.0.0-x86_64-unknown-linux-gnu $LKTOOLS_DIR/ + ln -s $LKTOOLS_DIR/sawfish-v2.0.0-x86_64-unknown-linux-gnu/bin/sawfish $LKTOOLS_DIR/ +else + echo "Already installed" +fi diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 94f2bde2e..3ac54c27c 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -123,6 +123,8 @@ import org.labkey.sequenceanalysis.run.analysis.PbsvAnalysis; import org.labkey.sequenceanalysis.run.analysis.PbsvJointCallingHandler; import org.labkey.sequenceanalysis.run.analysis.PindelAnalysis; +import org.labkey.sequenceanalysis.run.analysis.SawfishAnalysis; +import org.labkey.sequenceanalysis.run.analysis.SawfishJointCallingHandler; import org.labkey.sequenceanalysis.run.analysis.SequenceBasedTypingAnalysis; import org.labkey.sequenceanalysis.run.analysis.SnpCountAnalysis; import org.labkey.sequenceanalysis.run.analysis.SubreadAnalysis; @@ -342,6 +344,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new PindelAnalysis.Provider()); SequencePipelineService.get().registerPipelineStep(new PbsvAnalysis.Provider()); SequencePipelineService.get().registerPipelineStep(new GenrichStep.Provider()); + SequencePipelineService.get().registerPipelineStep(new SawfishAnalysis.Provider()); SequencePipelineService.get().registerPipelineStep(new PARalyzerAnalysis.Provider()); SequencePipelineService.get().registerPipelineStep(new RnaSeQCStep.Provider()); @@ -400,6 +403,7 @@ public static void 
registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new NextCladeHandler()); SequenceAnalysisService.get().registerFileHandler(new ConvertToCramHandler()); SequenceAnalysisService.get().registerFileHandler(new PbsvJointCallingHandler()); + SequenceAnalysisService.get().registerFileHandler(new SawfishJointCallingHandler()); SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler()); SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler()); SequenceAnalysisService.get().registerFileHandler(new ParagraphStep()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishAnalysis.java new file mode 100644 index 000000000..421f77ffb --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishAnalysis.java @@ -0,0 +1,163 @@ +package org.labkey.sequenceanalysis.run.analysis; + +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.model.AnalysisModel; +import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.AbstractAnalysisStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStep; +import org.labkey.api.sequenceanalysis.pipeline.AnalysisOutputImpl; +import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SamtoolsIndexer; +import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import 
org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +public class SawfishAnalysis extends AbstractPipelineStep implements AnalysisStep +{ + public SawfishAnalysis(PipelineStepProvider provider, PipelineContext ctx) + { + super(provider, ctx); + } + + public static class Provider extends AbstractAnalysisStepProvider + { + public Provider() + { + super("sawfish", "Sawfish Analysis", null, "This will run sawfish SV discovery and calling on the selected BAMs", List.of(), null, null); + } + + + @Override + public SawfishAnalysis create(PipelineContext ctx) + { + return new SawfishAnalysis(this, ctx); + } + } + + @Override + public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException + { + AnalysisOutputImpl output = new AnalysisOutputImpl(); + + File inputFile = inputBam; + if (SequenceUtil.FILETYPE.cram.getFileType().isType(inputFile)) + { + CramToBam samtoolsRunner = new CramToBam(getPipelineCtx().getLogger()); + File bam = new File(getPipelineCtx().getWorkingDirectory(), inputFile.getName().replaceAll(".cram$", ".bam")); + File bamIdx = new File(bam.getPath() + ".bai"); + if (!bamIdx.exists()) + { + samtoolsRunner.convert(inputFile, bam, referenceGenome.getWorkingFastaFile(), SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger())); + new SamtoolsIndexer(getPipelineCtx().getLogger()).execute(bam); + } + else + { + getPipelineCtx().getLogger().debug("BAM index exists, will not re-convert CRAM"); + } + + inputFile = bam; + + output.addIntermediateFile(bam); + output.addIntermediateFile(bamIdx); + } + + List args = new ArrayList<>(); + args.add(getExe().getPath()); + args.add("discover"); + + args.add("--bam"); + args.add(inputFile.getPath()); + + args.add("--ref"); + args.add(referenceGenome.getWorkingFastaFile().getPath()); + + File svOutDir = new File(outputDir, "sawfish"); + 
args.add("--output-dir"); + args.add(svOutDir.getPath()); + + Integer maxThreads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()); + if (maxThreads != null) + { + args.add("--threads"); + args.add(String.valueOf(maxThreads)); + } + + File bcf = new File(svOutDir, "candidate.sv.bcf"); + File bcfIdx = new File(bcf.getPath() + ".csi"); + if (bcfIdx.exists()) + { + getPipelineCtx().getLogger().debug("BCF index already exists, reusing output"); + } + else + { + new SimpleScriptWrapper(getPipelineCtx().getLogger()).execute(args); + } + + if (!bcf.exists()) + { + throw new PipelineJobException("Unable to find file: " + bcf.getPath()); + } + + output.addSequenceOutput(bcf, rs.getName() + ": sawfish", "Sawfish SV Discovery", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); + + return output; + } + + @Override + public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, File referenceFasta, File outDir) throws PipelineJobException + { + return null; + } + + private File getExe() + { + return SequencePipelineService.get().getExeForPackage("SAWFISHPATH", "sawfish"); + } + + private static class CramToBam extends SamtoolsRunner + { + public CramToBam(Logger log) + { + super(log); + } + + public void convert(File inputCram, File outputBam, File fasta, @Nullable Integer threads) throws PipelineJobException + { + getLogger().info("Converting CRAM to BAM"); + + execute(getParams(inputCram, outputBam, fasta, threads)); + } + + private List getParams(File inputCram, File outputBam, File fasta, @Nullable Integer threads) + { + List params = new ArrayList<>(); + params.add(getSamtoolsPath().getPath()); + params.add("view"); + params.add("-b"); + params.add("-T"); + params.add(fasta.getPath()); + params.add("-o"); + params.add(outputBam.getPath()); + + if (threads != null) + { + params.add("-@"); + params.add(String.valueOf(threads)); + } + + params.add(inputCram.getPath()); + + return params; + } + } +} \ No newline at end 
of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishJointCallingHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishJointCallingHandler.java new file mode 100644 index 000000000..ce3b066a9 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishJointCallingHandler.java @@ -0,0 +1,183 @@ +package org.labkey.sequenceanalysis.run.analysis; + +import org.apache.commons.io.FileUtils; +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.stream.Collectors; + +public class SawfishJointCallingHandler extends AbstractParameterizedOutputHandler +{ + private static final String OUTPUT_CATEGORY = "Sawfish VCF"; + + public SawfishJointCallingHandler() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.NAME), "Sawfish Joint-Call", "Runs sawfish joint-call, which jointly calls 
SVs from PacBio CCS data", new LinkedHashSet<>(List.of("sequenceanalysis/panel/VariantScatterGatherPanel.js")), Arrays.asList( + ToolParameterDescriptor.create("fileName", "VCF Filename", "The name of the resulting file.", "textfield", new JSONObject(){{ + put("allowBlank", false); + put("doNotIncludeInTemplates", true); + }}, null) + )); + } + + @Override + public boolean canProcess(SequenceOutputFile o) + { + return o.getFile() != null && SequenceUtil.FILETYPE.bcf.getFileType().isType(o.getFile()); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + public static class Processor implements SequenceOutputProcessor + { + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + List filesToProcess = inputFiles.stream().map(SequenceOutputFile::getFile).collect(Collectors.toList()); + + ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenomes().iterator().next(); + String outputBaseName = ctx.getParams().getString("fileName"); + if (outputBaseName.toLowerCase().endsWith(".gz")) + { + outputBaseName = outputBaseName.replaceAll(".gz$", ""); + } + + if (outputBaseName.toLowerCase().endsWith(".vcf")) + { + outputBaseName = outputBaseName.replaceAll(".vcf$", ""); + } + + File expectedFinalOutput = new File(ctx.getOutputDir(), outputBaseName + ".vcf.gz"); + + File ouputVcf = runSawfishCall(ctx, filesToProcess, genome, outputBaseName); + + SequenceOutputFile so = new SequenceOutputFile(); + so.setName("Sawfish call: " + outputBaseName); + 
so.setFile(ouputVcf); + so.setCategory(OUTPUT_CATEGORY); + so.setLibrary_id(genome.getGenomeId()); + + ctx.addSequenceOutput(so); + } + + private File runSawfishCall(JobContext ctx, List inputs, ReferenceGenome genome, String outputBaseName) throws PipelineJobException + { + if (inputs.isEmpty()) + { + throw new PipelineJobException("No inputs provided"); + } + + List args = new ArrayList<>(); + args.add(getExe().getPath()); + args.add("joint-call"); + + Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); + if (maxThreads != null) + { + args.add("--threads"); + args.add(String.valueOf(maxThreads)); + } + + args.add("--ref"); + args.add(genome.getWorkingFastaFile().getPath()); + + for (File sample : inputs) + { + args.add("--sample"); + args.add(sample.getParentFile().getPath()); + } + + File outDir = new File(ctx.getOutputDir(), "sawfish"); + args.add("--output-dir"); + args.add(outDir.getPath()); + + new SimpleScriptWrapper(ctx.getLogger()).execute(args); + + File vcfOut = new File(outDir, "genotyped.sv.vcf.gz"); + if (!vcfOut.exists()) + { + throw new PipelineJobException("Unable to find file: " + vcfOut.getPath()); + } + + File vcfOutFinal = new File(ctx.getOutputDir(), outputBaseName + ".vcf.gz"); + + try + { + if (vcfOutFinal.exists()) + { + vcfOutFinal.delete(); + } + FileUtils.moveFile(vcfOut, vcfOutFinal); + + File targetIndex = new File(vcfOutFinal.getPath() + ".tbi"); + if (targetIndex.exists()) + { + targetIndex.delete(); + } + + File origIndex = new File(vcfOut.getPath() + ".tbi"); + if (origIndex.exists()) + { + FileUtils.moveFile(origIndex, targetIndex); + } + else + { + SequenceAnalysisService.get().ensureVcfIndex(vcfOutFinal, ctx.getLogger(), true); + } + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + return vcfOutFinal; + } + + private File getExe() + { + return SequencePipelineService.get().getExeForPackage("SAWFISHPATH", "sawfish"); + } + } +} diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java index 0a2ce0784..a5357d176 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java @@ -96,6 +96,7 @@ public enum FILETYPE bed(Collections.singletonList(".bed"), true), bw(Collections.singletonList(".bw"), false), vcf(List.of(".vcf"), true), + bcf(List.of(".bcf"), true), gvcf(List.of(".g.vcf"), true); private final List _extensions; diff --git a/Studies/src/org/labkey/studies/query/LookupSetsTable.java b/Studies/src/org/labkey/studies/query/LookupSetsTable.java index 4d6ff8230..83c8c78ef 100644 --- a/Studies/src/org/labkey/studies/query/LookupSetsTable.java +++ b/Studies/src/org/labkey/studies/query/LookupSetsTable.java @@ -38,14 +38,14 @@ public UpdateService(SimpleUserSchema.SimpleTable ti) @Override protected void afterInsertUpdate(int count, BatchValidationException errors) { - LookupSetsManager.get().getCache().clear(); + StudiesUserSchema.repopulateCaches(getUserSchema().getUser(), getUserSchema().getContainer()); } @Override protected Map deleteRow(User user, Container container, Map oldRowMap) throws QueryUpdateServiceException, SQLException, InvalidKeyException { Map row = super.deleteRow(user, container, oldRowMap); - LookupSetsManager.get().getCache().clear(); + StudiesUserSchema.repopulateCaches(getUserSchema().getUser(), getUserSchema().getContainer()); return row; } @@ -53,7 +53,7 @@ protected Map deleteRow(User user, Container container, Map> getPropertySetNames() { Map> nameMap = (Map>) LookupSetsManager.get().getCache().get(LookupSetTable.getCacheKey(getTargetContainer())); @@ -81,6 +90,7 @@ private Map> getPropertySetNames() return nameMap; } + _log.debug("Populating lookup tables in StudiesUserSchema.getPropertySetNames() for container: " + getTargetContainer().getName()); nameMap = new 
CaseInsensitiveHashMap<>(); TableSelector ts = new TableSelector(_dbSchema.getTable(TABLE_LOOKUP_SETS), new SimpleFilter(FieldKey.fromString("container"), getTargetContainer().getId()), null); diff --git a/discvrcore/src/org/labkey/discvrcore/DiscvrCoreController.java b/discvrcore/src/org/labkey/discvrcore/DiscvrCoreController.java index 4fc7a91d8..cd83a749c 100644 --- a/discvrcore/src/org/labkey/discvrcore/DiscvrCoreController.java +++ b/discvrcore/src/org/labkey/discvrcore/DiscvrCoreController.java @@ -291,4 +291,16 @@ public URLHelper getRedirectURL(Object o) throws Exception return DetailsURL.fromString("laboratory/setTableIncrementValue.view", getContainer()).getActionURL(); } } + + // This allows registration of this action without creating a dependency between laboratory and discvrcore + @UtilityAction(label = "Manage File Roots", description = "This standalone file root management action can be used on folder types that do not support the normal 'Manage Folder' UI.") + @RequiresPermission(AdminPermission.class) + public class ManageFileRootAction extends SimpleRedirectAction + { + @Override + public URLHelper getRedirectURL(Object o) throws Exception + { + return DetailsURL.fromString("admin/manageFileRoot.view", getContainer()).getActionURL(); + } + } } diff --git a/singlecell/resources/chunks/FindClustersAndDimRedux.R b/singlecell/resources/chunks/FindClustersAndDimRedux.R index b7fdc7c0f..47ecfa6c2 100644 --- a/singlecell/resources/chunks/FindClustersAndDimRedux.R +++ b/singlecell/resources/chunks/FindClustersAndDimRedux.R @@ -15,24 +15,26 @@ if (!reticulate::py_module_available(module = 'leidenalg')) { } } +if (all(is.null(clusterResolutions)) || clusterResolutions == '') { + clusterResolutions <- c(0.2, 0.4, 0.6, 0.8, 1.2) +} else if (is.character(clusterResolutions)) { + clusterResolutionsOrig <- clusterResolutions + clusterResolutions <- gsub(clusterResolutions, pattern = ' ', replacement = '') + clusterResolutions <- 
unlist(strsplit(clusterResolutions, split = ',')) + clusterResolutions <- as.numeric(clusterResolutions) + if (any(is.na(clusterResolutions))) { + stop(paste0('Some values for clusterResolutions were not numeric: ', clusterResolutionsOrig)) + } +} else if (is.numeric(clusterResolutions)) { + # No action needed +} else { + stop('Must provide a value for clusterResolutions') +} + for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - if (all(is.null(clusterResolutions))) { - clusterResolutions <- c(0.2, 0.4, 0.6, 0.8, 1.2) - } else if (is.character(clusterResolutions)) { - clusterResolutionsOrig <- clusterResolutions - clusterResolutions <- gsub(clusterResolutions, pattern = ' ', replacement = '') - clusterResolutions <- unlist(strsplit(clusterResolutions, split = ',')) - clusterResolutions <- as.numeric(clusterResolutions) - if (any(is.na(clusterResolutions))) { - stop(paste0('Some values for clusterResolutions were not numeric: ', clusterResolutionsOrig)) - } - } else { - stop('Must provide a value for clusterResolutions') - } - seuratObj <- CellMembrane::FindClustersAndDimRedux(seuratObj, minDimsToUse = minDimsToUse, useLeiden = useLeiden, clusterResolutions = clusterResolutions) saveData(seuratObj, datasetId) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/SubsetSeurat.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/SubsetSeurat.java index ec56a127c..7a646eb38 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/SubsetSeurat.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/SubsetSeurat.java @@ -33,7 +33,8 @@ public Provider() put("height", 150); put("width", 600); put("delimiter", DELIM); - }}, null) + }}, null), + ToolParameterDescriptor.create("useDplyr", "Use dplyr", "If checked, the subset will be executed using dplyr::filter rather than Seurat::subset. 
This should allow more complex expressions to be used, including negations", "checkbox", null, false) ), List.of("/sequenceanalysis/field/TrimmingTextArea.js"), null); } @@ -70,6 +71,9 @@ protected List loadChunkFromFile() throws PipelineJobException final String[] values = val.split(DELIM); + ToolParameterDescriptor pd2 = getProvider().getParameterByName("useDplyr"); + final boolean useDplyr = pd2.extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + List ret = new ArrayList<>(); for (String line : super.loadChunkFromFile()) { @@ -81,15 +85,23 @@ protected List loadChunkFromFile() throws PipelineJobException ret.add("\tif (!is.null(seuratObj)) {"); ret.add("\tprint(paste0('Subsetting dataset: ', datasetId, ' with the expression: " + subsetEscaped + "'))"); - ret.add("\t\tcells <- c()"); - ret.add("\t\ttryCatch({"); - ret.add("\t\t\tcells <- WhichCells(seuratObj, expression = " + subset + ")"); - ret.add("\t\t}, error = function(e){"); - ret.add("\t\t\tif (!is.null(e) && e$message == 'Cannot find cells provided') {"); - ret.add("\t\t\t\tprint(paste0('There were no cells remaining after the subset: ', '" + subsetEscaped + "'))"); - ret.add("\t\t\t}"); - ret.add("\t\t})"); - ret.add(""); + if (useDplyr) + { + ret.add("\t\t\tcells <- rownames(seuratObj@meta.data |> dplyr::filter( " + subset + " ))"); + } + else + { + ret.add("\t\tcells <- c()"); + ret.add("\t\ttryCatch({"); + ret.add("\t\t\tcells <- WhichCells(seuratObj, expression = " + subset + ")"); + ret.add("\t\t}, error = function(e){"); + ret.add("\t\t\tif (!is.null(e) && e$message == 'Cannot find cells provided') {"); + ret.add("\t\t\t\tprint(paste0('There were no cells remaining after the subset: ', '" + subsetEscaped + "'))"); + ret.add("\t\t\t}"); + ret.add("\t\t})"); + ret.add(""); + } + ret.add("\t\tif (length(cells) == 0) {"); ret.add("\t\t\tprint(paste0('There were no cells after subsetting for dataset: ', datasetId, ', with subset: ', '" + subsetEscaped + "'))"); 
ret.add("\t\t\tseuratObj <- NULL"); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java index eb7ae055d..d4ddc7326 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java @@ -1,5 +1,6 @@ package org.labkey.singlecell.pipeline.singlecell; +import au.com.bytecode.opencsv.CSVReader; import htsjdk.samtools.util.IOUtil; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; @@ -9,9 +10,11 @@ import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.reader.Readers; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; @@ -40,8 +43,8 @@ public class VireoHandler extends AbstractParameterizedOutputHandler(PageFlowUtil.set("sequenceanalysis/field/SequenceOutputFileSelectorField.js")), Arrays.asList( - ToolParameterDescriptor.create("nDonors", "# Donors", "The number of donors to demultiplex", "ldk-integerfield", new JSONObject(){{ - put("allowBlank", false); + ToolParameterDescriptor.create("nDonors", "# Donors", "The number of donors to demultiplex. 
This can be blank only if a reference VCF is provided.", "ldk-integerfield", new JSONObject(){{ + }}, null), ToolParameterDescriptor.create("maxDepth", "Max Depth", "At a position, read maximally INT reads per input file, to avoid excessive memory usage", "ldk-integerfield", new JSONObject(){{ put("minValue", 0); @@ -111,6 +114,13 @@ public void init(JobContext ctx, List inputFiles, List inputFiles, JobContext c File barcodesGz = getBarcodesFile(inputFiles.get(0).getFile()); File bam = getBamFile(inputFiles.get(0).getFile()); - File barcodes = new File(ctx.getWorkingDirectory(), "barcodes.csv"); + File barcodes = new File(ctx.getWorkingDirectory(), "barcodes.tsv"); try (BufferedReader reader = IOUtil.openFileForBufferedUtf8Reading(barcodesGz); PrintWriter writer = PrintWriters.getPrintWriter(barcodes)) { String line; @@ -201,12 +211,6 @@ public void processFilesRemote(List inputFiles, JobContext c cellsnp.add(maxThreads.toString()); } - cellsnp.add("--minMAF"); - cellsnp.add("0.1"); - - cellsnp.add("--minCOUNT"); - cellsnp.add("100"); - String maxDepth = StringUtils.trimToNull(ctx.getParams().optString("maxDepth")); if (maxDepth != null) { @@ -239,6 +243,12 @@ public void processFilesRemote(List inputFiles, JobContext c cellsnp.add("--chrom"); cellsnp.add(contigs); } + + cellsnp.add("--minMAF"); + cellsnp.add("0.1"); + + cellsnp.add("--minCOUNT"); + cellsnp.add("100"); } new SimpleScriptWrapper(ctx.getLogger()).execute(cellsnp); @@ -256,6 +266,46 @@ public void processFilesRemote(List inputFiles, JobContext c } } + File cellSnpBaseVcf = new File(cellsnpDir, "cellSNP.base.vcf.gz"); + if (!cellSnpBaseVcf.exists()) + { + throw new PipelineJobException("Unable to find cellsnp base VCF"); + } + + + File cellSnpCellsVcf = new File(cellsnpDir, "cellSNP.cells.vcf.gz"); + if (!cellSnpCellsVcf.exists()) + { + throw new PipelineJobException("Unable to find cellsnp calls VCF"); + } + + int vcfFile = ctx.getParams().optInt(REF_VCF, -1); + File refVcfSubset = null; + if (vcfFile 
> -1) + { + File vcf = ctx.getSequenceSupport().getCachedData(vcfFile); + if (vcf == null || !vcf.exists()) + { + throw new PipelineJobException("Unable to find file with ID: " + vcfFile); + } + + refVcfSubset = new File(ctx.getWorkingDirectory(), vcf.getName()); + BcftoolsRunner bcftoolsRunner = new BcftoolsRunner(ctx.getLogger()); + bcftoolsRunner.execute(Arrays.asList( + BcftoolsRunner.getBcfToolsPath().getAbsolutePath(), + "view", + vcf.getPath(), + "-R", + cellSnpCellsVcf.getPath(), + "-Oz", + "-o", + refVcfSubset.getPath() + )); + + ctx.getFileManager().addIntermediateFile(refVcfSubset); + ctx.getFileManager().addIntermediateFile(new File(refVcfSubset.getPath() + ".tbi")); + } + List vireo = new ArrayList<>(); vireo.add("vireo"); vireo.add("-c"); @@ -270,15 +320,20 @@ public void processFilesRemote(List inputFiles, JobContext c vireo.add("-o"); vireo.add(ctx.getWorkingDirectory().getPath()); - int nDonors = ctx.getParams().optInt("nDonors", 0); boolean storeCellSnpVcf = ctx.getParams().optBoolean("storeCellSnpVcf", false); - if (nDonors == 0) + if (refVcfSubset != null) { - throw new PipelineJobException("Must provide nDonors"); + vireo.add("-d"); + vireo.add(refVcfSubset.getPath()); } - vireo.add("-N"); - vireo.add(String.valueOf(nDonors)); + // Note: this value should be checked in init(). 
It is required only if refVCF is null + int nDonors = ctx.getParams().optInt("nDonors", -1); + if (nDonors > -1) + { + vireo.add("-N"); + vireo.add(String.valueOf(nDonors)); + } if (nDonors == 1) { @@ -312,54 +367,81 @@ else if (outFiles.length > 1) so.setName(inputFiles.get(0).getName() + ": Vireo Demultiplexing"); } so.setCategory("Vireo Demultiplexing"); - ctx.addSequenceOutput(so); - } + StringBuilder description = new StringBuilder(); + if (vcfFile > -1) + { + description.append("Reference VCF ID: \n").append(vcfFile); + } - File cellSnpBaseVcf = new File(cellsnpDir, "cellSNP.base.vcf.gz"); - if (!cellSnpBaseVcf.exists()) - { - throw new PipelineJobException("Unable to find cellsnp base VCF"); - } + File summary = new File(ctx.getOutputDir(), "summary.tsv"); + if (!summary.exists()) + { + throw new PipelineJobException("Missing file: " + summary.getPath()); + } + description.append("Results:\n"); + try (CSVReader reader = new CSVReader(Readers.getReader(summary), '\t')) + { + String[] line; + while ((line = reader.readNext()) != null) + { + if ("Var1".equals(line[0])) + { + continue; + } - File cellSnpCellsVcf = new File(cellsnpDir, "cellSNP.cells.vcf.gz"); - if (!cellSnpCellsVcf.exists()) - { - throw new PipelineJobException("Unable to find cellsnp calls VCF"); - } + description.append(line[0]).append(": ").append(line[1]).append("\n"); + } + } + catch (IOException e) + { + throw new PipelineJobException(e); + } - sortAndFixVcf(cellSnpBaseVcf, genome, ctx.getLogger()); - sortAndFixVcf(cellSnpCellsVcf, genome, ctx.getLogger()); + so.setDescription(StringUtils.trimToEmpty(description.toString())); + ctx.addSequenceOutput(so); + } if (storeCellSnpVcf) { + File fixedVcf = sortAndFixVcf(cellSnpBaseVcf, genome, ctx.getLogger(), ctx.getWorkingDirectory()); + SequenceOutputFile so = new SequenceOutputFile(); so.setReadset(inputFiles.get(0).getReadset()); so.setLibrary_id(inputFiles.get(0).getLibrary_id()); - so.setFile(cellSnpCellsVcf); + so.setFile(fixedVcf); if 
(so.getReadset() != null) { so.setName(ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() + ": Cellsnp-lite VCF"); } else { - so.setName(inputFiles.get(0).getName() + ": Cellsnp-lite VCF"); + so.setName(inputFiles.get(0).getName() + ": Cellsnp-lite Base VCF"); } so.setCategory("VCF File"); ctx.addSequenceOutput(so); } + else + { + ctx.getFileManager().addIntermediateFile(cellSnpBaseVcf.getParentFile()); + } } - private void sortAndFixVcf(File vcf, ReferenceGenome genome, Logger log) throws PipelineJobException + private File sortAndFixVcf(File vcf, ReferenceGenome genome, Logger log, File outDir) throws PipelineJobException { + File outVcf = new File(outDir, vcf.getName()); + // This original VCF is generally not properly sorted, and has an invalid index. This is redundant, the VCF is not that large: try { - SequencePipelineService.get().sortROD(vcf, log, 2); - SequenceAnalysisService.get().ensureVcfIndex(vcf, log, true); + FileUtils.copyFile(vcf, outVcf); + SequencePipelineService.get().sortROD(outVcf, log, 2); + SequenceAnalysisService.get().ensureVcfIndex(outVcf, log, true); + + new UpdateVCFSequenceDictionary(log).execute(outVcf, genome.getSequenceDictionary()); + SequenceAnalysisService.get().ensureVcfIndex(outVcf, log); - new UpdateVCFSequenceDictionary(log).execute(vcf, genome.getSequenceDictionary()); - SequenceAnalysisService.get().ensureVcfIndex(vcf, log); + return outVcf; } catch (IOException e) {