diff --git a/tools/hall-lab-svtools-vcftobedpe.cwl b/tools/hall-lab-svtools-vcftobedpe.cwl
new file mode 100755
index 000000000..c73bf288c
--- /dev/null
+++ b/tools/hall-lab-svtools-vcftobedpe.cwl
@@ -0,0 +1,37 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: "cwl:draft-3.dev2"
+
+class: CommandLineTool
+
+description: |
+  Usage: vcftobedpe -i <in.vcf> -o [out.bedpe]
+
+requirements:
+  - "@import": envvar-global.cwl
+
+hints:
+  - class: DockerRequirement
+    dockerPull: biocrusoe/hall-lab-svtools
+
+inputs:
+  - id: "#input"
+    type: File
+    description: |
+      Input vcf file.
+    streamable: true
+    inputBinding:
+      prefix: "-i"
+
+stdout:  # no -o is passed; standard output is captured into this file
+  "output.bedpe"
+
+outputs:
+  - id: "#bedpe"
+    type: File
+    description: "The bedpe file"
+    streamable: true
+    outputBinding:
+      glob: "output.bedpe"  # must match the stdout file name above
+
+baseCommand: ["vcftobedpe"]
diff --git a/tools/jobs/vawk-job.json b/tools/jobs/vawk-job.json
new file mode 100644
index 000000000..b3a22608e
--- /dev/null
+++ b/tools/jobs/vawk-job.json
@@ -0,0 +1,7 @@
+{
+  "input": {
+    "class": "File",
+    "path": "../test-files/APGI2049_Tumor-manta.vcf"
+  },
+  "cmd": "{ print $1 }"
+}
diff --git a/tools/jobs/vcftobedpe-job.json b/tools/jobs/vcftobedpe-job.json
new file mode 100644
index 000000000..2e53ca1c7
--- /dev/null
+++ b/tools/jobs/vcftobedpe-job.json
@@ -0,0 +1,6 @@
+{
+  "input": {
+    "class": "File",
+    "path": "../test-files/APGI2049_Tumor-manta.vcf"
+  }
+}
diff --git a/tools/vawk.cwl b/tools/vawk.cwl
new file mode 100644
index 000000000..0618efdd8
--- /dev/null
+++ b/tools/vawk.cwl
@@ -0,0 +1,68 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: "cwl:draft-3.dev2"
+
+class: CommandLineTool
+
+requirements:
+  - "@import": envvar-global.cwl
+
+description: |
+  usage: vawk [-h] [-v VAR] [-c INFO_COL] [--header] [--debug] cmd [vcf]
+  positional arguments:
+    cmd                   vawk command syntax is exactly the same as awk syntax with
+                          a few additional features. The INFO field can be split using
+                          the I$ prefix and the SAMPLE field can be split using
+                          the S$ prefix. For example, I$AF prints the allele frequency of
+                          each variant and S$NA12878 prints the entire SAMPLE field for the
+                          NA12878 individual for each variant. S$* returns all samples.
+                          The SAMPLE field can be further split based on the keys in the
+                          FORMAT field of the VCF (column 9). For example, S$NA12877$GT
+                          returns the genotype of the NA12877 individual.
+                          ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }'
+    vcf                   VCF file (default: stdin)
+  optional arguments:
+    -h, --help            show this help message and exit
+    -v VAR, --var VAR     declare an external variable (e.g.: SIZE=10000)
+    -c INFO_COL, --col INFO_COL
+                          column of the INFO field [8]
+    --header              print VCF header
+    --debug               debugging level verbosity
+
+inputs:
+  - id: "#cmd"
+    type: string
+    description: |
+      vawk command syntax is exactly the same as awk syntax with a few
+      additional features. The INFO field can be split using the I$ prefix
+      and the SAMPLE field can be split using the S$ prefix. For example,
+      I$AF prints the allele frequency of each variant and S$NA12878 prints
+      the entire SAMPLE field for the NA12878 individual for each variant.
+      S$* returns all samples. The SAMPLE field can be further split based on
+      the keys in the FORMAT field of the VCF (column 9). For example,
+      S$NA12877$GT returns the genotype of the NA12877 individual.
+      ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }'
+    inputBinding:
+      position: 1  # the awk-style program comes first on the command line
+
+  - id: "#input"
+    type: File
+    description: |
+      VCF file
+    streamable: true  # only valid on File parameters, so it lives here rather than on "#cmd"
+    inputBinding:
+      position: 2  # the VCF path follows the program
+
+stdout:  # standard output of vawk is captured into this file
+  "output.vcf"
+
+outputs:
+  - id: "#processed"
+    type: File
+    description: "The resulting VCF file"
+    streamable: true
+    outputBinding:
+      glob: "output.vcf"  # must match the stdout file name above
+
+
+baseCommand: ["vawk"]