From 2033668adc5aa510d6581517d940c9d9773ff12a Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Sat, 12 Dec 2015 04:53:00 -0800 Subject: [PATCH 01/12] blank file to start hiveplots branch --- tools/vawk.cwl | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tools/vawk.cwl diff --git a/tools/vawk.cwl b/tools/vawk.cwl new file mode 100644 index 000000000..e69de29bb From 5a1dceb59e1af32d4759e659b661511532f9ee61 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Sat, 12 Dec 2015 05:25:28 -0800 Subject: [PATCH 02/12] initial description of vawk --- tools/vawk.cwl | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/tools/vawk.cwl b/tools/vawk.cwl index e69de29bb..36bb67883 100644 --- a/tools/vawk.cwl +++ b/tools/vawk.cwl @@ -0,0 +1,108 @@ +#!/usr/bin/env cwl-runner + +"@context": + "foaf": "http://xmlns.com/foaf/0.1/" + "doap": "http://usefulinc.com/ns/doap" + "adms": "http://purl.org/adms/" + "admssw": "http://purl.org/adms/sw/" + +adms:Asset: + admssw:SoftwareProject: + doap:name: "vawk" + doap:description: > + An awk-like VCF parser + doap:homepage: "https://github.com/cc2qe/vawk" + doap:repository: + - doap:GitRepository: + doap:location: "https://github.com/cc2qe/vawk.git" + doap:release: + - doap:revision: "0.0.1" + doap:license: "None" + doap:category: "commandline tool" + doap:programming-language: "Python" + doap:developer: + - foaf:Person: + foaf:name: "Colby Chiang" + foaf:Organization: "Washington University" + adms:AssetDistribution: + doap:name: "vawk.cwl" + doap:description: "Developed for CWL consortium http://commonwl.org/" + doap:specification: "http://common-workflow-language.github.io/draft-3/" + doap:release: "cwl:draft-3.dev2" + doap:homepage: "http://commonwl.org/" + doap:location: "https://github.com/common-workflow-language/workflows/blob/master/tools/vawk.cwl" + doap:repository: + - doap:GitRepository: + doap:location: 
"https://github.com/common-workflow-language/workflows" + doap:maintainer: + foaf:Person: + foaf:openid: "http://orcid.org/0000-0002-2961-9670" + foaf:name: "Michael R. Crusoe" + foaf:mbox: "mailto:crusoe@ucdavis.edu" + foaf:organization: "University of California, Davis" + +cwlVersion: "cwl:draft-3.dev2" + +class: CommandLineTool + +description: | + usage: vawk [-h] [-v VAR] [-c INFO_COL] [--header] [--debug] cmd [vcf] + positional arguments: + cmd vawk command syntax is exactly the same as awk syntax with + a few additional features. The INFO field can be split using + the I$ prefix and the SAMPLE field can be split using + the S$ prefix. For example, I$AF prints the allele frequency of + each variant and S$NA12878 prints the entire SAMPLE field for the + NA12878 individual for each variant. S$* returns all samples. + The SAMPLE field can be further split based on the keys in the + FORMAT field of the VCF (column 9). For example, S$NA12877$GT + returns the genotype of the NA12878 individual. + ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }' + vcf VCF file (default: stdin) + optional arguments: + -h, --help show this help message and exit + -v VAR, --var VAR declare an external variable (e.g.: SIZE=10000) + -c INFO_COL, --col INFO_COL + column of the INFO field [8] + --header print VCF header + --debug debugging level verbosity + +requirements: + - "@import": envvar-global.cwl + +inputs: + - id: "#cmd" + type: string + description: | + vawk command syntax is exactly the same as awk syntax with a few + additional features. The INFO field can be split using the I$ prefix + and the SAMPLE field can be split using the S$ prefix. For example, + I$AF prints the allele frequency of each variant and S$NA12878 prints + the entire SAMPLE field for the NA12878 individual for each variant. + S$* returns all samples. The SAMPLE field can be further split based on + the keys in the FORMAT field of the VCF (column 9). 
For example, + S$NA12877$GT returns the genotype of the NA12878 individual. + ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }' + inputBinding: + position: 1 + streamable: true + + - id: "#input" + type: File + description: | + VCF file + inputBinding: + position: 2 + +stdout: + "output.vcf" + +outputs: + - id: "#sorted" + type: File + description: "The resulting VCF file" + streamable: true + outputBinding: "output.vcf" + + +baseCommand: ["vawk"] From 6775ba99671228145795f682f5400deb7c2d5042 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Sat, 12 Dec 2015 05:43:01 -0800 Subject: [PATCH 03/12] tabs to spaces --- tools/vawk.cwl | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tools/vawk.cwl b/tools/vawk.cwl index 36bb67883..8d621357c 100644 --- a/tools/vawk.cwl +++ b/tools/vawk.cwl @@ -41,31 +41,31 @@ adms:Asset: foaf:mbox: "mailto:crusoe@ucdavis.edu" foaf:organization: "University of California, Davis" -cwlVersion: "cwl:draft-3.dev2" +cwlVersion: "cwl:draft-3.dev3" class: CommandLineTool description: | - usage: vawk [-h] [-v VAR] [-c INFO_COL] [--header] [--debug] cmd [vcf] - positional arguments: - cmd vawk command syntax is exactly the same as awk syntax with - a few additional features. The INFO field can be split using - the I$ prefix and the SAMPLE field can be split using - the S$ prefix. For example, I$AF prints the allele frequency of - each variant and S$NA12878 prints the entire SAMPLE field for the - NA12878 individual for each variant. S$* returns all samples. - The SAMPLE field can be further split based on the keys in the - FORMAT field of the VCF (column 9). For example, S$NA12877$GT - returns the genotype of the NA12878 individual. 
- ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }' - vcf VCF file (default: stdin) - optional arguments: - -h, --help show this help message and exit - -v VAR, --var VAR declare an external variable (e.g.: SIZE=10000) - -c INFO_COL, --col INFO_COL - column of the INFO field [8] - --header print VCF header - --debug debugging level verbosity + usage: vawk [-h] [-v VAR] [-c INFO_COL] [--header] [--debug] cmd [vcf] + positional arguments: + cmd vawk command syntax is exactly the same as awk syntax with + a few additional features. The INFO field can be split using + the I$ prefix and the SAMPLE field can be split using + the S$ prefix. For example, I$AF prints the allele frequency of + each variant and S$NA12878 prints the entire SAMPLE field for the + NA12878 individual for each variant. S$* returns all samples. + The SAMPLE field can be further split based on the keys in the + FORMAT field of the VCF (column 9). For example, S$NA12877$GT + returns the genotype of the NA12878 individual. + ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }' + vcf VCF file (default: stdin) + optional arguments: + -h, --help show this help message and exit + -v VAR, --var VAR declare an external variable (e.g.: SIZE=10000) + -c INFO_COL, --col INFO_COL + column of the INFO field [8] + --header print VCF header + --debug debugging level verbosity requirements: - "@import": envvar-global.cwl @@ -74,15 +74,15 @@ inputs: - id: "#cmd" type: string description: | - vawk command syntax is exactly the same as awk syntax with a few - additional features. The INFO field can be split using the I$ prefix + vawk command syntax is exactly the same as awk syntax with a few + additional features. The INFO field can be split using the I$ prefix and the SAMPLE field can be split using the S$ prefix. For example, I$AF prints the allele frequency of each variant and S$NA12878 prints the entire SAMPLE field for the NA12878 individual for each variant. 
S$* returns all samples. The SAMPLE field can be further split based on the keys in the FORMAT field of the VCF (column 9). For example, S$NA12877$GT returns the genotype of the NA12878 individual. - ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }' + ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }' inputBinding: position: 1 streamable: true From 7c5d9e21f025b2e0afd398c86d2bcceb4789e31e Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Sat, 12 Dec 2015 05:43:13 -0800 Subject: [PATCH 04/12] update to draft3.dev3 --- tools/vawk.cwl | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/vawk.cwl b/tools/vawk.cwl index 8d621357c..055d47077 100644 --- a/tools/vawk.cwl +++ b/tools/vawk.cwl @@ -1,10 +1,16 @@ #!/usr/bin/env cwl-runner -"@context": - "foaf": "http://xmlns.com/foaf/0.1/" - "doap": "http://usefulinc.com/ns/doap" - "adms": "http://purl.org/adms/" - "admssw": "http://purl.org/adms/sw/" +$namespaces: + - foaf: "http://xmlns.com/foaf/0.1/" + - doap: "http://usefulinc.com/ns/doap" + - adms: "http://purl.org/adms/" + - admssw: "http://purl.org/adms/sw/" + +$schemas: + - https://joinup.ec.europa.eu/svn/adms_foss/adms_sw_v1.00/adms_sw_v1.00.rdf + - http://xmlns.com/foaf/spec/20140114.rdf + - http://usefulinc.com/ns/doap# + - http://www.w3.org/ns/adms adms:Asset: admssw:SoftwareProject: From 8afb57f879b57cb7707aec148cb39099b8b1266d Mon Sep 17 00:00:00 2001 From: Roman Valls Guimera Date: Sat, 12 Dec 2015 14:54:39 +0100 Subject: [PATCH 05/12] Initial cwl tool for vcftobedpe from hall-lab/svtools --- tools/hall-lab-svtools-vcftobedpe.cwl | 73 +++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100755 tools/hall-lab-svtools-vcftobedpe.cwl diff --git a/tools/hall-lab-svtools-vcftobedpe.cwl b/tools/hall-lab-svtools-vcftobedpe.cwl new file mode 100755 index 000000000..c502974ae --- /dev/null +++ b/tools/hall-lab-svtools-vcftobedpe.cwl @@ -0,0 +1,73 @@ +#!/usr/bin/env cwl-runner 
+ +$namespaces: + "cwl": "https://w3id.org/cwl/cwl#" + "foaf": "http://xmlns.com/foaf/0.1/" + "doap": "http://usefulinc.com/ns/doap" + "adms": "http://purl.org/adms/" + "admssw": "http://purl.org/adms/sw/" + +$schemas: + - https://joinup.ec.europa.eu/svn/adms_foss/adms_sw_v1.00/adms_sw_v1.00.rdf + - http://xmlns.com/foaf/spec/20140114.rdf + - http://usefulinc.com/ns/doap# + - http://www.w3.org/ns/adms + +adms:Asset: + admssw:SoftwareProject: + doap:name: "svtools" + doap:description: > + Comprehensive utilities to explore structural variations in genomes. + doap:homepage: "https://github.com/hall-lab/svtools" + doap:repository: + - doap:GitRepository: + doap:location: "https://github.com/hall-lab/svtools" + doap:release: + - doap:revision: "0.0.1-44188e60c44c4" + doap:license: "no license" + doap:category: "commandline tool" + doap:programming-language: "Python" + adms:AssetDistribution: + doap:name: "samtools-index.cwl" + doap:description: "Developed for CWL consortium http://commonwl.org/" + doap:specification: "http://common-workflow-language.github.io/draft-3/" + doap:release: "cwl:draft-3.dev2" + doap:homepage: "http://commonwl.org/" + doap:location: "https://github.com/common-workflow-language/workflows/blob/master/tools/hall-lab-svtools.cwl" + doap:repository: + - doap:GitRepository: + doap:location: "https://github.com/common-workflow-language/workflows" + +cwlVersion: "cwl:draft-3.dev2" + +class: CommandLineTool + +description: | + Usage: vcftobedpe -i -o [out.bedpe] + +requirements: + - "@import": envvar-global.cwl + +inputs: + - id: "#input" + type: File + description: | + "Input vcf file." 
+ inputBinding: + prefix: "-i" + + +outputs: + - id: "#output" + type: File + description: "The bedpe file" + outputBinding: + prefix: "-o" + +baseCommand: ["samtools"] + +arguments: + - valueFrom: $(inputs.bai?'-b':inputs.csi?'-c':[]) + position: 1 + - valueFrom: $(new_ext()) + position: 3 From 75244b001211f0c7b632ead0d45569dd35a23270 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Sat, 12 Dec 2015 06:10:17 -0800 Subject: [PATCH 06/12] add sample job, fix vawk def --- tools/jobs/vawk-job.json | 7 +++++ tools/vawk.cwl | 64 ++++++---------------------------------- 2 files changed, 16 insertions(+), 55 deletions(-) create mode 100644 tools/jobs/vawk-job.json diff --git a/tools/jobs/vawk-job.json b/tools/jobs/vawk-job.json new file mode 100644 index 000000000..b3a22608e --- /dev/null +++ b/tools/jobs/vawk-job.json @@ -0,0 +1,7 @@ +{ + "input": { + "class": "File", + "path": "../test-files/APGI2049_Tumor-manta.vcf" + }, + "cmd": "{ print $1 }" +} diff --git a/tools/vawk.cwl b/tools/vawk.cwl index 055d47077..a96a67567 100644 --- a/tools/vawk.cwl +++ b/tools/vawk.cwl @@ -1,56 +1,12 @@ #!/usr/bin/env cwl-runner -$namespaces: - - foaf: "http://xmlns.com/foaf/0.1/" - - doap: "http://usefulinc.com/ns/doap" - - adms: "http://purl.org/adms/" - - admssw: "http://purl.org/adms/sw/" - -$schemas: - - https://joinup.ec.europa.eu/svn/adms_foss/adms_sw_v1.00/adms_sw_v1.00.rdf - - http://xmlns.com/foaf/spec/20140114.rdf - - http://usefulinc.com/ns/doap# - - http://www.w3.org/ns/adms - -adms:Asset: - admssw:SoftwareProject: - doap:name: "vawk" - doap:description: > - An awk-like VCF parser - doap:homepage: "https://github.com/cc2qe/vawk" - doap:repository: - - doap:GitRepository: - doap:location: "https://github.com/cc2qe/vawk.git" - doap:release: - - doap:revision: "0.0.1" - doap:license: "None" - doap:category: "commandline tool" - doap:programming-language: "Python" - doap:developer: - - foaf:Person: - foaf:name: "Colby Chiang" - foaf:Organization: "Washington University" - 
adms:AssetDistribution: - doap:name: "vawk.cwl" - doap:description: "Developed for CWL consortium http://commonwl.org/" - doap:specification: "http://common-workflow-language.github.io/draft-3/" - doap:release: "cwl:draft-3.dev2" - doap:homepage: "http://commonwl.org/" - doap:location: "https://github.com/common-workflow-language/workflows/blob/master/tools/vawk.cwl" - doap:repository: - - doap:GitRepository: - doap:location: "https://github.com/common-workflow-language/workflows" - doap:maintainer: - foaf:Person: - foaf:openid: "http://orcid.org/0000-0002-2961-9670" - foaf:name: "Michael R. Crusoe" - foaf:mbox: "mailto:crusoe@ucdavis.edu" - foaf:organization: "University of California, Davis" - -cwlVersion: "cwl:draft-3.dev3" +cwlVersion: "cwl:draft-3.dev2" class: CommandLineTool +requirements: + - "@import": envvar-global.cwl + description: | usage: vawk [-h] [-v VAR] [-c INFO_COL] [--header] [--debug] cmd [vcf] positional arguments: @@ -73,22 +29,19 @@ description: | --header print VCF header --debug debugging level verbosity -requirements: - - "@import": envvar-global.cwl - inputs: - id: "#cmd" type: string description: | - vawk command syntax is exactly the same as awk syntax with a few - additional features. The INFO field can be split using the I$ prefix + vawk command syntax is exactly the same as awk syntax with a few + additional features. The INFO field can be split using the I$ prefix and the SAMPLE field can be split using the S$ prefix. For example, I$AF prints the allele frequency of each variant and S$NA12878 prints the entire SAMPLE field for the NA12878 individual for each variant. S$* returns all samples. The SAMPLE field can be further split based on the keys in the FORMAT field of the VCF (column 9). For example, S$NA12877$GT returns the genotype of the NA12878 individual. 
- ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }' + ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }' inputBinding: position: 1 streamable: true @@ -108,7 +61,8 @@ outputs: type: File description: "The resulting VCF file" streamable: true - outputBinding: "output.vcf" + outputBinding: + glob: "output.vcf" baseCommand: ["vawk"] From d4a17d72a5d9c6c1dc3ba88662443a8c81820d38 Mon Sep 17 00:00:00 2001 From: Roman Valls Guimera Date: Sat, 12 Dec 2015 15:16:16 +0100 Subject: [PATCH 07/12] Stray arguments from samtools, removed --- tools/hall-lab-svtools-vcftobedpe.cwl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/hall-lab-svtools-vcftobedpe.cwl b/tools/hall-lab-svtools-vcftobedpe.cwl index c502974ae..db83eeb86 100755 --- a/tools/hall-lab-svtools-vcftobedpe.cwl +++ b/tools/hall-lab-svtools-vcftobedpe.cwl @@ -65,9 +65,3 @@ outputs: prefix: "-o" baseCommand: ["samtools"] - -arguments: - - valueFrom: $(inputs.bai?'-b':inputs.csi?'-c':[]) - position: 1 - - valueFrom: $(new_ext()) - position: 3 From 7da801802cc4fe6b0ec2f06aa9d26306d8798846 Mon Sep 17 00:00:00 2001 From: Roman Valls Guimera Date: Sat, 12 Dec 2015 15:16:39 +0100 Subject: [PATCH 08/12] Remove all metadata and ontologies stuff... 
for now --- tools/hall-lab-svtools-vcftobedpe.cwl | 38 --------------------------- 1 file changed, 38 deletions(-) diff --git a/tools/hall-lab-svtools-vcftobedpe.cwl b/tools/hall-lab-svtools-vcftobedpe.cwl index db83eeb86..aea7ad3a7 100755 --- a/tools/hall-lab-svtools-vcftobedpe.cwl +++ b/tools/hall-lab-svtools-vcftobedpe.cwl @@ -1,43 +1,5 @@ #!/usr/bin/env cwl-runner -$namespaces: - "cwl": "https://w3id.org/cwl/cwl#" - "foaf": "http://xmlns.com/foaf/0.1/" - "doap": "http://usefulinc.com/ns/doap" - "adms": "http://purl.org/adms/" - "admssw": "http://purl.org/adms/sw/" - -$schemas: - - https://joinup.ec.europa.eu/svn/adms_foss/adms_sw_v1.00/adms_sw_v1.00.rdf - - http://xmlns.com/foaf/spec/20140114.rdf - - http://usefulinc.com/ns/doap# - - http://www.w3.org/ns/adms - -adms:Asset: - admssw:SoftwareProject: - doap:name: "svtools" - doap:description: > - Comprehensive utilities to explore structural variations in genomes. - doap:homepage: "https://github.com/hall-lab/svtools" - doap:repository: - - doap:GitRepository: - doap:location: "https://github.com/hall-lab/svtools" - doap:release: - - doap:revision: "0.0.1-44188e60c44c4" - doap:license: "no license" - doap:category: "commandline tool" - doap:programming-language: "Python" - adms:AssetDistribution: - doap:name: "samtools-index.cwl" - doap:description: "Developed for CWL consortium http://commonwl.org/" - doap:specification: "http://common-workflow-language.github.io/draft-3/" - doap:release: "cwl:draft-3.dev2" - doap:homepage: "http://commonwl.org/" - doap:location: "https://github.com/common-workflow-language/workflows/blob/master/tools/hall-lab-svtools.cwl" - doap:repository: - - doap:GitRepository: - doap:location: "https://github.com/common-workflow-language/workflows" - cwlVersion: "cwl:draft-3.dev2" class: CommandLineTool From 5ec9ed0714ddba124f246686311127c25c23c063 Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Sat, 12 Dec 2015 06:37:45 -0800 Subject: [PATCH 09/12] rename vawk output --- tools/vawk.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/vawk.cwl b/tools/vawk.cwl index a96a67567..0618efdd8 100644 --- a/tools/vawk.cwl +++ b/tools/vawk.cwl @@ -57,7 +57,7 @@ stdout: "output.vcf" outputs: - - id: "#sorted" + - id: "#processed" type: File description: "The resulting VCF file" streamable: true From f576f2a5c100eb5166a2302848649899be7c5aa2 Mon Sep 17 00:00:00 2001 From: Roman Valls Guimera Date: Sat, 12 Dec 2015 15:42:37 +0100 Subject: [PATCH 10/12] Working vcftobedpe cwl tool --- tools/hall-lab-svtools-vcftobedpe.cwl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/hall-lab-svtools-vcftobedpe.cwl b/tools/hall-lab-svtools-vcftobedpe.cwl index aea7ad3a7..6eddab107 100755 --- a/tools/hall-lab-svtools-vcftobedpe.cwl +++ b/tools/hall-lab-svtools-vcftobedpe.cwl @@ -15,15 +15,19 @@ inputs: type: File description: | "Input vcf file." 
+ streamable: true inputBinding: prefix: "-i" +stdout: + "output.bedpe" outputs: - - id: "#output" + - id: "#bedpe" type: File description: "The bedpe file" + streamable: true outputBinding: - prefix: "-o" + glob: "output.bedpe" -baseCommand: ["samtools"] +baseCommand: ["vcftobedpe"] From c04e060b082f805687d18833e928c3ff0a424560 Mon Sep 17 00:00:00 2001 From: Roman Valls Guimera Date: Mon, 14 Dec 2015 13:22:56 +0100 Subject: [PATCH 11/12] Jobfile for vcftobedpe, thanks @mr-c for the heads up --- tools/jobs/vcftobedpe-job.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tools/jobs/vcftobedpe-job.json diff --git a/tools/jobs/vcftobedpe-job.json b/tools/jobs/vcftobedpe-job.json new file mode 100644 index 000000000..2e53ca1c7 --- /dev/null +++ b/tools/jobs/vcftobedpe-job.json @@ -0,0 +1,6 @@ +{ + "input": { + "class": "File", + "path": "../test-files/APGI2049_Tumor-manta.vcf" + }, +} From 80ba93677f6e0301b12c3bdc43d09a84fda1ac83 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Thu, 17 Dec 2015 04:18:41 -0800 Subject: [PATCH 12/12] add docker hint --- tools/hall-lab-svtools-vcftobedpe.cwl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/hall-lab-svtools-vcftobedpe.cwl b/tools/hall-lab-svtools-vcftobedpe.cwl index 6eddab107..c73bf288c 100755 --- a/tools/hall-lab-svtools-vcftobedpe.cwl +++ b/tools/hall-lab-svtools-vcftobedpe.cwl @@ -10,6 +10,10 @@ description: | requirements: - "@import": envvar-global.cwl +hints: + - class: DockerRequirement + dockerPull: biocrusoe/hall-lab-svtools + inputs: - id: "#input" type: File