diff --git a/README.md b/README.md index 80aedf1..d080f1c 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,19 @@ VISTA is an integrated SV calling framework that leverages results of individual ## Installation +#### Option A: Install the Python Package + +```shell +pip install git+https://github.com/nahid18/VISTA.git +``` +#### Option B: Clone and Run + ```shell git clone https://github.com/Addicted-to-coding/VISTA/tree/main cd VISTA ``` -## Prerequisites +##### Prerequisites - python 3.8 ```shell @@ -19,12 +26,23 @@ pip install matplotlib ## Usage +If you installed the Python package, run this command from the terminal: +```shell +vista +``` + +If you cloned the repository instead, run the script directly: + `python vista.py -i [MANTA VCF] [LUMPY VCF] [DELLY VCF] [GENOMESTRIP VCF] [CLEVER VCF] [POPDEL VCF] [OCTOPUS VCF] -s [mouse or human] -o [output folder]` - Note: Input files' tool names should be all in lowercase ## EXAMPLE +- `vista -i manta_HG002.vcf delly_HG002.vcf genomestrip_HG002.vcf octopus_HG002.vcf -o ./results -s human` + +or: + - `python vista.py -i manta_HG002.vcf delly_HG002.vcf genomestrip_HG002.vcf octopus_HG002.vcf -o ./results -s human` ## Command-line Options diff --git a/data/.gitingore b/pyvista/__init__.py similarity index 100% rename from data/.gitingore rename to pyvista/__init__.py diff --git a/scripts/.gitingore b/pyvista/scripts/.gitingore similarity index 100% rename from scripts/.gitingore rename to pyvista/scripts/.gitingore diff --git a/scripts/bedToVcf.py b/pyvista/scripts/bedToVcf.py similarity index 100% rename from scripts/bedToVcf.py rename to pyvista/scripts/bedToVcf.py diff --git a/scripts/circle_del.py b/pyvista/scripts/circle_del.py similarity index 100% rename from scripts/circle_del.py rename to pyvista/scripts/circle_del.py diff --git a/scripts/compare_script.py b/pyvista/scripts/compare_script.py similarity index 100% rename from scripts/compare_script.py rename to pyvista/scripts/compare_script.py diff --git 
a/scripts/convert_scripts/DELLYtoVCF.py b/pyvista/scripts/convert_scripts/DELLYtoVCF.py similarity index 100% rename from scripts/convert_scripts/DELLYtoVCF.py rename to pyvista/scripts/convert_scripts/DELLYtoVCF.py diff --git a/scripts/convert_scripts/LUMPYtoVCF.py b/pyvista/scripts/convert_scripts/LUMPYtoVCF.py similarity index 100% rename from scripts/convert_scripts/LUMPYtoVCF.py rename to pyvista/scripts/convert_scripts/LUMPYtoVCF.py diff --git a/scripts/convert_scripts/MANTAtoVCF.py b/pyvista/scripts/convert_scripts/MANTAtoVCF.py similarity index 100% rename from scripts/convert_scripts/MANTAtoVCF.py rename to pyvista/scripts/convert_scripts/MANTAtoVCF.py diff --git a/scripts/convert_scripts/OCTOPUStoVCF.py b/pyvista/scripts/convert_scripts/OCTOPUStoVCF.py similarity index 100% rename from scripts/convert_scripts/OCTOPUStoVCF.py rename to pyvista/scripts/convert_scripts/OCTOPUStoVCF.py diff --git a/scripts/convert_scripts/PARLtoVCF.py b/pyvista/scripts/convert_scripts/PARLtoVCF.py similarity index 100% rename from scripts/convert_scripts/PARLtoVCF.py rename to pyvista/scripts/convert_scripts/PARLtoVCF.py diff --git a/scripts/convert_scripts/REFtoVCF.py b/pyvista/scripts/convert_scripts/REFtoVCF.py similarity index 100% rename from scripts/convert_scripts/REFtoVCF.py rename to pyvista/scripts/convert_scripts/REFtoVCF.py diff --git a/pyvista/scripts/data/.gitingore b/pyvista/scripts/data/.gitingore new file mode 100644 index 0000000..e69de29 diff --git a/data/STANDARD_HIGHCONF_BREAKDANCER.vcf b/pyvista/scripts/data/STANDARD_HIGHCONF_BREAKDANCER.vcf similarity index 100% rename from data/STANDARD_HIGHCONF_BREAKDANCER.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_BREAKDANCER.vcf diff --git a/data/STANDARD_HIGHCONF_DELLY.vcf b/pyvista/scripts/data/STANDARD_HIGHCONF_DELLY.vcf similarity index 100% rename from data/STANDARD_HIGHCONF_DELLY.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_DELLY.vcf diff --git a/data/STANDARD_HIGHCONF_GENOMESTRIP.vcf 
b/pyvista/scripts/data/STANDARD_HIGHCONF_GENOMESTRIP.vcf similarity index 100% rename from data/STANDARD_HIGHCONF_GENOMESTRIP.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_GENOMESTRIP.vcf diff --git a/data/STANDARD_HIGHCONF_GROM.vcf b/pyvista/scripts/data/STANDARD_HIGHCONF_GROM.vcf similarity index 100% rename from data/STANDARD_HIGHCONF_GROM.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_GROM.vcf diff --git a/data/STANDARD_HIGHCONF_clever_chrall.vcf b/pyvista/scripts/data/STANDARD_HIGHCONF_clever_chrall.vcf similarity index 100% rename from data/STANDARD_HIGHCONF_clever_chrall.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_clever_chrall.vcf diff --git a/data/STANDARD_HIGHCONF_gasv_chrall.vcf b/pyvista/scripts/data/STANDARD_HIGHCONF_gasv_chrall.vcf similarity index 100% rename from data/STANDARD_HIGHCONF_gasv_chrall.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_gasv_chrall.vcf diff --git a/data/STANDARD_HIGHCONF_manta.vcf b/pyvista/scripts/data/STANDARD_HIGHCONF_manta.vcf similarity index 100% rename from data/STANDARD_HIGHCONF_manta.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_manta.vcf diff --git a/data/STANDARD_HIGHCONF_octopus.vcf b/pyvista/scripts/data/STANDARD_HIGHCONF_octopus.vcf similarity index 100% rename from data/STANDARD_HIGHCONF_octopus.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_octopus.vcf diff --git a/data/STANDARD_HIGHCONF_pindel_chrall.vcf b/pyvista/scripts/data/STANDARD_HIGHCONF_pindel_chrall.vcf similarity index 100% rename from data/STANDARD_HIGHCONF_pindel_chrall.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_pindel_chrall.vcf diff --git a/data/STANDARD_HIGHCONF_popdel.vcf b/pyvista/scripts/data/STANDARD_HIGHCONF_popdel.vcf similarity index 100% rename from data/STANDARD_HIGHCONF_popdel.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_popdel.vcf diff --git a/data/STANDARD_HIGHCONF_smoove_chrall.vcf b/pyvista/scripts/data/STANDARD_HIGHCONF_smoove_chrall.vcf similarity index 100% rename from 
data/STANDARD_HIGHCONF_smoove_chrall.vcf rename to pyvista/scripts/data/STANDARD_HIGHCONF_smoove_chrall.vcf diff --git a/data/SVPred_HG_del_ALL.vcf b/pyvista/scripts/data/SVPred_HG_del_ALL.vcf similarity index 100% rename from data/SVPred_HG_del_ALL.vcf rename to pyvista/scripts/data/SVPred_HG_del_ALL.vcf diff --git a/data/VISTA_HG_del_ALL.vcf b/pyvista/scripts/data/VISTA_HG_del_ALL.vcf similarity index 100% rename from data/VISTA_HG_del_ALL.vcf rename to pyvista/scripts/data/VISTA_HG_del_ALL.vcf diff --git a/data/dataDrive b/pyvista/scripts/data/dataDrive similarity index 100% rename from data/dataDrive rename to pyvista/scripts/data/dataDrive diff --git a/data/nf_100t.delly.AKR_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.AKR_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.AKR_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.AKR_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.AKR_J_chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.AKR_J_chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.AKR_J_chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.AKR_J_chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.A_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.A_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.A_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.A_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.A_J_chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.A_J_chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.A_J_chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.A_J_chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.BALB_CJ.chr19.100p_sorted.modified.vcf 
b/pyvista/scripts/data/nf_100t.delly.BALB_CJ.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.BALB_CJ.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.BALB_CJ.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.BALB_CJ_chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.BALB_CJ_chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.BALB_CJ_chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.BALB_CJ_chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.C3H_HeJ.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.C3H_HeJ.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.C3H_HeJ.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.C3H_HeJ.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.C3H_HeJ_chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.C3H_HeJ_chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.C3H_HeJ_chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.C3H_HeJ_chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.CBA_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.CBA_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.CBA_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.CBA_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.CBA_J_chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.CBA_J_chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.CBA_J_chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.CBA_J_chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.DBA_2J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.DBA_2J.chr19.100p_sorted.modified.vcf 
similarity index 100% rename from data/nf_100t.delly.DBA_2J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.DBA_2J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.DBA_2J_chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.DBA_2J_chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.DBA_2J_chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.DBA_2J_chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.LP_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.LP_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.LP_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.LP_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.delly.LP_J_chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.delly.LP_J_chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.delly.LP_J_chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.delly.LP_J_chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.manta_diploidSV.AKR_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.manta_diploidSV.AKR_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.manta_diploidSV.AKR_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.manta_diploidSV.AKR_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.manta_diploidSV.A_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.manta_diploidSV.A_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.manta_diploidSV.A_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.manta_diploidSV.A_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.manta_diploidSV.BALB_CJ.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.manta_diploidSV.BALB_CJ.chr19.100p_sorted.modified.vcf similarity 
index 100% rename from data/nf_100t.manta_diploidSV.BALB_CJ.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.manta_diploidSV.BALB_CJ.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.manta_diploidSV.C3H_HeJ.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.manta_diploidSV.C3H_HeJ.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.manta_diploidSV.C3H_HeJ.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.manta_diploidSV.C3H_HeJ.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.manta_diploidSV.CBA_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.manta_diploidSV.CBA_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.manta_diploidSV.CBA_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.manta_diploidSV.CBA_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.manta_diploidSV.DBA_2J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.manta_diploidSV.DBA_2J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.manta_diploidSV.DBA_2J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.manta_diploidSV.DBA_2J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.manta_diploidSV.LP_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.manta_diploidSV.LP_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.manta_diploidSV.LP_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.manta_diploidSV.LP_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.popdel.AKR_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.popdel.AKR_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.popdel.AKR_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.popdel.AKR_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.popdel.A_J.chr19.100p_sorted.modified.vcf 
b/pyvista/scripts/data/nf_100t.popdel.A_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.popdel.A_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.popdel.A_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.popdel.BALB_CJ.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.popdel.BALB_CJ.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.popdel.BALB_CJ.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.popdel.BALB_CJ.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.popdel.C3H_HeJ.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.popdel.C3H_HeJ.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.popdel.C3H_HeJ.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.popdel.C3H_HeJ.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.popdel.CBA_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.popdel.CBA_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.popdel.CBA_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.popdel.CBA_J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.popdel.DBA_2J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.popdel.DBA_2J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.popdel.DBA_2J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.popdel.DBA_2J.chr19.100p_sorted.modified.vcf diff --git a/data/nf_100t.popdel.LP_J.chr19.100p_sorted.modified.vcf b/pyvista/scripts/data/nf_100t.popdel.LP_J.chr19.100p_sorted.modified.vcf similarity index 100% rename from data/nf_100t.popdel.LP_J.chr19.100p_sorted.modified.vcf rename to pyvista/scripts/data/nf_100t.popdel.LP_J.chr19.100p_sorted.modified.vcf diff --git a/scripts/num_calls.py b/pyvista/scripts/num_calls.py similarity index 100% rename from scripts/num_calls.py rename to 
pyvista/scripts/num_calls.py diff --git a/scripts/summarize2.py b/pyvista/scripts/summarize2.py similarity index 100% rename from scripts/summarize2.py rename to pyvista/scripts/summarize2.py diff --git a/scripts/vista_merge.py b/pyvista/scripts/vista_merge.py similarity index 100% rename from scripts/vista_merge.py rename to pyvista/scripts/vista_merge.py diff --git a/vista.py b/pyvista/vista.py similarity index 67% rename from vista.py rename to pyvista/vista.py index 3d1b63e..220ef4a 100644 --- a/vista.py +++ b/pyvista/vista.py @@ -1,5 +1,6 @@ import argparse import subprocess +import sys #AUTHOR: SEUNGMO LEE @@ -55,19 +56,47 @@ def vista(args): raise ValueError("Missing Gold Standard or threshold number!") analyze_vcfs(args.gold, args.threshold, args.output) -if __name__ == "__main__": +def main(): parser = argparse.ArgumentParser( - description="Run VISTA \n Command: python vista.py -i [MANTA VCF] [LUMPY VCF] [DELLY VCF] [GENOMESTRIP VCF] [CLEVER VCF] [POPDEL VCF] [OCTOPUS VCF] -s [mouse or human] -o [output folder]\n Note: Input files' tool names should be all in lowercase", + description="VISTA: An Integrated SV Discovery Framework", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) - - parser.add_argument("-i", "--inputs", nargs="+", required=True, help="Input VCF files") - parser.add_argument("-s", "--sample", required=True, choices=["mouse", "human"], help="Sample type") - parser.add_argument("-o", "--output", required=True, help="Output folder path") - parser.add_argument("-g", "--gold", help="Input gold standard VCF") - parser.add_argument("-a", "--analysis", action="store_true", help="Include statistics analysis") - parser.add_argument("-t", "--threshold", help="Threshold for comparison") + + parser.add_argument( + "-i", "--inputs", nargs="+", required=True, + help="Input VCF files to be merged" + ) + parser.add_argument( + "-s", "--sample", required=True, choices=["mouse", "human"], + help="Specify the sample type, either 'mouse' or 'human'" + 
) + parser.add_argument( + "-o", "--output", required=True, + help="Output folder where VISTA will be saved" + ) + parser.add_argument( + "-g", "--gold", + help="Provide the path to a single gold standard VCF file" + ) + parser.add_argument( + "-a", "--analysis", action="store_true", + help="Include statistics analysis. If this flag is included, it reports statistics." + ) + parser.add_argument( + "-t", "--threshold", + help="Threshold number for comparison" + ) + + parser.add_argument( + "--version", action="version", version="1.0.0" + ) + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) args = parser.parse_args() vista(args) - \ No newline at end of file + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..426fafd --- /dev/null +++ b/setup.py @@ -0,0 +1,22 @@ +from setuptools import setup, find_packages + +long_description = 'VISTA: An Integrated SV Discovery Framework' + +setup(name='pyvista', + version='1.0.0', + description='A Python package for VISTA SV discovery framework', + url='https://github.com/Mangul-Lab-USC/VISTA', + author='Mangul Lab', + author_email='', + long_description = long_description, + long_description_content_type ="text/markdown", + license='MIT', + packages=find_packages(), + entry_points={ + 'console_scripts': [ + 'vista = pyvista.vista:main', + ], + }, + zip_safe=False, + install_requires=['pandas', 'numpy', 'matplotlib', 'seaborn', 'scipy', 'PyVCF'] +) \ No newline at end of file