Skip to content

Commit

Permalink
Implementing a new way to control the protein loop overlap for hicHyperoptDetectLoops and improving the docs of hicValidateLocations. Feature request #723
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimwolff committed Jul 8, 2021
1 parent 7ec52d9 commit 100b2bf
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 20 deletions.
16 changes: 10 additions & 6 deletions hicexplorer/hicHyperoptDetectLoops.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ def parse_arguments(args=None):
default=10000,
help='Resolution of matrix'
' (Default: %(default)s).')
parserOpt.add_argument('--chrPrefixLoops', '-cl',
help='Adding / removing / do nothing a \'chr\'-prefix to chromosome name of the loops.',
choices=[None, 'add', 'remove'],
default=None
)
parserOpt.add_argument('--threads', '-t',
help='Number of threads (uses the python multiprocessing module)'
' (Default: %(default)s).',
Expand All @@ -70,14 +75,13 @@ def parse_arguments(args=None):
return parser


def compute_score(pLoopFile, pProteinFile, pMaximumNumberOfLoops, pResolution):
def compute_score(pLoopFile, pProteinFile, pMaximumNumberOfLoops, pResolution, pChrPrefixLoops):
with open(pLoopFile, 'r') as file:
lines = file.readlines()
if len(lines) == 0:
return 1
outfile_statistics = NamedTemporaryFile()
args = "--data {} --protein {} -cl --resolution {} --outFileName {}".format(pLoopFile, pProteinFile, pResolution, outfile_statistics.name).split()
print(args)
args = "--data {} --protein {} -cl {} --resolution {} --outFileName {}".format(pLoopFile, pProteinFile, pChrPrefixLoops, pResolution, outfile_statistics.name).split()
hicValidateLocations.main(args)
data_dict = {}

Expand Down Expand Up @@ -108,15 +112,14 @@ def objective(pArgs):
pArgs['maxLoopDistance'], pArgs['threads'], pArgs['threads']).split()
hicDetectLoops.main(args)

error_score = compute_score(outfile_loop.name, pArgs['proteinFile'], pArgs['maximumNumberOfLoops'], pArgs['resolution'])
error_score = compute_score(outfile_loop.name, pArgs['proteinFile'], pArgs['maximumNumberOfLoops'], pArgs['resolution'], pArgs['chrPrefixLoops'])
print('Error score: {}'.format(error_score))
return error_score


def main(args=None):

args = parse_arguments().parse_args(args)

space = {

'pit': hp.uniform('pit', 0, 100),
Expand All @@ -130,7 +133,8 @@ def main(args=None):
'proteinFile': args.proteinFile,
'maximumNumberOfLoops': args.maximumNumberOfLoops,
'resolution': args.resolution,
'threads': args.threads
'threads': args.threads,
'chrPrefixLoops': args.chrPrefixLoops

}

Expand Down
4 changes: 2 additions & 2 deletions hicexplorer/hicValidateLocations.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ def parse_arguments(args=None):
'First file contains all loop locations with protein location matches, second file contains statistics about this matching.'
)
parserOpt.add_argument('--chrPrefixLoops', '-cl',
help='Adding a \'chr\'-prefix to chromosome name of the loops.',
help='Adding / removing / do nothing a \'chr\'-prefix to chromosome name of the loops.',
choices=[None, 'add', 'remove'],
default=None
)
parserOpt.add_argument('--chrPrefixProtein', '-cp',
help='Adding a \'chr\'-prefix to chromosome name of the protein.',
help='Adding / removing / do nothing a \'chr\'-prefix to chromosome name of the protein.',
choices=[None, 'add', 'remove'],
default=None
)
Expand Down
39 changes: 27 additions & 12 deletions hicexplorer/test/general/test_hicHyperoptDetectLoops.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,29 +22,44 @@

def are_files_equal(file1, file2, delta=None):
    """Compare two text files line by line, tolerating a few mismatches.

    Both files are read in lockstep. A position is skipped entirely when the
    line of ``file1`` starts with ``'File'``. The comparison stops at the end
    of the shorter file, so surplus trailing lines are not compared.

    :param file1: path of the first text file
    :param file2: path of the second text file
    :param delta: maximum number of differing lines that is still accepted;
                  ``None`` (the default) accepts no differing line at all
    :return: ``True`` if the number of differing lines does not exceed the
             tolerance, ``False`` otherwise
    """
    # Comparing an int against None raises TypeError, so map the default
    # onto an explicit zero tolerance.
    allowed_mismatches = 0 if delta is None else delta
    mismatches = 0
    with open(file1) as textfile1, open(file2) as textfile2:
        for x, y in zip(textfile1, textfile2):
            if x.startswith('File'):
                continue
            if x != y:
                mismatches += 1
                if mismatches > allowed_mismatches:
                    return False
    return True


def test_main():
    """Run hicHyperoptDetectLoops without a chr-prefix option and compare
    the written result file against the stored reference."""
    outfile = NamedTemporaryFile(suffix='.txt', delete=True)
    # Only the generated file name is used below; closing the handle removes
    # the temporary file again (delete=True).
    outfile.close()
    args = "--matrix {} -p {} -ml {} -r {} --runs {} -o {}".format(
        ROOT + "hicDetectLoops/GSE63525_GM12878_insitu_primary_2_5mb.cool",
        ROOT + 'hicHyperoptDectedLoops/ctcf_sorted.bed', 3210, 10000, 2, outfile.name).split()
    hicHyperoptDetectLoops.main(args)
    # The comparison result has to be asserted; a bare call would discard it
    # and let a mismatch pass unnoticed.
    assert are_files_equal(outfile.name, ROOT + 'hicHyperoptDetectLoops/hyperopt_result.txt', delta=2)


def test_main_add():
    """Run hicHyperoptDetectLoops with ``-cl add`` and compare the written
    result file against the stored reference (up to two differing lines)."""
    # Reserve a unique '.txt' path; leaving the context closes the handle and
    # thereby removes the temporary file, only its name is used afterwards.
    with NamedTemporaryFile(suffix='.txt', delete=True) as tmp:
        result_path = tmp.name

    matrix = ROOT + "hicDetectLoops/GSE63525_GM12878_insitu_primary_2_5mb.cool"
    protein = ROOT + 'hicHyperoptDectedLoops/ctcf_sorted.bed'
    command = "--matrix {} -p {} -ml {} -r {} --runs {} -o {} -cl {}".format(
        matrix, protein, 3210, 10000, 2, result_path, 'add')

    hicHyperoptDetectLoops.main(command.split())

    expected = ROOT + 'hicHyperoptDetectLoops/hyperopt_result.txt'
    assert are_files_equal(result_path, expected, delta=2)


def test_main_remove():
    """Run hicHyperoptDetectLoops with ``-cl remove`` and compare the written
    result file against the stored reference (up to two differing lines)."""
    # Reserve a unique '.txt' path; leaving the context closes the handle and
    # thereby removes the temporary file, only its name is used afterwards.
    with NamedTemporaryFile(suffix='.txt', delete=True) as tmp:
        result_path = tmp.name

    matrix = ROOT + "hicDetectLoops/GSE63525_GM12878_insitu_primary_2_5mb.cool"
    protein = ROOT + 'hicHyperoptDectedLoops/ctcf_sorted.bed'
    command = "--matrix {} -p {} -ml {} -r {} --runs {} -o {} -cl {}".format(
        matrix, protein, 3210, 10000, 2, result_path, 'remove')

    hicHyperoptDetectLoops.main(command.split())

    expected = ROOT + 'hicHyperoptDetectLoops/hyperopt_result.txt'
    assert are_files_equal(result_path, expected, delta=2)

0 comments on commit 100b2bf

Please sign in to comment.