From 96cb73170f6686d6bdd99aa490c75c7aea95a3db Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 09:36:19 +0000 Subject: [PATCH 01/20] expose the test set generator (used in CSP test) as an opensource utility (NO_JIRA) --- scripts/ReadMe.md | 4 + .../EntryPropertyCalculator.py | 582 ++++++++++++++++++ scripts/refcodes_with_properties/ReadMe.md | 49 ++ .../example_control_file.txt | 19 + .../more_elaborate_control.txt | 29 + .../refcodes_with_properties.py | 72 +++ 6 files changed, 755 insertions(+) create mode 100644 scripts/refcodes_with_properties/EntryPropertyCalculator.py create mode 100644 scripts/refcodes_with_properties/ReadMe.md create mode 100644 scripts/refcodes_with_properties/example_control_file.txt create mode 100644 scripts/refcodes_with_properties/more_elaborate_control.txt create mode 100644 scripts/refcodes_with_properties/refcodes_with_properties.py diff --git a/scripts/ReadMe.md b/scripts/ReadMe.md index 023632f..2cb3a00 100644 --- a/scripts/ReadMe.md +++ b/scripts/ReadMe.md @@ -48,6 +48,10 @@ This folder contains scripts submitted by users or CCDC scientists for anyone to - Calculates the surface charge for a given structure and surface terminations. Runs both from CMD and Mercury. +## Refcodes With Properties + +- A script for generating refcode lists with specific properties from an easy-to-read control file. + ## Tips A section for top tips in using the repository and GitHub. 
diff --git a/scripts/refcodes_with_properties/EntryPropertyCalculator.py b/scripts/refcodes_with_properties/EntryPropertyCalculator.py new file mode 100644 index 0000000..dda96ea --- /dev/null +++ b/scripts/refcodes_with_properties/EntryPropertyCalculator.py @@ -0,0 +1,582 @@ +# +# This script can be used for any purpose without limitation subject to the +# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of this script. +# +# 2025-03-14: created by Jason C. Cole, The Cambridge Crystallographic Data Centre + + +''' +Utility classes for filtering CSD entries based on a property control file +''' + +import ccdc.io + +_filter_classes = {} +def register(cls): + ''' Register a filter class to use in the script. + :param cls: the class to register + ''' + if cls.name() in _filter_classes: + raise ValueError(f"a class with the name {cls.name()} is already registered. 
Use a different name") + + _filter_classes[cls.name()] = cls + +def filter(name): + return _filter_classes[name] + +def helptext(): + ''' Get help text + ''' + txt = "" + for name in _filter_classes.keys(): + cls = _filter_classes[name] + txt = txt + " %s -> %s," % ( name, cls.helptext() ) + return txt[:-1] + + +class _Filter(object): + + @staticmethod + def name(): + raise NotImplementedError # override this + + @staticmethod + def helptext(): + raise NotImplementedError # override this + + @staticmethod + def argument_pair(): + raise NotImplementedError # override this + +class _ComparativeFilter(_Filter): + def __init__(self, args): + value = False + if eval(args.strip()) == 1: + value = True + + self.expected_value = value + + def value(self): + raise NotImplementedError # override this + + def __call__(self,theobject): + value = self.value(theobject) + return value == self.expected_value + + +class _RangeFilter(_Filter): + def __init__(self, args): + + parts = [ p.strip() for p in args.split() ] + self.minimum = eval(parts[0]) + self.maximum = eval(parts[1]) + + def value(self): + raise NotImplementedError # override this + + def __call__(self,theobject): + value = self.value(theobject) + return value >= self.minimum and value <= self.maximum + +class AllowedAtomicNumbersFilter(_Filter): + def __init__(self,args): + self.allowed_atomic_numbers = [eval(x) for x in args.strip().split()] + + @staticmethod + def name(): + return "allowed atomic numbers" + + @staticmethod + def helptext(): + return "specify a set of atomic numbers (space separated) that the structure can have (and no others)" + + def __call__(self,entry): + try: + molecule = entry.crystal.molecule + return len([x for x in molecule.atoms if x.atomic_number in self.allowed_atomic_numbers]) == len(molecule.atoms) + except TypeError: + return False + +register(AllowedAtomicNumbersFilter) + + +class MustContainAtomicNumbersFilter(_Filter): + def __init__(self,args): + self.must_have_atomic_numbers = 
[eval(x) for x in args.strip().split()] + + @staticmethod + def name(): + return "must have atomic numbers" + + @staticmethod + def helptext(): + return "specify a set of atomic numbers (space separated) that the structure must have" + + def __call__(self,entry): + try: + molecule = entry.crystal.molecule + + contains = {} + for x in molecule.atoms: + if not contains.has_key(x.atomic_number): + contains[x.atomic_number] = 0 + contains[x.atomic_number] = contains[x.atomic_number] + 1 + for x in self.must_have_atomic_numbers: + if not contains.has_key(x): + return False + + return True + except: + return False + +register(MustContainAtomicNumbersFilter) + +class OrganicFilter(_ComparativeFilter): + def __init__(self, args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "organic" + + @staticmethod + def helptext(): + return "organic entries or not" + + def value(self,entry): + return entry.is_organic + +register(OrganicFilter) + +class PolymericFilter(_ComparativeFilter): + def __init__(self, args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "polymeric" + + @staticmethod + def helptext(): + return "polymeric entries or not" + + def value(self,entry): + return entry.is_polymeric + +register(PolymericFilter) + +class AllHaveSitesFilter(_ComparativeFilter): + def __init__(self, args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "all atoms have sites" + + @staticmethod + def helptext(): + return "whether all atoms have to have sites" + + def value(self,entry): + try: + return entry.crystal.molecule.all_atoms_have_sites + except: + return False + +register(AllHaveSitesFilter) + +class DisorderedFilter(_ComparativeFilter): + def __init__(self, args): + + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "disordered" + + @staticmethod + def helptext(): + return "disordered entries or not" + + def value(self,entry): + 
return entry.has_disorder + +register(DisorderedFilter) + + +class AtomicWeightFilter(_RangeFilter): + def __init__(self,args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "atomic weight" + + @staticmethod + def helptext(): + return "specify a range of atomic weight (for the whole structure - not individual molecules)" + + def value(self,entry): + try: + molecule = entry.crystal.molecule + return molecule.molecular_weight + except TypeError: + return 0.0 + +register(AtomicWeightFilter) + +class AtomCountFilter(_RangeFilter): + def __init__(self,args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "atom count" + + @staticmethod + def helptext(): + return "specify a range of atom counts (for the whole structure - not individual molecules)" + + def value(self,entry): + try: + molecule = entry.crystal.molecule + return len(molecule.atoms) + except TypeError: + return 0 + +register(AtomCountFilter) + +class RotatableBondFilter(_RangeFilter): + def __init__(self,args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "rotatable bond count" + + @staticmethod + def helptext(): + return "specify the number of rotatable bonds (for the whole structure - not individual molecules)" + + def value(self,entry): + try: + molecule = entry.crystal.molecule + return sum( x.is_rotatable for x in molecule.bonds ) + except TypeError: + return 0 + +register(RotatableBondFilter) + +class DonorCountFilter(_RangeFilter): + def __init__(self,args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "donor count" + + @staticmethod + def helptext(): + return "specify a donor atom count range (for the whole structure - not individual molecules)" + + def value(self,entry): + try: + molecule = entry.crystal.molecule + return len([x for x in molecule.atoms if x.is_donor]) + except TypeError: + return 0 + +register(DonorCountFilter) + +class 
AcceptorCountFilter(_RangeFilter): + def __init__(self,args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "acceptor count" + + @staticmethod + def helptext(): + return "specify an acceptor atom count range (for the whole structure - not individual molecules)" + + def value(self,entry): + try: + molecule = entry.crystal.molecule + return len([x for x in molecule.atoms if x.is_acceptor]) + except TypeError: + return 0 + +register(AcceptorCountFilter) + +class ComponentCountFilter(_RangeFilter): + def __init__(self,args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "component range" + + @staticmethod + def helptext(): + return "specify a component count range for the whole structure" + + def value(self,entry): + try: + return len(entry.crystal.molecule.components) + except TypeError: + return 0 + +register(ComponentCountFilter) + + +class ZPrimeFilter(_RangeFilter): + def __init__(self,args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "zprime range" + + @staticmethod + def helptext(): + return "specify a z-prime range" + + def value(self,entry): + return entry.crystal.z_prime + +register(ZPrimeFilter) + + +class RfactorFilter(_RangeFilter): + def __init__(self,args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "rfactor range" + + @staticmethod + def helptext(): + return "specify r-factor range (in %%)" + + def value(self,entry): + return entry.r_factor + +register(RfactorFilter) + +class SpacegroupNumberFilter(_RangeFilter): + def __init__(self,args): + super(self.__class__,self).__init__(args) + + @staticmethod + def name(): + return "spacegroup number range" + + @staticmethod + def helptext(): + return "specify spacegroup number range" + + def value(self,entry): + return entry.crystal.spacegroup_number_and_setting[0] + +register(SpacegroupNumberFilter) + +class FilterEvaluation(object): + def 
__init__(self): + self._methods = [] + + def add_filter(self,method): + self._methods.append(method) + + def evaluate(self,entry): + for method in self._methods: + try: + if not method(entry): + return False + except TypeError: + return False + + return True + + def values(self,entry): + values = {} + for method in self._methods: + if hasattr(method,"value"): + try: + values[method.name()] = method.value(entry) + except NotImplementedError: + pass + return values + +def parse_control_file(lines): + evaluator = FilterEvaluation() + for line in lines: + if len(line) > 0 and line[0] != '#': + parts = line.split(":") + if len(parts) > 1: + cls = _filter_classes[parts[0].strip()] + evaluator.add_filter( cls(parts[1]) ) + return evaluator + + + +import unittest + +class TestFiltering(unittest.TestCase): + + def setUp(self): + + self.reader = ccdc.io.EntryReader('CSD') + self.aabhtz = self.reader.entry("AABHTZ") + self.aacani_ten = self.reader.entry("AACANI10") + self.aadamc = self.reader.entry("AADAMC") + self.aadrib = self.reader.entry("AADRIB") + self.abadis = self.reader.entry("ABADIS") + + def test_organic_filter(self): + + test_file = """ +organic : 1 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + self.assertTrue(evaluator.evaluate(self.aabhtz)) + + self.assertFalse(evaluator.evaluate(self.aacani_ten)) + + def test_component_filter(self): + test_file = """ +component range : 0 1 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + self.assertTrue(evaluator.evaluate(self.aabhtz)) + + self.assertFalse(evaluator.evaluate(self.aacani_ten)) + + def test_donor_count_filter(self): + test_file = """ +donor count : 2 2 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + self.assertFalse(evaluator.evaluate(self.aabhtz)) + + self.assertTrue(evaluator.evaluate(self.aadamc)) + + test_file = """ +donor count : 0 3 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) 
+ + self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertTrue(evaluator.evaluate(self.aadamc)) + + def test_acceptor_count_filter(self): + test_file = """ +acceptor count : 7 7 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + # regards Cl as an acceptor ... + self.assertTrue(evaluator.evaluate(self.aabhtz)) + + self.assertTrue(evaluator.evaluate(self.aacani_ten)) + + + def test_zprime(self): + test_file = """ +zprime range : 0.99 1.01 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertFalse(evaluator.evaluate(self.aadrib)) + + def test_atomic_numbers(self): + test_file = """ +allowed atomic numbers : 1 6 7 8 +must have atomic numbers : 1 6 7 8 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertFalse(evaluator.evaluate(self.aabhtz)) + self.assertFalse(evaluator.evaluate(self.aadrib)) + + test_file = """ +must have atomic numbers : 1 6 7 8 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertFalse(evaluator.evaluate(self.aadrib)) + + def test_rotatable_bond_count(self): + test_file = """ +rotatable bond count : 0 3 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertTrue(evaluator.evaluate(self.abadis)) + + + def test_multiple(self): + test_file = """ + +# An example control file +# +# +# only include organic structures as output +organic : 1 +# specify a range of donors +donor count : 0 10 +# specify a range of acceptors +acceptor count : 5 5 +# rotatable bond count range +rotatable bond count : 3 7 +# number of atoms to allow through +atom count : 0 100 +# only include structures containing Hydrogen, Carbon, Nitrogen or Oxygen and nothing else +allowed atomic numbers : 1 6 7 8 +# only include structures containing all of these elements (i.e.) 
Hydrogen, Carbon, Nitrogen or Oxygen +must have atomic numbers : 1 6 7 8 +# Ensure Z-prime is one +zprime range : 0.99 1.01 +# Ensure only one component in the structure +component range : 2 2 +# Dont include disordered structures +disordered : 0 +# Specify an R-factor range +rfactor range : 0.1 5 +# + + +""" + + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + counter = 0 + hits = [] + + test_entries = ['AABHTZ','ABAQEB','ABELEY', 'ADAQOM','ADARAA','ADARAZ','ADUWIG','AFEREK'] + for id in test_entries: + e = self.reader.entry(id) + + if evaluator.evaluate(e): + hits.append(e.identifier) + + self.assertEquals( ['ABAQEB','ABELEY','ADAQOM','ADUWIG','AFEREK'], hits ) + + +if __name__ == "__main__": + unittest.main() diff --git a/scripts/refcodes_with_properties/ReadMe.md b/scripts/refcodes_with_properties/ReadMe.md new file mode 100644 index 0000000..6608799 --- /dev/null +++ b/scripts/refcodes_with_properties/ReadMe.md @@ -0,0 +1,49 @@ +# Refcode List Generator + +## Summary +A script that allows you to create refcode lists (or CSV files of properties for a refcode list) for simple properties. The advantage of the script is +that the control is via an easy to read file so you can keep an interprettable record of how a test set was generated in research. You can also then +reproduce the list, or indeed run it on a new database and update it with the same conditions. + +### Relevance +We want research to be FAIR (Findable, Attributable, Interoperable and Reproducible) - this script means we can create a simple decscription of the +test set used that any researcher could then reproduce from the script and the description. 
+ +## Requirements + +- Tested with CSD Python API version 3.9 on Linux and Windows +- ccdc.io +- ccdc.search + +## Licensing Requirements + +- CSD-Core + +## Instructions on Running +### Linux command line +- load the CSD Python API Miniconda environment +- create a text control file with the various control lines specified +- call Python to read the script and specify necessary arguments +~~~ +$ python refcodes_with_properties.py --help +~~~ +The above will print an extended help message that describes the registered + +You can run the script with an Example file. Results are printed by default and can be redirected to be saved in an output file, e.g. +~~~ +$ python refcodes_with_properties.py -c example_control_file.txt -o mylist.gcd +~~~ +This will generate a GCD file that can be used in other work. + +### Windows CSD Python API +- launch a CMD window +- Use the installed version of the CSD Python API, for example C:\Users\\CCDC\ccdc-software\csd-python-api assuming the CCDC tools are installed in the ususal place do this +~~~ +C:\Users\\CCDC\ccdc-software\csd-python-api\run_python_api.bat refcodes_with_properties.py --help +~~~ + +## Author + +_Jason C.Cole_ 2025 + +> For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) diff --git a/scripts/refcodes_with_properties/example_control_file.txt b/scripts/refcodes_with_properties/example_control_file.txt new file mode 100644 index 0000000..a511b2d --- /dev/null +++ b/scripts/refcodes_with_properties/example_control_file.txt @@ -0,0 +1,19 @@ +# An example control file - this will find all organic structures +# with up to 100 atoms, Z' = 1, only 1 component that isnt disordered and +# has a low R-Factor +# +# only include organic structures as output +organic : 1 +# number of atoms to allow through +atom count : 0 100 +# Ensure Z-prime is one +zprime range : 0.99 1.01 +# Ensure only one component in the structure +component range : 0 1 +# Dont include disordered 
structures +disordered : 0 +# Specify an R-factor range +rfactor range : 0.1 5 + + + diff --git a/scripts/refcodes_with_properties/more_elaborate_control.txt b/scripts/refcodes_with_properties/more_elaborate_control.txt new file mode 100644 index 0000000..3590d33 --- /dev/null +++ b/scripts/refcodes_with_properties/more_elaborate_control.txt @@ -0,0 +1,29 @@ +# An example control file +# +# +# only include organic structures as output +organic : 1 +# specify a range of donors +donor count : 0 10 +# specify a range of acceptors +acceptor count : 5 5 +# rotatable bond count range +rotatable bond count : 3 7 +# number of atoms to allow through +atom count : 0 100 +# only include structures containing Hydrogen, Carbon, Nitrogen or Oxygen and nothing else +allowed atomic numbers : 1 6 7 8 +# only include structures containing all of these elements (i.e.) Hydrogen, Carbon, Nitrogen or Oxygen +must have atomic numbers : 1 6 7 8 +# Ensure Z-prime is one +zprime range : 0.99 1.01 +# Ensure only one component in the structure +component range : 0 1 +# Dont include disordered structures +disordered : 0 +# Specify an R-factor range +rfactor range : 0.1 5 +# atomic weight +atomic weight : 0.0 1000.0 + + diff --git a/scripts/refcodes_with_properties/refcodes_with_properties.py b/scripts/refcodes_with_properties/refcodes_with_properties.py new file mode 100644 index 0000000..341d8a1 --- /dev/null +++ b/scripts/refcodes_with_properties/refcodes_with_properties.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# +# This script can be used for any purpose without limitation subject to the +# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of this script. +# +# 2025-03-14: created by Jason C. 
Cole, The Cambridge Crystallographic Data Centre + +''' +Filter a refcode list to the subset that have the desired properties +''' + +######################################################################### + +import sys +import os +import argparse +import EntryPropertyCalculator +from ccdc import io + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter ) + + + parser.add_argument( '-r','--refcode_file', help='input file containing the list of refcodes', default = None ) + parser.add_argument( '-d','--database_file', help='input file containing the list of refcodes', default = None ) + parser.add_argument( '-c','--control_file', help='configuration file containing the desired properties\n\n %s' % (EntryPropertyCalculator.helptext()) ) + parser.add_argument( '-v','--get_values', action="store_true", help='calculate and print descriptor values where possible rather than filter\n\n %s' % (EntryPropertyCalculator.helptext()) ) + parser.add_argument( '-o','--output_file', default = None, help='output CSV file for results\n\n %s' % (EntryPropertyCalculator.helptext()) ) + + args = parser.parse_args() + + refcode_file = args.refcode_file + database_file = args.database_file + control_file = args.control_file + print_values = args.get_values + + outfile = sys.stdout + if args.output_file != None: + outfile = open(args.output_file, 'wb') + + filterer = EntryPropertyCalculator.parse_control_file(open(control_file,"r").readlines()) + + reader = None + if refcode_file == None: + if database_file == None: + reader = io.EntryReader('CSD') + else: + reader = io.EntryReader(database_file) + else: + reader = io.EntryReader(refcode_file, format='identifiers') + + if args.get_values: + import csv + csvwriter = None + for entry in reader: + values = filterer.values(entry) + if csvwriter == None: + fieldnames=["identifier"] + values.keys() + csvwriter = csv.DictWriter(outfile, 
fieldnames=fieldnames) + csvwriter.writeheader() + values["identifier"] = entry.identifier + csvwriter.writerow(values) + + else: + for entry in reader: + if filterer.evaluate(entry): + outfile.write(entry.identifier + "\n") From d778a1ec3857cb3aff5f8f87da84b5f74cf2c4d8 Mon Sep 17 00:00:00 2001 From: Jason Christopher Cole <62337096+jasonccole@users.noreply.github.com> Date: Tue, 18 Mar 2025 12:02:06 +0000 Subject: [PATCH 02/20] Apply flake automatic blank link suggestions from code review (NO_JIRA) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- scripts/refcodes_with_properties/ReadMe.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/refcodes_with_properties/ReadMe.md b/scripts/refcodes_with_properties/ReadMe.md index 6608799..471e9fc 100644 --- a/scripts/refcodes_with_properties/ReadMe.md +++ b/scripts/refcodes_with_properties/ReadMe.md @@ -1,11 +1,13 @@ # Refcode List Generator ## Summary + A script that allows you to create refcode lists (or CSV files of properties for a refcode list) for simple properties. The advantage of the script is that the control is via an easy to read file so you can keep an interprettable record of how a test set was generated in research. You can also then reproduce the list, or indeed run it on a new database and update it with the same conditions. ### Relevance + We want research to be FAIR (Findable, Attributable, Interoperable and Reproducible) - this script means we can create a simple decscription of the test set used that any researcher could then reproduce from the script and the description. 
@@ -20,14 +22,18 @@ test set used that any researcher could then reproduce from the script and the d - CSD-Core ## Instructions on Running + ### Linux command line + - load the CSD Python API Miniconda environment - create a text control file with the various control lines specified - call Python to read the script and specify necessary arguments + ~~~ -$ python refcodes_with_properties.py --help +python refcodes_with_properties.py --help ~~~ -The above will print an extended help message that describes the registered + +The above will print an extended help message that describes the registered You can run the script with an Example file. Results are printed by default and can be redirected to be saved in an output file, e.g. ~~~ From b0b231cd7111ced880ff22ead2c110277d3c8c69 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 14:23:51 +0000 Subject: [PATCH 03/20] correct for ridiculous reviewdog line complaints (NO_JIRA) --- scripts/refcodes_with_properties/ReadMe.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/refcodes_with_properties/ReadMe.md b/scripts/refcodes_with_properties/ReadMe.md index 471e9fc..0612547 100644 --- a/scripts/refcodes_with_properties/ReadMe.md +++ b/scripts/refcodes_with_properties/ReadMe.md @@ -1,28 +1,34 @@ # Refcode List Generator + ## Summary A script that allows you to create refcode lists (or CSV files of properties for a refcode list) for simple properties. The advantage of the script is that the control is via an easy to read file so you can keep an interprettable record of how a test set was generated in research. You can also then reproduce the list, or indeed run it on a new database and update it with the same conditions. + ### Relevance We want research to be FAIR (Findable, Attributable, Interoperable and Reproducible) - this script means we can create a simple decscription of the test set used that any researcher could then reproduce from the script and the description. 
+ ## Requirements - Tested with CSD Python API version 3.9 on Linux and Windows - ccdc.io - ccdc.search + ## Licensing Requirements - CSD-Core + ## Instructions on Running + ### Linux command line - load the CSD Python API Miniconda environment @@ -41,6 +47,7 @@ $ python refcodes_with_properties.py -c example_control_file.txt -o mylist.gcd ~~~ This will generate a GCD file that can be used in other work. + ### Windows CSD Python API - launch a CMD window - Use the installed version of the CSD Python API, for example C:\Users\\CCDC\ccdc-software\csd-python-api assuming the CCDC tools are installed in the ususal place do this @@ -48,6 +55,7 @@ This will generate a GCD file that can be used in other work. C:\Users\\CCDC\ccdc-software\csd-python-api\run_python_api.bat refcodes_with_properties.py --help ~~~ + ## Author _Jason C.Cole_ 2025 From 4cc4d45529c507aa100bfc5685ec84e3a1e8d312 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 14:25:17 +0000 Subject: [PATCH 04/20] review items (NO_JIRA)) --- .../EntryPropertyCalculator.py | 53 +++++++++++++++---- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/scripts/refcodes_with_properties/EntryPropertyCalculator.py b/scripts/refcodes_with_properties/EntryPropertyCalculator.py index dda96ea..7b2e725 100644 --- a/scripts/refcodes_with_properties/EntryPropertyCalculator.py +++ b/scripts/refcodes_with_properties/EntryPropertyCalculator.py @@ -15,18 +15,22 @@ import ccdc.io _filter_classes = {} + + def register(cls): - ''' Register a filter class to use in the script. - :param cls: the class to register + ''' Register a filter class to use in the script. + :param cls: the class to register. ''' if cls.name() in _filter_classes: raise ValueError(f"a class with the name {cls.name()} is already registered. 
Use a different name") _filter_classes[cls.name()] = cls + def filter(name): return _filter_classes[name] + def helptext(): ''' Get help text ''' @@ -41,15 +45,16 @@ class _Filter(object): @staticmethod def name(): - raise NotImplementedError # override this + raise NotImplementedError # override this @staticmethod def helptext(): - raise NotImplementedError # override this + raise NotImplementedError # override this @staticmethod def argument_pair(): - raise NotImplementedError # override this + raise NotImplementedError # override this + class _ComparativeFilter(_Filter): def __init__(self, args): @@ -60,7 +65,7 @@ def __init__(self, args): self.expected_value = value def value(self): - raise NotImplementedError # override this + raise NotImplementedError # override this def __call__(self,theobject): value = self.value(theobject) @@ -71,8 +76,8 @@ class _RangeFilter(_Filter): def __init__(self, args): parts = [ p.strip() for p in args.split() ] - self.minimum = eval(parts[0]) - self.maximum = eval(parts[1]) + self.minimum = float(parts[0]) + self.maximum = float(parts[1]) def value(self): raise NotImplementedError # override this @@ -81,9 +86,10 @@ def __call__(self,theobject): value = self.value(theobject) return value >= self.minimum and value <= self.maximum + class AllowedAtomicNumbersFilter(_Filter): def __init__(self,args): - self.allowed_atomic_numbers = [eval(x) for x in args.strip().split()] + self.allowed_atomic_numbers = [int(x) for x in args.strip().split()] @staticmethod def name(): @@ -105,7 +111,7 @@ def __call__(self,entry): class MustContainAtomicNumbersFilter(_Filter): def __init__(self,args): - self.must_have_atomic_numbers = [eval(x) for x in args.strip().split()] + self.must_have_atomic_numbers = [int(x) for x in args.strip().split()] @staticmethod def name(): @@ -132,8 +138,10 @@ def __call__(self,entry): except: return False + register(MustContainAtomicNumbersFilter) + class OrganicFilter(_ComparativeFilter): def __init__(self, args): 
super(self.__class__,self).__init__(args) @@ -149,8 +157,10 @@ def helptext(): def value(self,entry): return entry.is_organic + register(OrganicFilter) + class PolymericFilter(_ComparativeFilter): def __init__(self, args): super(self.__class__,self).__init__(args) @@ -166,8 +176,10 @@ def helptext(): def value(self,entry): return entry.is_polymeric + register(PolymericFilter) + class AllHaveSitesFilter(_ComparativeFilter): def __init__(self, args): super(self.__class__,self).__init__(args) @@ -186,8 +198,10 @@ def value(self,entry): except: return False + register(AllHaveSitesFilter) + class DisorderedFilter(_ComparativeFilter): def __init__(self, args): @@ -204,6 +218,7 @@ def helptext(): def value(self,entry): return entry.has_disorder + register(DisorderedFilter) @@ -226,8 +241,10 @@ def value(self,entry): except TypeError: return 0.0 + register(AtomicWeightFilter) + class AtomCountFilter(_RangeFilter): def __init__(self,args): super(self.__class__,self).__init__(args) @@ -247,8 +264,10 @@ def value(self,entry): except TypeError: return 0 + register(AtomCountFilter) + class RotatableBondFilter(_RangeFilter): def __init__(self,args): super(self.__class__,self).__init__(args) @@ -268,8 +287,10 @@ def value(self,entry): except TypeError: return 0 + register(RotatableBondFilter) + class DonorCountFilter(_RangeFilter): def __init__(self,args): super(self.__class__,self).__init__(args) @@ -289,8 +310,10 @@ def value(self,entry): except TypeError: return 0 + register(DonorCountFilter) + class AcceptorCountFilter(_RangeFilter): def __init__(self,args): super(self.__class__,self).__init__(args) @@ -310,8 +333,10 @@ def value(self,entry): except TypeError: return 0 + register(AcceptorCountFilter) + class ComponentCountFilter(_RangeFilter): def __init__(self,args): super(self.__class__,self).__init__(args) @@ -330,6 +355,7 @@ def value(self,entry): except TypeError: return 0 + register(ComponentCountFilter) @@ -348,6 +374,7 @@ def helptext(): def value(self,entry): return 
entry.crystal.z_prime + register(ZPrimeFilter) @@ -366,8 +393,10 @@ def helptext(): def value(self,entry): return entry.r_factor + register(RfactorFilter) + class SpacegroupNumberFilter(_RangeFilter): def __init__(self,args): super(self.__class__,self).__init__(args) @@ -383,8 +412,10 @@ def helptext(): def value(self,entry): return entry.crystal.spacegroup_number_and_setting[0] + register(SpacegroupNumberFilter) + class FilterEvaluation(object): def __init__(self): self._methods = [] @@ -412,6 +443,7 @@ def values(self,entry): pass return values + def parse_control_file(lines): evaluator = FilterEvaluation() for line in lines: @@ -423,7 +455,6 @@ def parse_control_file(lines): return evaluator - import unittest class TestFiltering(unittest.TestCase): From bb07d18b7b9c0337a4e5013b2f0dc491d54821d2 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 14:28:56 +0000 Subject: [PATCH 05/20] Apply pycharm formatting to hopefully resolve reviewdog (NO_JIRA) --- .../EntryPropertyCalculator.py | 253 +++++++++--------- 1 file changed, 126 insertions(+), 127 deletions(-) diff --git a/scripts/refcodes_with_properties/EntryPropertyCalculator.py b/scripts/refcodes_with_properties/EntryPropertyCalculator.py index 7b2e725..678d59d 100644 --- a/scripts/refcodes_with_properties/EntryPropertyCalculator.py +++ b/scripts/refcodes_with_properties/EntryPropertyCalculator.py @@ -28,7 +28,7 @@ def register(cls): def filter(name): - return _filter_classes[name] + return _filter_classes[name] def helptext(): @@ -36,8 +36,8 @@ def helptext(): ''' txt = "" for name in _filter_classes.keys(): - cls = _filter_classes[name] - txt = txt + " %s -> %s," % ( name, cls.helptext() ) + cls = _filter_classes[name] + txt = txt + " %s -> %s," % (name, cls.helptext()) return txt[:-1] @@ -60,35 +60,34 @@ class _ComparativeFilter(_Filter): def __init__(self, args): value = False if eval(args.strip()) == 1: - value = True + value = True self.expected_value = value def value(self): raise 
NotImplementedError # override this - def __call__(self,theobject): + def __call__(self, theobject): value = self.value(theobject) return value == self.expected_value class _RangeFilter(_Filter): def __init__(self, args): - - parts = [ p.strip() for p in args.split() ] + parts = [p.strip() for p in args.split()] self.minimum = float(parts[0]) self.maximum = float(parts[1]) def value(self): - raise NotImplementedError # override this + raise NotImplementedError # override this - def __call__(self,theobject): + def __call__(self, theobject): value = self.value(theobject) return value >= self.minimum and value <= self.maximum class AllowedAtomicNumbersFilter(_Filter): - def __init__(self,args): + def __init__(self, args): self.allowed_atomic_numbers = [int(x) for x in args.strip().split()] @staticmethod @@ -99,18 +98,20 @@ def name(): def helptext(): return "specify a set of atomic numbers (space separated) that the structure can have (and no others)" - def __call__(self,entry): + def __call__(self, entry): try: molecule = entry.crystal.molecule - return len([x for x in molecule.atoms if x.atomic_number in self.allowed_atomic_numbers]) == len(molecule.atoms) + return len([x for x in molecule.atoms if x.atomic_number in self.allowed_atomic_numbers]) == len( + molecule.atoms) except TypeError: return False + register(AllowedAtomicNumbersFilter) class MustContainAtomicNumbersFilter(_Filter): - def __init__(self,args): + def __init__(self, args): self.must_have_atomic_numbers = [int(x) for x in args.strip().split()] @staticmethod @@ -121,18 +122,18 @@ def name(): def helptext(): return "specify a set of atomic numbers (space separated) that the structure must have" - def __call__(self,entry): + def __call__(self, entry): try: molecule = entry.crystal.molecule contains = {} for x in molecule.atoms: - if not contains.has_key(x.atomic_number): - contains[x.atomic_number] = 0 - contains[x.atomic_number] = contains[x.atomic_number] + 1 + if not 
contains.has_key(x.atomic_number): + contains[x.atomic_number] = 0 + contains[x.atomic_number] = contains[x.atomic_number] + 1 for x in self.must_have_atomic_numbers: if not contains.has_key(x): - return False + return False return True except: @@ -144,7 +145,7 @@ def __call__(self,entry): class OrganicFilter(_ComparativeFilter): def __init__(self, args): - super(self.__class__,self).__init__(args) + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -154,7 +155,7 @@ def name(): def helptext(): return "organic entries or not" - def value(self,entry): + def value(self, entry): return entry.is_organic @@ -163,7 +164,7 @@ def value(self,entry): class PolymericFilter(_ComparativeFilter): def __init__(self, args): - super(self.__class__,self).__init__(args) + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -173,7 +174,7 @@ def name(): def helptext(): return "polymeric entries or not" - def value(self,entry): + def value(self, entry): return entry.is_polymeric @@ -182,7 +183,7 @@ def value(self,entry): class AllHaveSitesFilter(_ComparativeFilter): def __init__(self, args): - super(self.__class__,self).__init__(args) + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -192,7 +193,7 @@ def name(): def helptext(): return "whether all atoms have to have sites" - def value(self,entry): + def value(self, entry): try: return entry.crystal.molecule.all_atoms_have_sites except: @@ -204,8 +205,7 @@ def value(self,entry): class DisorderedFilter(_ComparativeFilter): def __init__(self, args): - - super(self.__class__,self).__init__(args) + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -215,7 +215,7 @@ def name(): def helptext(): return "disordered entries or not" - def value(self,entry): + def value(self, entry): return entry.has_disorder @@ -223,8 +223,8 @@ def value(self,entry): class AtomicWeightFilter(_RangeFilter): - def __init__(self,args): - super(self.__class__,self).__init__(args) + 
def __init__(self, args): + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -234,7 +234,7 @@ def name(): def helptext(): return "specify a range of atomic weight (for the whole structure - not individual molecules)" - def value(self,entry): + def value(self, entry): try: molecule = entry.crystal.molecule return molecule.molecular_weight @@ -246,8 +246,8 @@ def value(self,entry): class AtomCountFilter(_RangeFilter): - def __init__(self,args): - super(self.__class__,self).__init__(args) + def __init__(self, args): + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -257,7 +257,7 @@ def name(): def helptext(): return "specify a range of atom counts (for the whole structure - not individual molecules)" - def value(self,entry): + def value(self, entry): try: molecule = entry.crystal.molecule return len(molecule.atoms) @@ -269,8 +269,8 @@ def value(self,entry): class RotatableBondFilter(_RangeFilter): - def __init__(self,args): - super(self.__class__,self).__init__(args) + def __init__(self, args): + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -280,10 +280,10 @@ def name(): def helptext(): return "specify the number of rotatable bonds (for the whole structure - not individual molecules)" - def value(self,entry): + def value(self, entry): try: molecule = entry.crystal.molecule - return sum( x.is_rotatable for x in molecule.bonds ) + return sum(x.is_rotatable for x in molecule.bonds) except TypeError: return 0 @@ -292,8 +292,8 @@ def value(self,entry): class DonorCountFilter(_RangeFilter): - def __init__(self,args): - super(self.__class__,self).__init__(args) + def __init__(self, args): + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -303,7 +303,7 @@ def name(): def helptext(): return "specify a donor atom count range (for the whole structure - not individual molecules)" - def value(self,entry): + def value(self, entry): try: molecule = entry.crystal.molecule return len([x 
for x in molecule.atoms if x.is_donor]) @@ -315,8 +315,8 @@ def value(self,entry): class AcceptorCountFilter(_RangeFilter): - def __init__(self,args): - super(self.__class__,self).__init__(args) + def __init__(self, args): + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -326,7 +326,7 @@ def name(): def helptext(): return "specify an acceptor atom count range (for the whole structure - not individual molecules)" - def value(self,entry): + def value(self, entry): try: molecule = entry.crystal.molecule return len([x for x in molecule.atoms if x.is_acceptor]) @@ -338,8 +338,8 @@ def value(self,entry): class ComponentCountFilter(_RangeFilter): - def __init__(self,args): - super(self.__class__,self).__init__(args) + def __init__(self, args): + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -349,7 +349,7 @@ def name(): def helptext(): return "specify a component count range for the whole structure" - def value(self,entry): + def value(self, entry): try: return len(entry.crystal.molecule.components) except TypeError: @@ -360,8 +360,8 @@ def value(self,entry): class ZPrimeFilter(_RangeFilter): - def __init__(self,args): - super(self.__class__,self).__init__(args) + def __init__(self, args): + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -371,7 +371,7 @@ def name(): def helptext(): return "specify a z-prime range" - def value(self,entry): + def value(self, entry): return entry.crystal.z_prime @@ -379,8 +379,8 @@ def value(self,entry): class RfactorFilter(_RangeFilter): - def __init__(self,args): - super(self.__class__,self).__init__(args) + def __init__(self, args): + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -390,7 +390,7 @@ def name(): def helptext(): return "specify r-factor range (in %%)" - def value(self,entry): + def value(self, entry): return entry.r_factor @@ -398,8 +398,8 @@ def value(self,entry): class SpacegroupNumberFilter(_RangeFilter): - def 
__init__(self,args): - super(self.__class__,self).__init__(args) + def __init__(self, args): + super(self.__class__, self).__init__(args) @staticmethod def name(): @@ -409,7 +409,7 @@ def name(): def helptext(): return "specify spacegroup number range" - def value(self,entry): + def value(self, entry): return entry.crystal.spacegroup_number_and_setting[0] @@ -420,23 +420,23 @@ class FilterEvaluation(object): def __init__(self): self._methods = [] - def add_filter(self,method): + def add_filter(self, method): self._methods.append(method) - def evaluate(self,entry): + def evaluate(self, entry): for method in self._methods: - try: - if not method(entry): - return False - except TypeError: - return False + try: + if not method(entry): + return False + except TypeError: + return False return True - def values(self,entry): + def values(self, entry): values = {} for method in self._methods: - if hasattr(method,"value"): + if hasattr(method, "value"): try: values[method.name()] = method.value(entry) except NotImplementedError: @@ -447,118 +447,117 @@ def values(self,entry): def parse_control_file(lines): evaluator = FilterEvaluation() for line in lines: - if len(line) > 0 and line[0] != '#': - parts = line.split(":") - if len(parts) > 1: - cls = _filter_classes[parts[0].strip()] - evaluator.add_filter( cls(parts[1]) ) + if len(line) > 0 and line[0] != '#': + parts = line.split(":") + if len(parts) > 1: + cls = _filter_classes[parts[0].strip()] + evaluator.add_filter(cls(parts[1])) return evaluator import unittest + class TestFiltering(unittest.TestCase): def setUp(self): - self.reader = ccdc.io.EntryReader('CSD') - self.aabhtz = self.reader.entry("AABHTZ") - self.aacani_ten = self.reader.entry("AACANI10") - self.aadamc = self.reader.entry("AADAMC") - self.aadrib = self.reader.entry("AADRIB") - self.abadis = self.reader.entry("ABADIS") + self.reader = ccdc.io.EntryReader('CSD') + self.aabhtz = self.reader.entry("AABHTZ") + self.aacani_ten = self.reader.entry("AACANI10") + 
self.aadamc = self.reader.entry("AADAMC") + self.aadrib = self.reader.entry("AADRIB") + self.abadis = self.reader.entry("ABADIS") def test_organic_filter(self): - test_file = """ + test_file = """ organic : 1 """ - lines = test_file.split('\n') - evaluator = parse_control_file(lines) + lines = test_file.split('\n') + evaluator = parse_control_file(lines) - self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertTrue(evaluator.evaluate(self.aabhtz)) - self.assertFalse(evaluator.evaluate(self.aacani_ten)) + self.assertFalse(evaluator.evaluate(self.aacani_ten)) def test_component_filter(self): - test_file = """ + test_file = """ component range : 0 1 """ - lines = test_file.split('\n') - evaluator = parse_control_file(lines) + lines = test_file.split('\n') + evaluator = parse_control_file(lines) - self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertTrue(evaluator.evaluate(self.aabhtz)) - self.assertFalse(evaluator.evaluate(self.aacani_ten)) + self.assertFalse(evaluator.evaluate(self.aacani_ten)) def test_donor_count_filter(self): - test_file = """ + test_file = """ donor count : 2 2 """ - lines = test_file.split('\n') - evaluator = parse_control_file(lines) + lines = test_file.split('\n') + evaluator = parse_control_file(lines) - self.assertFalse(evaluator.evaluate(self.aabhtz)) + self.assertFalse(evaluator.evaluate(self.aabhtz)) - self.assertTrue(evaluator.evaluate(self.aadamc)) + self.assertTrue(evaluator.evaluate(self.aadamc)) - test_file = """ + test_file = """ donor count : 0 3 """ - lines = test_file.split('\n') - evaluator = parse_control_file(lines) + lines = test_file.split('\n') + evaluator = parse_control_file(lines) - self.assertTrue(evaluator.evaluate(self.aabhtz)) - self.assertTrue(evaluator.evaluate(self.aadamc)) + self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertTrue(evaluator.evaluate(self.aadamc)) def test_acceptor_count_filter(self): - test_file = """ + test_file = """ acceptor count : 7 7 """ - lines = 
test_file.split('\n') - evaluator = parse_control_file(lines) - - # regards Cl as an acceptor ... - self.assertTrue(evaluator.evaluate(self.aabhtz)) + lines = test_file.split('\n') + evaluator = parse_control_file(lines) - self.assertTrue(evaluator.evaluate(self.aacani_ten)) + # regards Cl as an acceptor ... + self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertTrue(evaluator.evaluate(self.aacani_ten)) def test_zprime(self): - test_file = """ + test_file = """ zprime range : 0.99 1.01 """ - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - self.assertTrue(evaluator.evaluate(self.aabhtz)) - self.assertFalse(evaluator.evaluate(self.aadrib)) + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertFalse(evaluator.evaluate(self.aadrib)) def test_atomic_numbers(self): - test_file = """ + test_file = """ allowed atomic numbers : 1 6 7 8 must have atomic numbers : 1 6 7 8 """ - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - self.assertFalse(evaluator.evaluate(self.aabhtz)) - self.assertFalse(evaluator.evaluate(self.aadrib)) + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertFalse(evaluator.evaluate(self.aabhtz)) + self.assertFalse(evaluator.evaluate(self.aadrib)) - test_file = """ + test_file = """ must have atomic numbers : 1 6 7 8 """ - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - self.assertTrue(evaluator.evaluate(self.aabhtz)) - self.assertFalse(evaluator.evaluate(self.aadrib)) + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertFalse(evaluator.evaluate(self.aadrib)) def test_rotatable_bond_count(self): - test_file = """ + test_file = """ rotatable bond count : 0 3 """ - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - self.assertTrue(evaluator.evaluate(self.abadis)) - 
+ lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertTrue(evaluator.evaluate(self.abadis)) def test_multiple(self): test_file = """ @@ -599,14 +598,14 @@ def test_multiple(self): counter = 0 hits = [] - test_entries = ['AABHTZ','ABAQEB','ABELEY', 'ADAQOM','ADARAA','ADARAZ','ADUWIG','AFEREK'] + test_entries = ['AABHTZ', 'ABAQEB', 'ABELEY', 'ADAQOM', 'ADARAA', 'ADARAZ', 'ADUWIG', 'AFEREK'] for id in test_entries: - e = self.reader.entry(id) + e = self.reader.entry(id) - if evaluator.evaluate(e): - hits.append(e.identifier) + if evaluator.evaluate(e): + hits.append(e.identifier) - self.assertEquals( ['ABAQEB','ABELEY','ADAQOM','ADUWIG','AFEREK'], hits ) + self.assertEquals(['ABAQEB', 'ABELEY', 'ADAQOM', 'ADUWIG', 'AFEREK'], hits) if __name__ == "__main__": From 06c3befe2d5445e353559e863d35b1ffd59112f2 Mon Sep 17 00:00:00 2001 From: Jason Christopher Cole <62337096+jasonccole@users.noreply.github.com> Date: Tue, 18 Mar 2025 14:32:01 +0000 Subject: [PATCH 06/20] Update scripts/refcodes_with_properties/refcodes_with_properties.py Co-authored-by: Alex Moldovan --- .../refcodes_with_properties.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/scripts/refcodes_with_properties/refcodes_with_properties.py b/scripts/refcodes_with_properties/refcodes_with_properties.py index 341d8a1..b8ebc73 100644 --- a/scripts/refcodes_with_properties/refcodes_with_properties.py +++ b/scripts/refcodes_with_properties/refcodes_with_properties.py @@ -46,13 +46,12 @@ filterer = EntryPropertyCalculator.parse_control_file(open(control_file,"r").readlines()) reader = None - if refcode_file == None: - if database_file == None: - reader = io.EntryReader('CSD') - else: - reader = io.EntryReader(database_file) - else: + if refcode_file: reader = io.EntryReader(refcode_file, format='identifiers') + elif database_file: + reader = io.EntryReader(database_file) + else: + reader = io.EntryReader('CSD') if args.get_values: import csv From 
c1dfedb48a4c642800065d209a9816a618cd1c4f Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 14:36:41 +0000 Subject: [PATCH 07/20] further tidying (NO_JIRA) --- scripts/refcodes_with_properties/ReadMe.md | 29 +++++++++++----------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/scripts/refcodes_with_properties/ReadMe.md b/scripts/refcodes_with_properties/ReadMe.md index 0612547..44d6d4c 100644 --- a/scripts/refcodes_with_properties/ReadMe.md +++ b/scripts/refcodes_with_properties/ReadMe.md @@ -1,18 +1,16 @@ # Refcode List Generator - ## Summary -A script that allows you to create refcode lists (or CSV files of properties for a refcode list) for simple properties. The advantage of the script is -that the control is via an easy to read file so you can keep an interprettable record of how a test set was generated in research. You can also then -reproduce the list, or indeed run it on a new database and update it with the same conditions. - +A script that allows you to create refcode lists (or CSV files of properties for a refcode list) for simple properties. +The advantage of the script is that the control is via an easy to read file so you can keep an interpretable record of +how a test set was generated in research. You can also then reproduce the list, or indeed run it on a new database and +update it with the same conditions. ### Relevance -We want research to be FAIR (Findable, Attributable, Interoperable and Reproducible) - this script means we can create a simple decscription of the -test set used that any researcher could then reproduce from the script and the description. - +We want research to be FAIR (Findable, Attributable, Interoperable and Reproducible) - this script means we can create a +simple description of the test set used that any researcher could then reproduce from the script and the description.
## Requirements @@ -20,15 +18,12 @@ test set used that any researcher could then reproduce from the script and the d - ccdc.io - ccdc.search - ## Licensing Requirements - CSD-Core - ## Instructions on Running - ### Linux command line - load the CSD Python API Miniconda environment @@ -41,21 +36,25 @@ python refcodes_with_properties.py --help The above will print an extended help message that describes the registered -You can run the script with an Example file. Results are printed by default and can be redirected to be saved in an output file, e.g. +You can run the script with an Example file. Results are printed by default and can be redirected to be saved in an +output file, e.g. + ~~~ $ python refcodes_with_properties.py -c example_control_file.txt -o mylist.gcd ~~~ -This will generate a GCD file that can be used in other work. +This will generate a GCD file that can be used in other work. ### Windows CSD Python API + - launch a CMD window -- Use the installed version of the CSD Python API, for example C:\Users\\CCDC\ccdc-software\csd-python-api assuming the CCDC tools are installed in the ususal place do this +- Use the installed version of the CSD Python API, for example C:\Users\ + \CCDC\ccdc-software\csd-python-api assuming the CCDC tools are installed in the usual place do this + ~~~ C:\Users\\CCDC\ccdc-software\csd-python-api\run_python_api.bat refcodes_with_properties.py --help ~~~ - ## Author _Jason C.Cole_ 2025 From 0d387acc737c1090a7fdcc916d977e638ba8f990 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 14:39:17 +0000 Subject: [PATCH 08/20] some review stuff (NO_JIRA) --- scripts/refcodes_with_properties/EntryPropertyCalculator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/refcodes_with_properties/EntryPropertyCalculator.py b/scripts/refcodes_with_properties/EntryPropertyCalculator.py index 678d59d..371e920 100644 --- a/scripts/refcodes_with_properties/EntryPropertyCalculator.py +++
b/scripts/refcodes_with_properties/EntryPropertyCalculator.py @@ -88,7 +88,7 @@ def __call__(self, theobject): class AllowedAtomicNumbersFilter(_Filter): def __init__(self, args): - self.allowed_atomic_numbers = [int(x) for x in args.strip().split()] + self.allowed_atomic_numbers = [int(atomic_number) for atomic_number in args.strip().split()] @staticmethod def name(): @@ -112,7 +112,7 @@ def __call__(self, entry): class MustContainAtomicNumbersFilter(_Filter): def __init__(self, args): - self.must_have_atomic_numbers = [int(x) for x in args.strip().split()] + self.must_have_atomic_numbers = [int(atomic_number) for atomic_number in args.strip().split()] @staticmethod def name(): From d7ee2dcdd3cedd557e34037c4032614a8e19649b Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 14:48:56 +0000 Subject: [PATCH 09/20] rename files, move tests (NO_JIRA) --- ...ulator.py => entry_property_calculator.py} | 0 .../refcodes_with_properties.py | 10 +- .../test_entry_property_calculator.py | 153 ++++++++++++++++++ 3 files changed, 158 insertions(+), 5 deletions(-) rename scripts/refcodes_with_properties/{EntryPropertyCalculator.py => entry_property_calculator.py} (100%) create mode 100644 scripts/refcodes_with_properties/test_entry_property_calculator.py diff --git a/scripts/refcodes_with_properties/EntryPropertyCalculator.py b/scripts/refcodes_with_properties/entry_property_calculator.py similarity index 100% rename from scripts/refcodes_with_properties/EntryPropertyCalculator.py rename to scripts/refcodes_with_properties/entry_property_calculator.py diff --git a/scripts/refcodes_with_properties/refcodes_with_properties.py b/scripts/refcodes_with_properties/refcodes_with_properties.py index b8ebc73..0809e91 100644 --- a/scripts/refcodes_with_properties/refcodes_with_properties.py +++ b/scripts/refcodes_with_properties/refcodes_with_properties.py @@ -17,7 +17,7 @@ import sys import os import argparse -import EntryPropertyCalculator +import entry_property_calculator
from ccdc import io if __name__ == '__main__': @@ -28,9 +28,9 @@ parser.add_argument( '-r','--refcode_file', help='input file containing the list of refcodes', default = None ) parser.add_argument( '-d','--database_file', help='input file containing the list of refcodes', default = None ) - parser.add_argument( '-c','--control_file', help='configuration file containing the desired properties\n\n %s' % (EntryPropertyCalculator.helptext()) ) - parser.add_argument( '-v','--get_values', action="store_true", help='calculate and print descriptor values where possible rather than filter\n\n %s' % (EntryPropertyCalculator.helptext()) ) - parser.add_argument( '-o','--output_file', default = None, help='output CSV file for results\n\n %s' % (EntryPropertyCalculator.helptext()) ) + parser.add_argument( '-c','--control_file', help='configuration file containing the desired properties\n\n %s' % (entry_property_calculator.helptext()) ) + parser.add_argument( '-v','--get_values', action="store_true", help='calculate and print descriptor values where possible rather than filter\n\n %s' % (entry_property_calculator.helptext()) ) + parser.add_argument( '-o','--output_file', default = None, help='output CSV file for results\n\n %s' % (entry_property_calculator.helptext()) ) args = parser.parse_args() @@ -43,7 +43,7 @@ if args.output_file != None: outfile = open(args.output_file, 'wb') - filterer = EntryPropertyCalculator.parse_control_file(open(control_file,"r").readlines()) + filterer = entry_property_calculator.parse_control_file(open(control_file,"r").readlines()) reader = None if refcode_file: diff --git a/scripts/refcodes_with_properties/test_entry_property_calculator.py b/scripts/refcodes_with_properties/test_entry_property_calculator.py new file mode 100644 index 0000000..1272645 --- /dev/null +++ b/scripts/refcodes_with_properties/test_entry_property_calculator.py @@ -0,0 +1,153 @@ +import unittest +from ccdc.io import EntryReader + +from entry_property_calculator import 
parse_control_file + +class TestFiltering(unittest.TestCase): + + def setUp(self): + + self.reader = EntryReader('CSD') + self.aabhtz = self.reader.entry("AABHTZ") + self.aacani_ten = self.reader.entry("AACANI10") + self.aadamc = self.reader.entry("AADAMC") + self.aadrib = self.reader.entry("AADRIB") + self.abadis = self.reader.entry("ABADIS") + + def test_organic_filter(self): + + test_file = """ +organic : 1 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + self.assertTrue(evaluator.evaluate(self.aabhtz)) + + self.assertFalse(evaluator.evaluate(self.aacani_ten)) + + def test_component_filter(self): + test_file = """ +component range : 0 1 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + self.assertTrue(evaluator.evaluate(self.aabhtz)) + + self.assertFalse(evaluator.evaluate(self.aacani_ten)) + + def test_donor_count_filter(self): + test_file = """ +donor count : 2 2 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + self.assertFalse(evaluator.evaluate(self.aabhtz)) + + self.assertTrue(evaluator.evaluate(self.aadamc)) + + test_file = """ +donor count : 0 3 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertTrue(evaluator.evaluate(self.aadamc)) + + def test_acceptor_count_filter(self): + test_file = """ +acceptor count : 7 7 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + # regards Cl as an acceptor ... 
+ self.assertTrue(evaluator.evaluate(self.aabhtz)) + + self.assertTrue(evaluator.evaluate(self.aacani_ten)) + + def test_zprime(self): + test_file = """ +zprime range : 0.99 1.01 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertFalse(evaluator.evaluate(self.aadrib)) + + def test_atomic_numbers(self): + test_file = """ +allowed atomic numbers : 1 6 7 8 +must have atomic numbers : 1 6 7 8 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertFalse(evaluator.evaluate(self.aabhtz)) + self.assertFalse(evaluator.evaluate(self.aadrib)) + + test_file = """ +must have atomic numbers : 1 6 7 8 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertTrue(evaluator.evaluate(self.aabhtz)) + self.assertFalse(evaluator.evaluate(self.aadrib)) + + def test_rotatable_bond_count(self): + test_file = """ +rotatable bond count : 0 3 +""" + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + self.assertTrue(evaluator.evaluate(self.abadis)) + + def test_multiple(self): + test_file = """ + +# An example control file +# +# +# only include organic structures as output +organic : 1 +# specify a range of donors +donor count : 0 10 +# specify a range of acceptors +acceptor count : 5 5 +# rotatable bond count range +rotatable bond count : 3 7 +# number of atoms to allow through +atom count : 0 100 +# only include structures containing Hydrogen, Carbon, Nitrogen or Oxygen and nothing else +allowed atomic numbers : 1 6 7 8 +# only include structures containing all of these elements (i.e.) 
Hydrogen, Carbon, Nitrogen or Oxygen +must have atomic numbers : 1 6 7 8 +# Ensure Z-prime is one +zprime range : 0.99 1.01 +# Ensure only one component in the structure +component range : 2 2 +# Dont include disordered structures +disordered : 0 +# Specify an R-factor range +rfactor range : 0.1 5 +# + + +""" + + lines = test_file.split('\n') + evaluator = parse_control_file(lines) + + counter = 0 + hits = [] + + test_entries = ['AABHTZ', 'ABAQEB', 'ABELEY', 'ADAQOM', 'ADARAA', 'ADARAZ', 'ADUWIG', 'AFEREK'] + for id in test_entries: + e = self.reader.entry(id) + + if evaluator.evaluate(e): + hits.append(e.identifier) + + self.assertEquals(['ABAQEB', 'ABELEY', 'ADAQOM', 'ADUWIG', 'AFEREK'], hits) \ No newline at end of file From 0aaed47524be6f7c91920636af736d9d5d4eaea2 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 15:03:48 +0000 Subject: [PATCH 10/20] refactor and fix tests (NO_JIRA) --- .../entry_property_calculator.py | 173 +----------------- .../test_entry_property_calculator.py | 4 +- 2 files changed, 9 insertions(+), 168 deletions(-) diff --git a/scripts/refcodes_with_properties/entry_property_calculator.py b/scripts/refcodes_with_properties/entry_property_calculator.py index 371e920..2e74cd5 100644 --- a/scripts/refcodes_with_properties/entry_property_calculator.py +++ b/scripts/refcodes_with_properties/entry_property_calculator.py @@ -101,8 +101,7 @@ def helptext(): def __call__(self, entry): try: molecule = entry.crystal.molecule - return len([x for x in molecule.atoms if x.atomic_number in self.allowed_atomic_numbers]) == len( - molecule.atoms) + return len([x for x in molecule.atoms if x.atomic_number in self.allowed_atomic_numbers]) == len(molecule.atoms) except TypeError: return False @@ -126,17 +125,16 @@ def __call__(self, entry): try: molecule = entry.crystal.molecule - contains = {} + contains = set() for x in molecule.atoms: - if not contains.has_key(x.atomic_number): - contains[x.atomic_number] = 0 - contains[x.atomic_number] = 
contains[x.atomic_number] + 1 + contains.add(x.atomic_number) + for x in self.must_have_atomic_numbers: - if not contains.has_key(x): + if x not in contains: return False return True - except: + except TypeError: return False @@ -196,7 +194,7 @@ def helptext(): def value(self, entry): try: return entry.crystal.molecule.all_atoms_have_sites - except: + except TypeError: return False @@ -453,160 +451,3 @@ def parse_control_file(lines): cls = _filter_classes[parts[0].strip()] evaluator.add_filter(cls(parts[1])) return evaluator - - -import unittest - - -class TestFiltering(unittest.TestCase): - - def setUp(self): - - self.reader = ccdc.io.EntryReader('CSD') - self.aabhtz = self.reader.entry("AABHTZ") - self.aacani_ten = self.reader.entry("AACANI10") - self.aadamc = self.reader.entry("AADAMC") - self.aadrib = self.reader.entry("AADRIB") - self.abadis = self.reader.entry("ABADIS") - - def test_organic_filter(self): - - test_file = """ -organic : 1 -""" - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - - self.assertTrue(evaluator.evaluate(self.aabhtz)) - - self.assertFalse(evaluator.evaluate(self.aacani_ten)) - - def test_component_filter(self): - test_file = """ -component range : 0 1 -""" - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - - self.assertTrue(evaluator.evaluate(self.aabhtz)) - - self.assertFalse(evaluator.evaluate(self.aacani_ten)) - - def test_donor_count_filter(self): - test_file = """ -donor count : 2 2 -""" - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - - self.assertFalse(evaluator.evaluate(self.aabhtz)) - - self.assertTrue(evaluator.evaluate(self.aadamc)) - - test_file = """ -donor count : 0 3 -""" - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - - self.assertTrue(evaluator.evaluate(self.aabhtz)) - self.assertTrue(evaluator.evaluate(self.aadamc)) - - def test_acceptor_count_filter(self): - test_file = """ -acceptor count : 7 7 -""" - lines = 
test_file.split('\n') - evaluator = parse_control_file(lines) - - # regards Cl as an acceptor ... - self.assertTrue(evaluator.evaluate(self.aabhtz)) - - self.assertTrue(evaluator.evaluate(self.aacani_ten)) - - def test_zprime(self): - test_file = """ -zprime range : 0.99 1.01 -""" - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - self.assertTrue(evaluator.evaluate(self.aabhtz)) - self.assertFalse(evaluator.evaluate(self.aadrib)) - - def test_atomic_numbers(self): - test_file = """ -allowed atomic numbers : 1 6 7 8 -must have atomic numbers : 1 6 7 8 -""" - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - self.assertFalse(evaluator.evaluate(self.aabhtz)) - self.assertFalse(evaluator.evaluate(self.aadrib)) - - test_file = """ -must have atomic numbers : 1 6 7 8 -""" - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - self.assertTrue(evaluator.evaluate(self.aabhtz)) - self.assertFalse(evaluator.evaluate(self.aadrib)) - - def test_rotatable_bond_count(self): - test_file = """ -rotatable bond count : 0 3 -""" - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - self.assertTrue(evaluator.evaluate(self.abadis)) - - def test_multiple(self): - test_file = """ - -# An example control file -# -# -# only include organic structures as output -organic : 1 -# specify a range of donors -donor count : 0 10 -# specify a range of acceptors -acceptor count : 5 5 -# rotatable bond count range -rotatable bond count : 3 7 -# number of atoms to allow through -atom count : 0 100 -# only include structures containing Hydrogen, Carbon, Nitrogen or Oxygen and nothing else -allowed atomic numbers : 1 6 7 8 -# only include structures containing all of these elements (i.e.) 
Hydrogen, Carbon, Nitrogen or Oxygen -must have atomic numbers : 1 6 7 8 -# Ensure Z-prime is one -zprime range : 0.99 1.01 -# Ensure only one component in the structure -component range : 2 2 -# Dont include disordered structures -disordered : 0 -# Specify an R-factor range -rfactor range : 0.1 5 -# - - -""" - - lines = test_file.split('\n') - evaluator = parse_control_file(lines) - - counter = 0 - hits = [] - - test_entries = ['AABHTZ', 'ABAQEB', 'ABELEY', 'ADAQOM', 'ADARAA', 'ADARAZ', 'ADUWIG', 'AFEREK'] - for id in test_entries: - e = self.reader.entry(id) - - if evaluator.evaluate(e): - hits.append(e.identifier) - - self.assertEquals(['ABAQEB', 'ABELEY', 'ADAQOM', 'ADUWIG', 'AFEREK'], hits) - - -if __name__ == "__main__": - unittest.main() diff --git a/scripts/refcodes_with_properties/test_entry_property_calculator.py b/scripts/refcodes_with_properties/test_entry_property_calculator.py index 1272645..9c02d4a 100644 --- a/scripts/refcodes_with_properties/test_entry_property_calculator.py +++ b/scripts/refcodes_with_properties/test_entry_property_calculator.py @@ -98,7 +98,7 @@ def test_atomic_numbers(self): def test_rotatable_bond_count(self): test_file = """ -rotatable bond count : 0 3 +rotatable bond count : 0 4 """ lines = test_file.split('\n') evaluator = parse_control_file(lines) @@ -150,4 +150,4 @@ def test_multiple(self): if evaluator.evaluate(e): hits.append(e.identifier) - self.assertEquals(['ABAQEB', 'ABELEY', 'ADAQOM', 'ADUWIG', 'AFEREK'], hits) \ No newline at end of file + self.assertEqual(['ABAQEB', 'ABELEY', 'ADAQOM', 'ADUWIG', 'AFEREK'], hits) \ No newline at end of file From 2028a7413ce82446b81e481c2ce49161bde3f7c9 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 15:09:49 +0000 Subject: [PATCH 11/20] make supers more standard (NO_JIRA) --- .../entry_property_calculator.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/scripts/refcodes_with_properties/entry_property_calculator.py 
b/scripts/refcodes_with_properties/entry_property_calculator.py index 2e74cd5..f0b5e0a 100644 --- a/scripts/refcodes_with_properties/entry_property_calculator.py +++ b/scripts/refcodes_with_properties/entry_property_calculator.py @@ -59,7 +59,7 @@ def argument_pair(): class _ComparativeFilter(_Filter): def __init__(self, args): value = False - if eval(args.strip()) == 1: + if args.strip() == '1' or args.strip().lower() == 'true': value = True self.expected_value = value @@ -143,7 +143,7 @@ def __call__(self, entry): class OrganicFilter(_ComparativeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -162,7 +162,7 @@ def value(self, entry): class PolymericFilter(_ComparativeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -181,7 +181,7 @@ def value(self, entry): class AllHaveSitesFilter(_ComparativeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -203,7 +203,7 @@ def value(self, entry): class DisorderedFilter(_ComparativeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -222,7 +222,7 @@ def value(self, entry): class AtomicWeightFilter(_RangeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -245,7 +245,7 @@ def value(self, entry): class AtomCountFilter(_RangeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -268,7 +268,7 @@ def value(self, entry): class RotatableBondFilter(_RangeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -291,7 +291,7 @@ def value(self, entry): class 
DonorCountFilter(_RangeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -314,7 +314,7 @@ def value(self, entry): class AcceptorCountFilter(_RangeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -337,7 +337,7 @@ def value(self, entry): class ComponentCountFilter(_RangeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -359,7 +359,7 @@ def value(self, entry): class ZPrimeFilter(_RangeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -378,7 +378,7 @@ def value(self, entry): class RfactorFilter(_RangeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): @@ -397,7 +397,7 @@ def value(self, entry): class SpacegroupNumberFilter(_RangeFilter): def __init__(self, args): - super(self.__class__, self).__init__(args) + super().__init__(args) @staticmethod def name(): From 8a83ce8030840a53364cca1b2dda822be43d4247 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 15:14:28 +0000 Subject: [PATCH 12/20] pycharm formatting (NO_JIRA) --- .../refcodes_with_properties.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/scripts/refcodes_with_properties/refcodes_with_properties.py b/scripts/refcodes_with_properties/refcodes_with_properties.py index 0809e91..9713871 100644 --- a/scripts/refcodes_with_properties/refcodes_with_properties.py +++ b/scripts/refcodes_with_properties/refcodes_with_properties.py @@ -14,23 +14,27 @@ ######################################################################### -import sys -import os import argparse -import entry_property_calculator +import sys + from ccdc import io -if __name__ == '__main__': 
+import entry_property_calculator - parser = argparse.ArgumentParser( description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter ) +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument( '-r','--refcode_file', help='input file containing the list of refcodes', default = None ) - parser.add_argument( '-d','--database_file', help='input file containing the list of refcodes', default = None ) - parser.add_argument( '-c','--control_file', help='configuration file containing the desired properties\n\n %s' % (entry_property_calculator.helptext()) ) - parser.add_argument( '-v','--get_values', action="store_true", help='calculate and print descriptor values where possible rather than filter\n\n %s' % (entry_property_calculator.helptext()) ) - parser.add_argument( '-o','--output_file', default = None, help='output CSV file for results\n\n %s' % (entry_property_calculator.helptext()) ) + parser.add_argument('-r', '--refcode_file', help='input file containing the list of refcodes', default=None) + parser.add_argument('-d', '--database_file', help='input file containing the list of refcodes', default=None) + parser.add_argument('-c', '--control_file', help='configuration file containing the desired properties\n\n %s' % ( + entry_property_calculator.helptext())) + parser.add_argument('-v', '--get_values', action="store_true", + help='calculate and print descriptor values where possible rather than filter\n\n %s' % ( + entry_property_calculator.helptext())) + parser.add_argument('-o', '--output_file', default=None, + help='output CSV file for results\n\n %s' % (entry_property_calculator.helptext())) args = parser.parse_args() @@ -43,7 +47,7 @@ if args.output_file != None: outfile = open(args.output_file, 'wb') - filterer = entry_property_calculator.parse_control_file(open(control_file,"r").readlines()) + filterer = 
entry_property_calculator.parse_control_file(open(control_file, "r").readlines()) reader = None if refcode_file: @@ -55,11 +59,12 @@ if args.get_values: import csv - csvwriter = None + + csvwriter = None for entry in reader: values = filterer.values(entry) if csvwriter == None: - fieldnames=["identifier"] + values.keys() + fieldnames = ["identifier"] + values.keys() csvwriter = csv.DictWriter(outfile, fieldnames=fieldnames) csvwriter.writeheader() values["identifier"] = entry.identifier From bb9a48fa6027604c29620351a9633f862097514f Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 15:15:30 +0000 Subject: [PATCH 13/20] pycharm format (NO_JIRA) --- .../test_entry_property_calculator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/refcodes_with_properties/test_entry_property_calculator.py b/scripts/refcodes_with_properties/test_entry_property_calculator.py index 9c02d4a..52025b0 100644 --- a/scripts/refcodes_with_properties/test_entry_property_calculator.py +++ b/scripts/refcodes_with_properties/test_entry_property_calculator.py @@ -1,8 +1,10 @@ import unittest + from ccdc.io import EntryReader from entry_property_calculator import parse_control_file + class TestFiltering(unittest.TestCase): def setUp(self): @@ -150,4 +152,4 @@ def test_multiple(self): if evaluator.evaluate(e): hits.append(e.identifier) - self.assertEqual(['ABAQEB', 'ABELEY', 'ADAQOM', 'ADUWIG', 'AFEREK'], hits) \ No newline at end of file + self.assertEqual(['ABAQEB', 'ABELEY', 'ADAQOM', 'ADUWIG', 'AFEREK'], hits) From ccd903616ef5759cde1af17b398248aafbfbce6f Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 15:19:50 +0000 Subject: [PATCH 14/20] fix definition of unimplemented function (NO_JIRA) --- scripts/refcodes_with_properties/entry_property_calculator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/refcodes_with_properties/entry_property_calculator.py 
b/scripts/refcodes_with_properties/entry_property_calculator.py index f0b5e0a..fd5553c 100644 --- a/scripts/refcodes_with_properties/entry_property_calculator.py +++ b/scripts/refcodes_with_properties/entry_property_calculator.py @@ -64,7 +64,7 @@ def __init__(self, args): self.expected_value = value - def value(self): + def value(self, theobject): raise NotImplementedError # override this def __call__(self, theobject): @@ -78,7 +78,7 @@ def __init__(self, args): self.minimum = float(parts[0]) self.maximum = float(parts[1]) - def value(self): + def value(self, theobject): raise NotImplementedError # override this def __call__(self, theobject): From 630609fb21d8790b62fc55dcd5a36af8f93f16f6 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 15:21:55 +0000 Subject: [PATCH 15/20] fix imports (NO_JIRA) --- scripts/refcodes_with_properties/refcodes_with_properties.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/refcodes_with_properties/refcodes_with_properties.py b/scripts/refcodes_with_properties/refcodes_with_properties.py index 9713871..b8e26fb 100644 --- a/scripts/refcodes_with_properties/refcodes_with_properties.py +++ b/scripts/refcodes_with_properties/refcodes_with_properties.py @@ -15,6 +15,7 @@ ######################################################################### import argparse +import csv import sys from ccdc import io @@ -58,7 +59,6 @@ reader = io.EntryReader('CSD') if args.get_values: - import csv csvwriter = None for entry in reader: From 6eb4f50d3ae1c9b6bc15ae0cb13e16a3fe7c821c Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 15:24:26 +0000 Subject: [PATCH 16/20] more barks fixed (NO_JIRA) --- .../refcodes_with_properties/test_entry_property_calculator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/refcodes_with_properties/test_entry_property_calculator.py b/scripts/refcodes_with_properties/test_entry_property_calculator.py index 52025b0..c89488e 100644 --- 
a/scripts/refcodes_with_properties/test_entry_property_calculator.py +++ b/scripts/refcodes_with_properties/test_entry_property_calculator.py @@ -141,8 +141,6 @@ def test_multiple(self): lines = test_file.split('\n') evaluator = parse_control_file(lines) - - counter = 0 hits = [] test_entries = ['AABHTZ', 'ABAQEB', 'ABELEY', 'ADAQOM', 'ADARAA', 'ADARAZ', 'ADUWIG', 'AFEREK'] From 3b7aabc5643cd72cdbefa92b309217442ba83930 Mon Sep 17 00:00:00 2001 From: Jason Christopher Cole <62337096+jasonccole@users.noreply.github.com> Date: Tue, 18 Mar 2025 15:26:34 +0000 Subject: [PATCH 17/20] Update scripts/refcodes_with_properties/ReadMe.md (NO_JIRA) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- scripts/refcodes_with_properties/ReadMe.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/refcodes_with_properties/ReadMe.md b/scripts/refcodes_with_properties/ReadMe.md index 44d6d4c..4b5fdf8 100644 --- a/scripts/refcodes_with_properties/ReadMe.md +++ b/scripts/refcodes_with_properties/ReadMe.md @@ -40,7 +40,7 @@ You can run the script with an Example file. Results are printed by default and output file, e.g. ~~~ -$ python refcodes_with_properties.py -c example_control_file.txt -o mylist.gcd +python refcodes_with_properties.py -c example_control_file.txt -o mylist.gcd ~~~ This will generate a GCD file that can be used in other work. 
From db8943e7090f24e355dec2ef42466d9b9b9a3fcc Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 15:29:34 +0000 Subject: [PATCH 18/20] fix bark about != None (NO_JIRA) --- scripts/refcodes_with_properties/refcodes_with_properties.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/refcodes_with_properties/refcodes_with_properties.py b/scripts/refcodes_with_properties/refcodes_with_properties.py index b8e26fb..e98b7ea 100644 --- a/scripts/refcodes_with_properties/refcodes_with_properties.py +++ b/scripts/refcodes_with_properties/refcodes_with_properties.py @@ -45,7 +45,7 @@ print_values = args.get_values outfile = sys.stdout - if args.output_file != None: + if args.output_file is not None: outfile = open(args.output_file, 'wb') filterer = entry_property_calculator.parse_control_file(open(control_file, "r").readlines()) From c87b0d54b26ece173cb6b3348b8c5adfba356ae2 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 15:30:00 +0000 Subject: [PATCH 19/20] Fix bark about MD40 (NO_JIRA) --- scripts/refcodes_with_properties/ReadMe.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/refcodes_with_properties/ReadMe.md b/scripts/refcodes_with_properties/ReadMe.md index 44d6d4c..9711ec6 100644 --- a/scripts/refcodes_with_properties/ReadMe.md +++ b/scripts/refcodes_with_properties/ReadMe.md @@ -30,7 +30,7 @@ simple decscription of the test set used that any researcher could then reproduc - create a text control file with the various control lines specified - call Python to read the script and specify necessary arguments -~~~ +~~~bash python refcodes_with_properties.py --help ~~~ @@ -39,8 +39,8 @@ The above will print an extended help message that describes the registered You can run the script with an Example file. Results are printed by default and can be redirected to be saved in an output file, e.g. 
-~~~ -$ python refcodes_with_properties.py -c example_control_file.txt -o mylist.gcd +~~~bash +python refcodes_with_properties.py -c example_control_file.txt -o mylist.gcd ~~~ This will generate a GCD file that can be used in other work. @@ -51,7 +51,7 @@ This will generate a GCD file that can be used in other work. - Use the installed version of the CSD Python API, for example C:\Users\ \CCDC\ccdc-software\csd-python-api assuming the CCDC tools are installed in the ususal place do this -~~~ +~~~bat C:\Users\\CCDC\ccdc-software\csd-python-api\run_python_api.bat refcodes_with_properties.py --help ~~~ From b6dea4732ab0f6c9b9ee583623125a2caaff7871 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Tue, 18 Mar 2025 15:31:25 +0000 Subject: [PATCH 20/20] remove unused import (NO_JIRA) --- scripts/refcodes_with_properties/entry_property_calculator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/refcodes_with_properties/entry_property_calculator.py b/scripts/refcodes_with_properties/entry_property_calculator.py index fd5553c..588a17e 100644 --- a/scripts/refcodes_with_properties/entry_property_calculator.py +++ b/scripts/refcodes_with_properties/entry_property_calculator.py @@ -12,7 +12,6 @@ Utility classes for filtering CSD entries based on a property control file ''' -import ccdc.io _filter_classes = {}