From a491e9e748ea5f08d96cf8fdc21ec0f84e51a657 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Thu, 5 Sep 2024 10:36:32 +0200 Subject: [PATCH 1/4] Fix missing line break at end of category/block --- src/biotite/structure/io/pdbx/cif.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/biotite/structure/io/pdbx/cif.py b/src/biotite/structure/io/pdbx/cif.py index 5951d4e08..6bc43d42a 100644 --- a/src/biotite/structure/io/pdbx/cif.py +++ b/src/biotite/structure/io/pdbx/cif.py @@ -921,7 +921,7 @@ def _create_element_dict(lines, element_names, element_starts): # Lazy deserialization # -> keep as text for now and deserialize later if needed return { - element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]]) + element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]]) + "\n" for i, element_name in enumerate(element_names) } From 7a52f8203340eb97e2b6fcf73658457e47213400 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Thu, 5 Sep 2024 11:31:32 +0200 Subject: [PATCH 2/4] Fix inconsistent usage of underscores in category names --- src/biotite/structure/io/pdbx/bcif.py | 32 ++++++++++++++++++---- src/biotite/structure/io/pdbx/component.py | 8 +++--- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/biotite/structure/io/pdbx/bcif.py b/src/biotite/structure/io/pdbx/bcif.py index 58f43040a..8921935f3 100644 --- a/src/biotite/structure/io/pdbx/bcif.py +++ b/src/biotite/structure/io/pdbx/bcif.py @@ -457,7 +457,12 @@ class BinaryCIFBlock(_HierarchicalContainer): """ def __init__(self, categories=None): - super().__init__(categories) + if categories is None: + categories = {} + super().__init__( + # Actual bcif files use leading '_' as category names + {"_" + name: category for name, category in categories.items()} + ) @staticmethod def subcomponent_class(): @@ -470,21 +475,36 @@ def supercomponent_class(): @staticmethod def deserialize(content): return BinaryCIFBlock( - BinaryCIFBlock._deserialize_elements(content["categories"], "name") + { + # The superclass uses leading '_' in category names, + # but on the level of this class, the leading '_' is omitted + name.lstrip("_"): category + for name, category in BinaryCIFBlock._deserialize_elements( + content["categories"], "name" + ).items() + } ) def serialize(self): return {"categories": self._serialize_elements("name")} def __getitem__(self, key): - # Actual bcif files use leading '_' as categories - return super().__getitem__("_" + key) + try: + return super().__getitem__("_" + key) + except KeyError: + raise KeyError(key) def __setitem__(self, key, element): - return super().__setitem__("_" + key, element) + try: + return super().__setitem__("_" + key, element) + except KeyError: + raise KeyError(key) def __delitem__(self, key): - return super().__setitem__("_" + key) + try: + return super().__setitem__("_" + key) + except KeyError: + raise KeyError(key) def __iter__(self): return (key.lstrip("_") for key in super().__iter__()) diff --git a/src/biotite/structure/io/pdbx/component.py b/src/biotite/structure/io/pdbx/component.py index fb2f228ed..740bd56c3 100644 --- a/src/biotite/structure/io/pdbx/component.py +++ b/src/biotite/structure/io/pdbx/component.py @@ -171,10 +171,10 @@ def _serialize_elements(self, store_key_in=None): Parameters ---------- store_key_in: str, optional - If given, the key of each element is stored as value in the - serialized element. - This is basically the reverse operation of `take_key_from` in - :meth:`_deserialize_elements()`. + If given, the key of each element is stored as value in the + serialized element. + This is basically the reverse operation of `take_key_from` in + :meth:`_deserialize_elements()`. """ serialized_elements = [] for key, element in self._elements.items(): From 2a7bbf7e942b3ba37fe2e5c31ccf30ebf854b811 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Thu, 5 Sep 2024 12:18:21 +0200 Subject: [PATCH 3/4] Fix duplicate block header when rewriting existing blocks --- src/biotite/structure/io/pdbx/cif.py | 36 +++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/biotite/structure/io/pdbx/cif.py b/src/biotite/structure/io/pdbx/cif.py index 6bc43d42a..f7bfd4c79 100644 --- a/src/biotite/structure/io/pdbx/cif.py +++ b/src/biotite/structure/io/pdbx/cif.py @@ -569,6 +569,17 @@ class CIFBlock(_Component, MutableMapping): The keys are the category names and the values are the :class:`CIFCategory` objects. By default, an empty block is created. + name : str, optional + The name of the block. + This is only used for serialization and is automatically set, + when the :class:`CIFBlock` is added to a :class:`CIFFile`. + It only needs to be set manually, when the block is directly + serialized. + + Attributes + ---------- + name : str + The name of the block. Notes ----- @@ -580,13 +591,15 @@ class CIFBlock(_Component, MutableMapping): -------- >>> # Add category on creation - >>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})}) + >>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})}, name="baz") >>> # Add category later on >>> block["bar"] = CIFCategory({"another_column": [2, 3]}) >>> # Access a column >>> print(block["bar"]["another_column"].as_array()) ['2' '3'] >>> print(block.serialize()) + data_baz + # _foo.some_column 1 # loop_ @@ -596,11 +609,20 @@ class CIFBlock(_Component, MutableMapping): # """ - def __init__(self, categories=None): + def __init__(self, categories=None, name=None): + self._name = name if categories is None: categories = {} self._categories = categories + @property + def name(self): + return self._name + + @name.setter + def name(self, name): + self._name = name + @staticmethod def subcomponent_class(): return CIFCategory @@ -634,7 +656,10 @@ def deserialize(text): return CIFBlock(_create_element_dict(lines, category_names, category_starts)) def serialize(self): - text_blocks = [] + if self._name is None: + raise SerializationError("Block name is required") + # The block starts with the black name line followed by a comment line + text_blocks = ["data_" + self._name + "\n#\n"] for category_name, category in self._categories.items(): if isinstance(category, str): # Category is already stored as lines @@ -806,14 +831,12 @@ def deserialize(text): def serialize(self): text_blocks = [] for block_name, block in self._blocks.items(): - text_blocks.append("data_" + block_name + "\n") - # A comment line is set after the block indicator - text_blocks.append("#\n") if isinstance(block, str): # Block is already stored as text text_blocks.append(block) else: try: + block.name = block_name text_blocks.append(block.serialize()) except Exception: raise SerializationError( @@ -884,6 +907,7 @@ def __getitem__(self, key): def __setitem__(self, key, block): if not isinstance(block, CIFBlock): raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'") + block.name = key self._blocks[key] = block def __delitem__(self, key): From 2dddcbabe6c1c755f167297e9a1ef3c508bd83c9 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Thu, 5 Sep 2024 12:20:22 +0200 Subject: [PATCH 4/4] Test editing existing PDBx files --- tests/structure/test_pdbx.py | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/structure/test_pdbx.py b/tests/structure/test_pdbx.py index 6fc150fda..6e41c4b2f 100644 --- a/tests/structure/test_pdbx.py +++ b/tests/structure/test_pdbx.py @@ -704,6 +704,43 @@ def test_serialization_consistency(format, create_new_encoding): raise Exception(f"Comparison failed for '{category_name}.{key}'") +@pytest.mark.parametrize( + "format, level", itertools.product(["cif", "bcif"], ["block", "category", "column"]) +) +def test_editing(tmpdir, format, level): + """ + Check if editing an existing PDBx file works, by checking if replacing some + category/block/column with a copy of itself does not affect the content. + """ + File = pdbx.CIFFile if format == "cif" else pdbx.BinaryCIFFile + Block = File.subcomponent_class() + Category = Block.subcomponent_class() + Column = Category.subcomponent_class() + + column = Column(["a", "b", "c"]) + category = Category({"foo_col": column, "bar_col": column, "baz_col": column}) + block = Block({"foo_cat": category, "bar_cat": category, "baz_cat": category}) + ref_pdbx_file = File({"foo_block": block, "bar_block": block, "baz_block": block}) + ref_pdbx_file.write(join(tmpdir, f"original.{format}")) + + pdbx_file = File.read(join(tmpdir, f"original.{format}")) + if level == "block": + # Replace block in the mid, + # to check if the block before and after remain the same + pdbx_file["bar_block"] = pdbx_file["bar_block"] + elif level == "category": + pdbx_file["bar_block"]["bar_cat"] = pdbx_file["bar_block"]["bar_cat"] + elif level == "column": + pdbx_file["bar_block"]["bar_cat"]["bar_col"] = pdbx_file["bar_block"][ + "bar_cat" + ]["bar_col"] + pdbx_file.write(join(tmpdir, f"edited.{format}")) + + test_pdbx_file = File.read(join(tmpdir, f"edited.{format}")) + # As the content should not have changed, the serialized files should be identical + assert test_pdbx_file.serialize() == ref_pdbx_file.serialize() + + def _clear_encoding(category): columns = {} for key, col in category.items():