Skip to content

Commit

Permalink
Merge pull request #659 from padix-key/cif
Browse files Browse the repository at this point in the history
Fix invalid CIF/BCIF files created when file is edited
  • Loading branch information
padix-key authored Sep 9, 2024
2 parents dc691a5 + 2dddcba commit d0107fa
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 17 deletions.
32 changes: 26 additions & 6 deletions src/biotite/structure/io/pdbx/bcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,12 @@ class BinaryCIFBlock(_HierarchicalContainer):
"""

def __init__(self, categories=None):
super().__init__(categories)
if categories is None:
categories = {}
super().__init__(
# Actual bcif files use leading '_' as category names
{"_" + name: category for name, category in categories.items()}
)

@staticmethod
def subcomponent_class():
Expand All @@ -470,21 +475,36 @@ def supercomponent_class():
@staticmethod
def deserialize(content):
return BinaryCIFBlock(
BinaryCIFBlock._deserialize_elements(content["categories"], "name")
{
# The superclass uses leading '_' in category names,
# but on the level of this class, the leading '_' is omitted
name.lstrip("_"): category
for name, category in BinaryCIFBlock._deserialize_elements(
content["categories"], "name"
).items()
}
)

def serialize(self):
return {"categories": self._serialize_elements("name")}

def __getitem__(self, key):
# Actual bcif files use leading '_' as categories
return super().__getitem__("_" + key)
try:
return super().__getitem__("_" + key)
except KeyError:
raise KeyError(key)

def __setitem__(self, key, element):
return super().__setitem__("_" + key, element)
try:
return super().__setitem__("_" + key, element)
except KeyError:
raise KeyError(key)

def __delitem__(self, key):
return super().__setitem__("_" + key)
try:
return super().__setitem__("_" + key)
except KeyError:
raise KeyError(key)

def __iter__(self):
return (key.lstrip("_") for key in super().__iter__())
Expand Down
38 changes: 31 additions & 7 deletions src/biotite/structure/io/pdbx/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,17 @@ class CIFBlock(_Component, MutableMapping):
The keys are the category names and the values are the
:class:`CIFCategory` objects.
By default, an empty block is created.
name : str, optional
The name of the block.
This is only used for serialization and is automatically set,
when the :class:`CIFBlock` is added to a :class:`CIFFile`.
It only needs to be set manually, when the block is directly
serialized.
Attributes
----------
name : str
The name of the block.
Notes
-----
Expand All @@ -580,13 +591,15 @@ class CIFBlock(_Component, MutableMapping):
--------
>>> # Add category on creation
>>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})})
>>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})}, name="baz")
>>> # Add category later on
>>> block["bar"] = CIFCategory({"another_column": [2, 3]})
>>> # Access a column
>>> print(block["bar"]["another_column"].as_array())
['2' '3']
>>> print(block.serialize())
data_baz
#
_foo.some_column 1
#
loop_
Expand All @@ -596,11 +609,20 @@ class CIFBlock(_Component, MutableMapping):
#
"""

def __init__(self, categories=None):
def __init__(self, categories=None, name=None):
self._name = name
if categories is None:
categories = {}
self._categories = categories

@property
def name(self):
return self._name

@name.setter
def name(self, name):
self._name = name

@staticmethod
def subcomponent_class():
return CIFCategory
Expand Down Expand Up @@ -634,7 +656,10 @@ def deserialize(text):
return CIFBlock(_create_element_dict(lines, category_names, category_starts))

def serialize(self):
text_blocks = []
if self._name is None:
raise SerializationError("Block name is required")
# The block starts with the black name line followed by a comment line
text_blocks = ["data_" + self._name + "\n#\n"]
for category_name, category in self._categories.items():
if isinstance(category, str):
# Category is already stored as lines
Expand Down Expand Up @@ -806,14 +831,12 @@ def deserialize(text):
def serialize(self):
text_blocks = []
for block_name, block in self._blocks.items():
text_blocks.append("data_" + block_name + "\n")
# A comment line is set after the block indicator
text_blocks.append("#\n")
if isinstance(block, str):
# Block is already stored as text
text_blocks.append(block)
else:
try:
block.name = block_name
text_blocks.append(block.serialize())
except Exception:
raise SerializationError(
Expand Down Expand Up @@ -884,6 +907,7 @@ def __getitem__(self, key):
def __setitem__(self, key, block):
if not isinstance(block, CIFBlock):
raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'")
block.name = key
self._blocks[key] = block

def __delitem__(self, key):
Expand Down Expand Up @@ -921,7 +945,7 @@ def _create_element_dict(lines, element_names, element_starts):
# Lazy deserialization
# -> keep as text for now and deserialize later if needed
return {
element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]])
element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]]) + "\n"
for i, element_name in enumerate(element_names)
}

Expand Down
8 changes: 4 additions & 4 deletions src/biotite/structure/io/pdbx/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,10 @@ def _serialize_elements(self, store_key_in=None):
Parameters
----------
store_key_in: str, optional
If given, the key of each element is stored as value in the
serialized element.
This is basically the reverse operation of `take_key_from` in
:meth:`_deserialize_elements()`.
If given, the key of each element is stored as value in the
serialized element.
This is basically the reverse operation of `take_key_from` in
:meth:`_deserialize_elements()`.
"""
serialized_elements = []
for key, element in self._elements.items():
Expand Down
37 changes: 37 additions & 0 deletions tests/structure/test_pdbx.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,43 @@ def test_serialization_consistency(format, create_new_encoding):
raise Exception(f"Comparison failed for '{category_name}.{key}'")


@pytest.mark.parametrize(
"format, level", itertools.product(["cif", "bcif"], ["block", "category", "column"])
)
def test_editing(tmpdir, format, level):
"""
Check if editing an existing PDBx file works, by checking if replacing some
category/block/column with a copy of itself does not affect the content.
"""
File = pdbx.CIFFile if format == "cif" else pdbx.BinaryCIFFile
Block = File.subcomponent_class()
Category = Block.subcomponent_class()
Column = Category.subcomponent_class()

column = Column(["a", "b", "c"])
category = Category({"foo_col": column, "bar_col": column, "baz_col": column})
block = Block({"foo_cat": category, "bar_cat": category, "baz_cat": category})
ref_pdbx_file = File({"foo_block": block, "bar_block": block, "baz_block": block})
ref_pdbx_file.write(join(tmpdir, f"original.{format}"))

pdbx_file = File.read(join(tmpdir, f"original.{format}"))
if level == "block":
# Replace block in the mid,
# to check if the block before and after remain the same
pdbx_file["bar_block"] = pdbx_file["bar_block"]
elif level == "category":
pdbx_file["bar_block"]["bar_cat"] = pdbx_file["bar_block"]["bar_cat"]
elif level == "column":
pdbx_file["bar_block"]["bar_cat"]["bar_col"] = pdbx_file["bar_block"][
"bar_cat"
]["bar_col"]
pdbx_file.write(join(tmpdir, f"edited.{format}"))

test_pdbx_file = File.read(join(tmpdir, f"edited.{format}"))
# As the content should not have changed, the serialized files should be identical
assert test_pdbx_file.serialize() == ref_pdbx_file.serialize()


def _clear_encoding(category):
columns = {}
for key, col in category.items():
Expand Down

0 comments on commit d0107fa

Please sign in to comment.