Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix invalid CIF/BCIF files created when file is edited #659

Merged
merged 4 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions src/biotite/structure/io/pdbx/bcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,12 @@ class BinaryCIFBlock(_HierarchicalContainer):
"""

def __init__(self, categories=None):
super().__init__(categories)
if categories is None:
categories = {}
super().__init__(
# Actual bcif files use leading '_' as category names
{"_" + name: category for name, category in categories.items()}
)

@staticmethod
def subcomponent_class():
Expand All @@ -470,21 +475,36 @@ def supercomponent_class():
@staticmethod
def deserialize(content):
return BinaryCIFBlock(
BinaryCIFBlock._deserialize_elements(content["categories"], "name")
{
# The superclass uses leading '_' in category names,
# but on the level of this class, the leading '_' is omitted
name.lstrip("_"): category
for name, category in BinaryCIFBlock._deserialize_elements(
content["categories"], "name"
).items()
}
)

def serialize(self):
return {"categories": self._serialize_elements("name")}

def __getitem__(self, key):
# Actual bcif files use leading '_' as categories
return super().__getitem__("_" + key)
try:
return super().__getitem__("_" + key)
except KeyError:
raise KeyError(key)

def __setitem__(self, key, element):
return super().__setitem__("_" + key, element)
try:
return super().__setitem__("_" + key, element)
except KeyError:
raise KeyError(key)

def __delitem__(self, key):
return super().__setitem__("_" + key)
try:
return super().__setitem__("_" + key)
except KeyError:
raise KeyError(key)

def __iter__(self):
return (key.lstrip("_") for key in super().__iter__())
Expand Down
38 changes: 31 additions & 7 deletions src/biotite/structure/io/pdbx/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,17 @@ class CIFBlock(_Component, MutableMapping):
The keys are the category names and the values are the
:class:`CIFCategory` objects.
By default, an empty block is created.
name : str, optional
The name of the block.
This is only used for serialization and is automatically set,
when the :class:`CIFBlock` is added to a :class:`CIFFile`.
It only needs to be set manually, when the block is directly
serialized.

Attributes
----------
name : str
The name of the block.

Notes
-----
Expand All @@ -580,13 +591,15 @@ class CIFBlock(_Component, MutableMapping):
--------

>>> # Add category on creation
>>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})})
>>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})}, name="baz")
>>> # Add category later on
>>> block["bar"] = CIFCategory({"another_column": [2, 3]})
>>> # Access a column
>>> print(block["bar"]["another_column"].as_array())
['2' '3']
>>> print(block.serialize())
data_baz
#
_foo.some_column 1
#
loop_
Expand All @@ -596,11 +609,20 @@ class CIFBlock(_Component, MutableMapping):
#
"""

def __init__(self, categories=None):
def __init__(self, categories=None, name=None):
self._name = name
if categories is None:
categories = {}
self._categories = categories

@property
def name(self):
return self._name

@name.setter
def name(self, name):
self._name = name

@staticmethod
def subcomponent_class():
return CIFCategory
Expand Down Expand Up @@ -634,7 +656,10 @@ def deserialize(text):
return CIFBlock(_create_element_dict(lines, category_names, category_starts))

def serialize(self):
text_blocks = []
if self._name is None:
raise SerializationError("Block name is required")
# The block starts with the black name line followed by a comment line
text_blocks = ["data_" + self._name + "\n#\n"]
for category_name, category in self._categories.items():
if isinstance(category, str):
# Category is already stored as lines
Expand Down Expand Up @@ -806,14 +831,12 @@ def deserialize(text):
def serialize(self):
text_blocks = []
for block_name, block in self._blocks.items():
text_blocks.append("data_" + block_name + "\n")
# A comment line is set after the block indicator
text_blocks.append("#\n")
if isinstance(block, str):
# Block is already stored as text
text_blocks.append(block)
else:
try:
block.name = block_name
text_blocks.append(block.serialize())
except Exception:
raise SerializationError(
Expand Down Expand Up @@ -884,6 +907,7 @@ def __getitem__(self, key):
def __setitem__(self, key, block):
if not isinstance(block, CIFBlock):
raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'")
block.name = key
self._blocks[key] = block

def __delitem__(self, key):
Expand Down Expand Up @@ -921,7 +945,7 @@ def _create_element_dict(lines, element_names, element_starts):
# Lazy deserialization
# -> keep as text for now and deserialize later if needed
return {
element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]])
element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]]) + "\n"
for i, element_name in enumerate(element_names)
}

Expand Down
8 changes: 4 additions & 4 deletions src/biotite/structure/io/pdbx/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,10 @@ def _serialize_elements(self, store_key_in=None):
Parameters
----------
store_key_in: str, optional
If given, the key of each element is stored as value in the
serialized element.
This is basically the reverse operation of `take_key_from` in
:meth:`_deserialize_elements()`.
If given, the key of each element is stored as value in the
serialized element.
This is basically the reverse operation of `take_key_from` in
:meth:`_deserialize_elements()`.
"""
serialized_elements = []
for key, element in self._elements.items():
Expand Down
37 changes: 37 additions & 0 deletions tests/structure/test_pdbx.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,43 @@ def test_serialization_consistency(format, create_new_encoding):
raise Exception(f"Comparison failed for '{category_name}.{key}'")


@pytest.mark.parametrize(
"format, level", itertools.product(["cif", "bcif"], ["block", "category", "column"])
)
def test_editing(tmpdir, format, level):
"""
Check if editing an existing PDBx file works, by checking if replacing some
category/block/column with a copy of itself does not affect the content.
"""
File = pdbx.CIFFile if format == "cif" else pdbx.BinaryCIFFile
Block = File.subcomponent_class()
Category = Block.subcomponent_class()
Column = Category.subcomponent_class()

column = Column(["a", "b", "c"])
category = Category({"foo_col": column, "bar_col": column, "baz_col": column})
block = Block({"foo_cat": category, "bar_cat": category, "baz_cat": category})
ref_pdbx_file = File({"foo_block": block, "bar_block": block, "baz_block": block})
ref_pdbx_file.write(join(tmpdir, f"original.{format}"))

pdbx_file = File.read(join(tmpdir, f"original.{format}"))
if level == "block":
# Replace block in the mid,
# to check if the block before and after remain the same
pdbx_file["bar_block"] = pdbx_file["bar_block"]
elif level == "category":
pdbx_file["bar_block"]["bar_cat"] = pdbx_file["bar_block"]["bar_cat"]
elif level == "column":
pdbx_file["bar_block"]["bar_cat"]["bar_col"] = pdbx_file["bar_block"][
"bar_cat"
]["bar_col"]
pdbx_file.write(join(tmpdir, f"edited.{format}"))

test_pdbx_file = File.read(join(tmpdir, f"edited.{format}"))
# As the content should not have changed, the serialized files should be identical
assert test_pdbx_file.serialize() == ref_pdbx_file.serialize()


def _clear_encoding(category):
columns = {}
for key, col in category.items():
Expand Down
Loading