Skip to content

adding support for HETATM in caprieval #1279

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/haddock/libs/libalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ def load_coords(
numbering_dic=None,
model2ref_chain_dict=None,
add_resname=None,
keep_hetatm: bool = False,
):
"""Load coordinates from PDB.

Expand All @@ -463,6 +464,9 @@ def load_coords(

add_resname : bool
use the residue name in the identifier

keep_hetatm : bool
Should HETATM lines be extracted ?

Returns
-------
Expand All @@ -475,14 +479,19 @@ def load_coords(
coord_dic: CoordsDict = {}
chain_dic: ResDict = {}
idx: int = 0
# Set types of coordinates lines to extract
if keep_hetatm:
coordinates_line_to_extract = ("ATOM", "HETATM")
else:
coordinates_line_to_extract = ("ATOM", )
# Check filetype
if isinstance(pdb_f, PDBFile):
pdb_f = pdb_f.rel_path
# Read file
with open(pdb_f, "r") as fh:
for line in fh.readlines():
# Skip non ATOM records lines
if not line.startswith("ATOM"):
if not line.startswith(coordinates_line_to_extract):
continue
# Extract PDB line data
atom_name = line[slc_name].strip()
Expand Down
55 changes: 47 additions & 8 deletions src/haddock/modules/analysis/caprieval/capri.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def load_contacts(
cutoff: float = 5.0,
numbering_dic: Optional[dict[str, dict[int, int]]] = None,
model2ref_chain_dict: Optional[dict[str, str]] = None,
keep_hetatm: bool = False,
) -> set[tuple]:
"""Load residue-based contacts.

Expand All @@ -121,6 +122,8 @@ def load_contacts(
PDB file of the model to have its atoms identified
cutoff : float, optional
Cutoff distance for the interface identification.
keep_hetatm : bool
Should HETATM coordinates be extracts ? (default False)

Returns
-------
Expand All @@ -136,6 +139,7 @@ def load_contacts(
atoms,
numbering_dic=numbering_dic,
model2ref_chain_dict=model2ref_chain_dict,
keep_hetatm=keep_hetatm,
)
# create coordinate arrays
coord_arrays: dict[str, NDFloat] = {}
Expand Down Expand Up @@ -217,6 +221,7 @@ def __init__(
self.atoms = self._load_atoms(model, reference, full=self.allatoms)
self.r_chain = params["receptor_chain"]
self.l_chains = params["ligand_chains"]
self.keep_hetatm = params["keep_hetatm"]
self.model2ref_numbering = None
self.model2ref_chain_dict = None
self.output_ss_fname = Path(f"capri_ss_{identificator}.tsv")
Expand All @@ -235,14 +240,21 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:
The cutoff distance for the intermolecular contacts.
"""
# Identify reference interface
ref_interface_resdic = self.identify_interface(self.reference, cutoff)
ref_interface_resdic = self.identify_interface(
self.reference,
cutoff,
keep_hetatm=self.keep_hetatm,
)

if len(ref_interface_resdic) == 0:
log.warning("No reference interface found")
else:
# Load interface coordinates
ref_coord_dic, _ = load_coords(
self.reference, self.atoms, ref_interface_resdic
self.reference,
self.atoms,
ref_interface_resdic,
keep_hetatm=self.keep_hetatm,
)
try:
mod_coord_dic, _ = load_coords(
Expand All @@ -251,6 +263,7 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:
ref_interface_resdic,
numbering_dic=self.model2ref_numbering,
model2ref_chain_dict=self.model2ref_chain_dict,
keep_hetatm=self.keep_hetatm,
)
except ALIGNError as alignerror:
log.warning(alignerror)
Expand Down Expand Up @@ -285,13 +298,18 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:

def calc_lrmsd(self) -> None:
"""Calculate the L-RMSD."""
ref_coord_dic, _ = load_coords(self.reference, self.atoms)
ref_coord_dic, _ = load_coords(
self.reference,
self.atoms,
keep_hetatm=self.keep_hetatm,
)
try:
mod_coord_dic, _ = load_coords(
self.model,
self.atoms,
numbering_dic=self.model2ref_numbering,
model2ref_chain_dict=self.model2ref_chain_dict,
keep_hetatm=self.keep_hetatm,
)
except ALIGNError as alignerror:
log.warning(alignerror)
Expand Down Expand Up @@ -390,11 +408,18 @@ def calc_ilrmsd(self, cutoff: float = 10.0) -> None:
The cutoff distance for the intermolecular contacts.
"""
# Identify interface
ref_interface_resdic = self.identify_interface(self.reference, cutoff)
ref_interface_resdic = self.identify_interface(
self.reference,
cutoff,
keep_hetatm=self.keep_hetatm,
)
# Load interface coordinates

ref_int_coord_dic, _ = load_coords(
self.reference, self.atoms, ref_interface_resdic
self.reference,
self.atoms,
ref_interface_resdic,
keep_hetatm=self.keep_hetatm,
)
try:
mod_int_coord_dic, _ = load_coords(
Expand All @@ -403,6 +428,7 @@ def calc_ilrmsd(self, cutoff: float = 10.0) -> None:
ref_interface_resdic,
numbering_dic=self.model2ref_numbering,
model2ref_chain_dict=self.model2ref_chain_dict,
keep_hetatm=self.keep_hetatm,
)
except ALIGNError as alignerror:
log.warning(alignerror)
Expand Down Expand Up @@ -491,14 +517,19 @@ def calc_fnat(self, cutoff: float = 5.0) -> None:
cutoff : float
The cutoff distance for the intermolecular contacts.
"""
ref_contacts = load_contacts(self.reference, cutoff)
ref_contacts = load_contacts(
self.reference,
cutoff,
keep_hetatm=self.keep_hetatm,
)
if len(ref_contacts) != 0:
try:
model_contacts = load_contacts(
self.model,
cutoff,
numbering_dic=self.model2ref_numbering, # type: ignore
model2ref_chain_dict=self.model2ref_chain_dict, # type: ignore
keep_hetatm=self.keep_hetatm,
)
except ALIGNError as alignerror:
log.warning(alignerror)
Expand All @@ -511,14 +542,19 @@ def calc_fnat(self, cutoff: float = 5.0) -> None:
def calc_global_rmsd(self) -> None:
"""Calculate the full structure RMSD."""
# Load reference atomic coordinates
ref_coord_dic, _ = load_coords(self.reference, self.atoms)
ref_coord_dic, _ = load_coords(
self.reference,
self.atoms,
keep_hetatm=self.keep_hetatm,
)
# Load model atomic coordinates
try:
model_coord_dic, _ = load_coords(
self.model,
self.atoms,
numbering_dic=self.model2ref_numbering,
model2ref_chain_dict=self.model2ref_chain_dict,
keep_hetatm=self.keep_hetatm,
)
except ALIGNError as alignerror:
log.warning(alignerror)
Expand Down Expand Up @@ -688,6 +724,7 @@ def _load_atoms(
def identify_interface(
pdb_f: PDBPath,
cutoff: float = 5.0,
keep_hetatm: bool = False,
) -> dict[str, list[int]]:
"""Identify the interface.

Expand All @@ -697,6 +734,8 @@ def identify_interface(
PDB file of the model to have its atoms identified
cutoff : float, optional
Cutoff distance for the interface identification.
keep_hetatm : bool
Should HETATM coordinates be extracts ? (default False)

Returns
-------
Expand All @@ -707,7 +746,7 @@ def identify_interface(
pdb_f = pdb_f.rel_path

interface_resdic: dict[str, list[int]] = {}
contacts = load_contacts(pdb_f, cutoff)
contacts = load_contacts(pdb_f, cutoff, keep_hetatm=keep_hetatm)

for contact in contacts:
first_chain, first_resid, sec_chain, sec_resid = contact
Expand Down
10 changes: 10 additions & 0 deletions src/haddock/modules/analysis/caprieval/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,13 @@ allatoms:
backbone atoms will be considered, otherwise all the heavy-atoms.
group: analysis
explevel: easy

keep_hetatm:
default: false
type: boolean
title: HETATM to be considered during the analysis.
short: HETATM to be considered during the analysis.
long: HETATM to be considered during the analysis. If false (default), only
ATOM coordinates lines will be used, otherwise the HETATM from reference will also be used.
group: analysis
explevel: easy
22 changes: 22 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,28 @@ def fixture_protlig_input_list():
yield [pdb_obj_1, pdb_obj_2]


@pytest.fixture(name="prot_HETATMlig_input_list")
def fixture_prot_HETATMlig_input_list():
"""Protein-Ligand with HETATM input."""
with (
tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as tmp_1,
tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as tmp_2,
):
dst_prot_1 = tmp_1.name
dst_prot_2 = tmp_2.name

src_prot_1 = Path(golden_data, "protlig_complex_1_HETATM.pdb")
src_prot_2 = Path(golden_data, "protlig_complex_2.pdb")

shutil.copy(src_prot_1, dst_prot_1)
shutil.copy(src_prot_2, dst_prot_2)

pdb_obj_1 = PDBFile(file_name=dst_prot_1, path=Path(dst_prot_1).parent)
pdb_obj_2 = PDBFile(file_name=dst_prot_2, path=Path(dst_prot_2).parent)

yield [pdb_obj_1, pdb_obj_2]


@pytest.fixture(name="protprot_onechain_list")
def fixture_protprot_onechain_list() -> Generator[list[PDBFile], Any, Any]:
"""Protein-Protein complex with a single chain ID."""
Expand Down
Loading