diff --git a/.github/workflows/link-checker.yml b/.github/workflows/link-checker.yml new file mode 100644 index 000000000..46c1d8710 --- /dev/null +++ b/.github/workflows/link-checker.yml @@ -0,0 +1,29 @@ +name: links + +on: + pull_request: + repository_dispatch: + workflow_dispatch: + schedule: + - cron: "00 18 * * *" + +jobs: + linkChecker: + runs-on: ubuntu-latest + permissions: + issues: write + steps: + - uses: actions/cache@v4 + with: + path: .lycheecache + key: cache-lychee-${{ github.sha }} + restore-keys: cache-lychee- + + - uses: actions/checkout@v4 + + - name: Link Checker + id: lychee + uses: lycheeverse/lychee-action@v2 + with: + args: -q './**/*.md' --accept 403,502 --cache + fail: false diff --git a/_config.yml b/_config.yml index 4f2f2c53a..2ae6388b6 100644 --- a/_config.yml +++ b/_config.yml @@ -48,7 +48,7 @@ owner: ad-client: ad-slot: github: haddocking - twitter: amjjbonvin + twitter: amjjbonvin.bsky.social x: amjjbonvin include: [".htaccess"] diff --git a/_data/alumni.yml b/_data/alumni.yml index b429b10ac..22da96cc1 100644 --- a/_data/alumni.yml +++ b/_data/alumni.yml @@ -19,6 +19,11 @@ current: 'Oregon State University, Corvallis OR, USA' status: visiting-professor +- name: Vlad Cojocaru + url: https://starubb.institute.ubbcluj.ro/en/member/cojocaru-vlad-3 + current: 'STAR-UBB Institute, Babeș-Bolyai University, Cluj-Napoca, Romania' + status: Senior Researcher / Data Scientist + - name: João Teixeira current: 'University of Padova, Italy' url: https://fuxreiterlab.github.io/index.html @@ -49,6 +54,9 @@ current: 'Boehringer Ingelheim, Vienna, Austria' status: 'postdoc' +- name: Charlotte van Noort + status: Ph.D Candidate + - name: Jorge Roel url: https://www.ibmb.csic.es/en/department-of-structural-biology-dsb/protein-design-and-modeling current: 'IBMB, Barcelona, Spain' @@ -403,4 +411,10 @@ - name: Tineke Kadijk status: student +- name: Joe Zhang + status: student + +- name: Miguel Sanchez Marin + status: student + diff --git 
a/_data/members.yml b/_data/members.yml index 4e34ebb72..2ecb2340b 100644 --- a/_data/members.yml +++ b/_data/members.yml @@ -7,9 +7,9 @@ position: IT-Researcher (Software Development and Operations) avatar: /images/people/Rodrigo.jpg -- name: Vlad Cojocaru - position: Senior Researcher / Data Scientist - avatar: /images/people/Vlad-Cojocaru.jpg +- name: Stefan Verhoeven + position: Research Software Engineer (Netherlands eScience Center) + avatar: /images/people/Stefan-Verhoeven.png - name: Marco Giulini position: Postdoctoral Researcher @@ -27,9 +27,11 @@ position: Postdoctoral Researcher avatar: /images/people/Anna-Kravchenko.jpg -- name: Charlotte van Noort - position: Ph.D Candidate - avatar: /images/people/Charlotte.jpg +- name: Your name here? + position: Postdoctoral Researcher + +- name: Your name here? + position: Postdoctoral Researcher - name: Xiaotong Xu position: Ph.D Candidate @@ -39,11 +41,22 @@ position: Ph.D Candidate avatar: /images/people/Anna-Engel.jpg -- name: Miguel Sanchez Marin +- name: Alkis Katsetsiadis position: M.Sc Student - avatar: /images/people/Miguel-Sanchez.jpg + avatar: /images/people/Alkis-Katsetsiadis.png -- name: Joe Zhang +- name: Emile Straat position: M.Sc Student - avatar: /images/people/Joe-Zhang.jpg + avatar: /images/people/Emile-Straat.png +- name: Yara Weldam + position: M.Sc Student + avatar: /images/people/Yara-Weldam.png + +- name: Ilaria Coratella + position: M.Sc Student + avatar: /images/people/Ilaria-Coratella.png + +- name: Lorenzo Possanzini + position: M.Sc Student + avatar: /images/people/Lorenzo_Possanzini.jpg diff --git a/_includes/_author-bio.html b/_includes/_author-bio.html index e81b6d8d2..09452d326 100644 --- a/_includes/_author-bio.html +++ b/_includes/_author-bio.html @@ -9,16 +9,16 @@ {% endif %}
{{ author.bio }}
{% if author.email %} Email{% endif %} -{% if author.twitter %} Twitter{% endif %} -{% if author.facebook %} Facebook{% endif %} -{% if author.google.plus %} Google+{% endif %} -{% if author.linkedin %} LinkedIn{% endif %} -{% if author.github %} Github{% endif %} +{% if author.twitter%} Bluesky{% endif %} +{% if author.facebook %} Facebook{% endif %} +{% if author.google.plus %} Google+{% endif %} +{% if author.linkedin %} LinkedIn{% endif %} +{% if author.github %} Github{% endif %} {% if author.youtube %} Youtube{% endif %} Subscribe
Supported by:
-assi (selection1) (selection2) distance, lower-bound correction, upper-bound correction +assign (selection1) (selection2) distance, lower-bound correction, upper-bound correctionThe lower limit for the distance is calculated as: distance minus lower-bound correction @@ -332,7 +341,7 @@ Here would be an example of a distance restraint between the CB carbons of resid allowed distance range between 10 and 20Å:
-assi (segid A and resid 10 and name CB) (segid B and resid 200 and name CB) 20.0 10.0 0.0 +assign (segid A and resid 10 and name CB) (segid B and resid 200 and name CB) 20.0 10.0 0.0@@ -351,13 +360,13 @@ create now a distance restraint file suitable for use in HADDOCK.
-assi (segid A and resid 27 and name CA) (segid B and resid 18 and name CA) 23 23 0 -assi (segid A and resid 122 and name CA) (segid B and resid 125 and name CA) 23 23 0 -assi (segid A and resid 122 and name CA) (segid B and resid 128 and name CA) 23 23 0 -assi (segid A and resid 122 and name CA) (segid B and resid 127 and name CA) 23 23 0 -assi (segid A and resid 55 and name CA) (segid B and resid 169 and name CA) 26 26 0 -assi (segid A and resid 55 and name CA) (segid B and resid 179 and name CA) 26 26 0 -assi (segid A and resid 54 and name CA) (segid B and resid 179 and name CA) 26 26 0 +assign (segid A and resid 27 and name CA) (segid B and resid 18 and name CA) 23 23 0 +assign (segid A and resid 122 and name CA) (segid B and resid 125 and name CA) 23 23 0 +assign (segid A and resid 122 and name CA) (segid B and resid 128 and name CA) 23 23 0 +assign (segid A and resid 122 and name CA) (segid B and resid 127 and name CA) 23 23 0 +assign (segid A and resid 55 and name CA) (segid B and resid 169 and name CA) 26 26 0 +assign (segid A and resid 55 and name CA) (segid B and resid 179 and name CA) 26 26 0 +assign (segid A and resid 54 and name CA) (segid B and resid 179 and name CA) 26 26 0@@ -366,10 +375,11 @@ __Note:__ Under Linux (or OSX), this file could be generated automatically from file provided with the data for this tutorial by giving the following command (one line) in a terminal window:_ -cat restraints_filtered.txt | awk \'{if ( NF == 8 ) {print \"assi ( segid \",$1,\" and resid \",$2,\" and name \",$3,\" ) ( segid \",$4,\" and resid \",$5,\" and name \",$6,\" ) \",$8,$8,$7}}\' > restraints_filtered.tbl +cat restraints_filtered.txt | awk \'{if ( NF == 8 ) {print \"assign ( segid \",$1,\" and resid \",$2,\" and name \",$3,\" ) ( segid \",$4,\" and resid \",$5,\" and name \",$6,\" ) \",$8,$8,$7}}\' > restraints_filtered.tbl
+23,24,80,82,84,96,98,100,124,126,128 ++ +Let us visualize this interface on our unbound protein structure. For this start PyMol and load the PDB file of the unbound protein: + + +File menu -> Open -> select 2ZEW_clean.pdb + + +color white, all
+ HADDOCKscore = 1.0 * Evdw + 0.2 * Eelec + 1.0 * Edesol + 0.1 * Eair ++where Evdw is the intermolecular van der Waals energy, Eelec the intermolecular electrostatic energy, Edesol represents an empirical desolvation energy term adapted from Fernandez-Recio *et al.* J. Mol. Biol. 2004, and Eair the AIR energy. + +Consider the cluster scores and their standard deviations. +Is the top ranked cluster significantly better than the second one? What about the third cluster? + +In case the scores of various clusters are within standard deviation from each other, all should be considered as a valid solution for the docking. Ideally, some additional independent experimental information should be available to decide on the best solution. In this case we do have such a piece of information, namely the crystal structure of the complex. + +
+ 2ZEX_l_u.pdb: 1.071 + cluster1_1.pdb: 1.004 + cluster2_1.pdb: 0.968 + cluster10_1.pdb: 0.954 + ... ++
+B 520 CB C 135 CB 0.0 30.0 +B 520 CB C 138 CB 0.0 30.0 +B 520 CB C 141 CB 0.0 30.0 ++ +This is the format used by DisVis to represent the cross-links. Each cross-link definition consists of eight fields: + +* chainID of the 1st molecule +* residue number +* atom name +* chainID of the 2nd molecule +* residue number +* atom name +* lower distance limit +* upper distance limit + + +
From an analysis of the diagonal blocks we can identify the three wHTH domains, whose structure is well predicted. When considering the off-diagonal blocks, the last domain of C34, wHTH3, seems to be the best defined with respect to C82. We will make use of this in our modelling strategy 2 in this tutorial. Since the orientations of the other domains are not well defined with respect to C82, we will treat them as separate entities during our modelling.
++assi (selection1) (selection2) distance, lower-bound correction, upper-bound correction ++ +The lower limit for the distance is calculated as: distance minus lower-bound correction +and the upper limit as: distance plus upper-bound correction + +The syntax for the selections can combine information about chainID - `segid` keyword -, residue number - `resid` +keyword -, atom name - `name` keyword. +Other keywords can be used in various combinations of OR and AND statements. Please refer for that to the [online CNS manual][link-cns]{:target="_blank"}. + +As an example, a distance restraint between the CB carbons of residues 10 and 200 in chains A and B with an +allowed distance range between 10 and 20Å can be defined as: + +
+assi (segid A and resid 10 and name CB) (segid B and resid 200 and name CB) 20.0 10.0 0.0 ++ + +Can you think of a different way of defining the target distance and its lower and upper corrections while maintaining the same +allowed range? + + + +Under Linux (or OSX), this file can be generated automatically from a disvis restraint input file, e.g. `xlinks-C82-C34.disvis` +file provided with the data for this tutorial by giving the following command (one line) in a terminal window: + + +cat xlinks-C82-C34.disvis| awk '{if (NF == 8) {print "assi (segid ",$1," and resid ",$2," and name ",$3,") (segid ",$4," and resid ",$5," and name ",$6,") ",$8,$8,$7}}' > xlinks-C82-C34.disvis.tbl + + +A pre-generated CNS/HADDOCK formatted restraints file containing all cross-links is available in the `restraints` directory as: + + * `xlinks-all-core-C82-C34-wHTH1-wHTH2-C31-K91-K111.tbl` + +Inspect it (open it as a text file) + + +We used CB atoms to define the restraints in the disvis restraint file. Can you find those in this file? +Are there other atoms defined? What could those be? (Hint... MARTINI) + + +
Additional atoms are included in the distance restraints definitions: `BB`. These correspond to the backbone beads in the MARTINI representation.
+C34 consists of three winged-helix-turn-helix domains which could be docked separately in principle. These are connected by flexible linkers. +The defined restraints impose upper limits to the distance between the C- and N-termini of the domains. The upper limit was estimated as the number of missing segments/residues * 4.5Å (a typical distance observed in diffraction data for amyloid fibrils, representing a CA-CA distance in an extended conformation). +The same applies to C31 for which only two peptide fragments will be used to be able to make use of the cross-link restraints.
+There are clearly several regions where the two molecules are clashing.
+The fit in ChimeraX has clearly removed some of the chain clashes, but there are still regions where the two molecules are clashing (especially considering we don't visualize the side-chains).
++ PolIII-core-C82-C34-wHTH3-chimera-fitted-merged.pdb: 0.9478 + PolIII-core-C82-C34-wHTH3-chimera-fitted-watref.pdb: 0.9427 + PolIII-core-C82-C34-wHTH3-chimera-fitted-CGref.pdb: 0.9517 ++
Both cross-links are violated, but especially the one between core residue 5394 and C82 residue 472 (~70Å!). +The EM fitting solution for C82+C34wHTH3 was well defined according to PowerFit. There seems thus to be a discrepancy between the EM and MS data. +Another explanation could be conformational changes in the structures that are not accounted for in our modelling.
+ChimeraX view of the various clusters, superimposed on PolIII core
+The fit is better than in strategy 1, but there is still one heavily violated cross-link between resid 472 of C82 and resid 5394 of the core. This might well be a false positive. It was not detected by DISVIS because the analysis is only performed for pairs of domains and it can be satisfied, while when considering all molecules and all cross-links it cannot.
+In the case of C34 wHTH1, all cross-links are satisfied.
+In the case of C34 wHTH2, all cross-links are satisfied.
+All cross-links are now satisfied, including the one with C82 that was not in strategy 1.
+View of cluster2_4 in the EM map (correlation 0.9526). C82 and C34 wHTH3 domain (coral) nicely fit into the density. The other two C34 domains (dark sea green and burly wood) are found in a region where some density starts to appear when playing with the density level, which might indicate some disorder / conformational variability.
+-assi (selection1) (selection2) distance, lower-bound correction, upper-bound correction +assign (selection1) (selection2) distance, lower-bound correction, upper-bound correctionThe lower limit for the distance is calculated as: distance minus lower-bound correction @@ -317,7 +328,7 @@ E. g. a distance restraint between the CB carbons of residues 10 and 200 in chai allowed distance range between 10 and 20Å would be defined as follows:
-assi (segid A and resid 10 and name CB) (segid B and resid 200 and name CB) 20.0 10.0 0.0 +assign (segid A and resid 10 and name CB) (segid B and resid 200 and name CB) 20.0 10.0 0.0@@ -329,14 +340,14 @@ A HADDOCK-compatible distance restraint file based on the cross-links defined ab It contains the following distance restraints (8 in total):
-assi (segid A and resid 40 and name CA ) (not segid A and resid 252 and name CA ) 10.0 7.0 0.0 -assi (segid A and resid 90 and name CA ) (not segid A and resid 176 and name CA ) 10.0 7.0 0.0 -assi (segid A and resid 135 and name CA ) (not segid A and resid 158 and name CA ) 10.0 7.0 0.0 -assi (segid A and resid 161 and name CA ) (not segid A and resid 132 and name CA ) 10.0 7.0 0.0 -assi (segid A and resid 252 and name CA ) (not segid A and resid 40 and name CA ) 10.0 7.0 0.0 -assi (segid A and resid 176 and name CA ) (not segid A and resid 90 and name CA ) 10.0 7.0 0.0 -assi (segid A and resid 158 and name CA ) (not segid A and resid 135 and name CA ) 10.0 7.0 0.0 -assi (segid A and resid 132 and name CA ) (not segid A and resid 161 and name CA ) 10.0 7.0 0.0 +assign (segid A and resid 40 and name CA ) (not segid A and resid 252 and name CA ) 10.0 7.0 0.0 +assign (segid A and resid 90 and name CA ) (not segid A and resid 176 and name CA ) 10.0 7.0 0.0 +assign (segid A and resid 135 and name CA ) (not segid A and resid 158 and name CA ) 10.0 7.0 0.0 +assign (segid A and resid 161 and name CA ) (not segid A and resid 132 and name CA ) 10.0 7.0 0.0 +assign (segid A and resid 252 and name CA ) (not segid A and resid 40 and name CA ) 10.0 7.0 0.0 +assign (segid A and resid 176 and name CA ) (not segid A and resid 90 and name CA ) 10.0 7.0 0.0 +assign (segid A and resid 158 and name CA ) (not segid A and resid 135 and name CA ) 10.0 7.0 0.0 +assign (segid A and resid 132 and name CA ) (not segid A and resid 161 and name CA ) 10.0 7.0 0.0Since we might be docking various numbers of monomers - and we thus don't know to which monomer a cross-link should be defined, @@ -389,7 +400,7 @@ PDB structure to submit -> Browse and select *monomer-B.pdb* Segment ID to use during docking -> B -* **Step 4X:** Repeat Step3 as many times to complete the number of molecules you chose in Step2. For this unfold the **Third Molecule menu** and additional ones as needed. 
+* **Step 4':** Repeat Step4 as many times to complete the number of molecules you chose in Step2. For this unfold the **Third/Fourth/Fifth Molecule menu** and additional ones as needed. XX molecule: where is the structure provided? -> "I am submitting it" @@ -432,6 +443,7 @@ The protein sequence starts at residue 32 and ends at residue 254. Use those num Use this type of restraints -> switch on +Number of CX symmetry segment pairs -> 1 For the symmetry that matches your chosen oligomeric state (C2, C3, C4 or C5), unfold the first CX symmetry segment menu and enter the first and last residue numbers and specify the chainID for each monomer. @@ -451,6 +463,7 @@ Eair 3 -> 1.0
+QAFWKAVTAEFLAMLIFVLLSLGSTINWGGTEKPLPVDMVLISLCFGLSIATMVQCFGHISGGHINPAVTVAMVCTRKISIAKSVFYIAAQCLGAIIGAGILYLVTPPSVVGGLGVTMVHGNLTAGHGLLVELIITFQLVFTIFASCDSKRTDVTGSIALAIGFSVAIGHLFAINYTGASMNPARSFGPAVIMGNWENHWIYWVGPIIGAVLAGGLYEYVFCP ++* **Step 4:** Click on "Continue and preview job" +* **Step 5:** Job Name: X-mer. +* **Step 6:** Switch the seed button on and select a pseudo-random seed for your run. +* **Step 7:** Click on "Confirm and submit job" + +All your submitted jobs will be displayed in a table at the bottom of the page. +This recapitulates their status (running or succeeded), their name and the time they were created. +__Note__ that you can have multiple jobs running concurrently. This will allow you to try out multiple oligomeric states simply by modifying the number of copies in __step 2__, and re-submitting. + + +Time to grab a cup of tea or a coffee! +And while waiting, try to answer the following questions: + + + How do you interpret AlphaFold's predictions? What are the predicted LDDT (pLDDT), PAE, PTM, iPTM? + + +_Tip_: Try to find information about the prediction confidence at [https://golgi.sandbox.google.com/faq#how-can-i-interpret-confidence-metrics-to-check-the-accuracy-of-structures](https://golgi.sandbox.google.com/faq#how-can-i-interpret-confidence-metrics-to-check-the-accuracy-of-structures) + +Pre-calculated AlphaFold3 predictions are provided here. +The single zip archive contains the four different runs tested. +Each run consists of an archive containing information about the run and five predicted models (the naming indicates the rank). 
+ +* [`AlphaFold3Server-runs.zip`](/education/HADDOCK24/XL-MS-oligomer/AlphaFold3Server-runs.zip) + - `fold_dimer.zip`: contains the dimer run results (copies = 2) + - `fold_trimer.zip`: contains the trimer run results (copies = 3) + - `fold_tetramer.zip`: contains the tetramer run results (copies = 4) + - `fold_pentamer.zip`: contains the pentamer run results (copies = 5) + + +### Analysis of the generated models + +Once the run is finished and the status reached succeeded, you can click on one of the table rows to go to the provided visualizing tool. + +On the top, best model metrics will be shown (ipTM and PTM), and in the visualizer, residues are colored based on their pLDDT values. + +Take time to look at the generated models and the arrangement of the various monomers. When submitting our prediction, we only defined the number of monomers, but not the symmetry. + + + Does AlphaFold3 generate symmetrical solutions? Compare results from different oligomeric states. + + +Consider the `iPTM` score (value between 0 and 1) of the various oligomeric states (assuming that you run the notebook with different oligomeric states). + + + Which oligomeric state results in the highest iPTM score? + + +
+Dimer: ipTM = 0.77, pTM = 0.86 +Trimer: ipTM = 0.83, pTM = 0.86 +Tetramer: ipTM = 0.90, pTM = 0.91 +Pentamer: ipTM = 0.74, pTM = 0.78 ++ +
-(haddock3)$ haddock3 -h -usage: haddock3 [-h] [--restart RESTART] [--extend-run EXTEND_RUN] [--setup] - [--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [-v] - recipe - -positional arguments: - recipe The input recipe file path - -optional arguments: - -h, --help show this help message and exit - --restart RESTART Restart the run from a given step. Previous folders from the - selected step onward will be deleted. - --extend-run EXTEND_RUN - Start a run from a run directory previously prepared with the - `haddock3-copy` CLI. Provide the run directory created with - `haddock3-copy` CLI. - --setup Only setup the run, do not execute - --log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL} - -v, --version show version --
- ============================================================ - | | - | Crystallography & NMR System (CNS) | - | CNSsolve | - | | - ============================================================ - Version: 1.3 at patch level U - Status: Special UU release with Rg, paramagnetic - and Z-restraints (A. Bonvin, UU 2013) - ============================================================ - Written by: A.T.Brunger, P.D.Adams, G.M.Clore, W.L.DeLano, - P.Gros, R.W.Grosse-Kunstleve,J.-S.Jiang,J.M.Krahn, - J.Kuszewski, M.Nilges, N.S.Pannu, R.J.Read, - L.M.Rice, G.F.Schroeder, T.Simonson, G.L.Warren. - Copyright (c) 1997-2010 Yale University - ============================================================ - Running on machine: hostname unknown (Linux,64-bit) - Program started by: l00902 - Program started at: 16:34:22 on 06-Dec-2023 - ============================================================ - - FFT3C: Using FFTPACK4.1 - -CNSsolve> --
-assi (selection1) (selection2) distance, lower-bound correction, upper-bound correction -- -The lower limit for the distance is calculated as: distance minus lower-bound correction -and the upper limit as: distance plus upper-bound correction - -The syntax for the selections can combine information about: - -* chainID - `segid` keyword -* residue number - `resid` keyword -* atom name - `name` keyword. - -Other keywords can be used in various combinations of OR and AND statements. Please refer for that to the [online CNS manual][link-cns]{:target="_blank"}. - -E. g. a distance restraint between the CB carbons of residues 10 and 200 in chains A and B with an -allowed distance range between 10 and 20Å would be defined as follows: - -
-assi (segid A and resid 10 and name CB) (segid B and resid 200 and name CB) 20.0 10.0 0.0 -- - -Can you think of a different way of defining the distance and lower and upper corrections while maintaining the same -allowed range? - - - -
-31,32,33,34,35,52,54,55,56,100,101,102,103,104,105,106,151,152,169,170,173,211,212,213,214,216 -- -The numbering corresponds to the numbering of the `4G6K_clean.pdb` PDB file. - -Let us visualize those onto the 3D structure. -For this start PyMOL and load `4G6K_clean.pdb` - - -File menu -> Open -> select 4G6K_clean.pdb - - -Alternatively, if PyMol is accessible from the command line simply type: - - -pymol 4G6K_clean.pdb - - -We will now highlight the predicted paratope. In PyMOL type the following commands: - - -color white, all - - -select paratope, (resi 31+32+33+34+35+52+54+55+56+100+101+102+103+104+105+106+151+152+169+170+173+211+212+213+214+216)
-72,73,74,75,81,83,84,89,90,92,94,96,97,98,115,116,117 -- -We will now visualize the epitope on Interleukin-1β. For this start PyMOL and from the PyMOL File menu open the provided PDB file of the antigen. - - -File menu -> Open -> select 4I1B_clean.pdb - - - -color white, all - - -show surface - - -select epitope, (resi 72+73+74+75+81+83+84+89+90+92+94+96+97+98+115+116+117) - - -color red, epitope - - -Inspect the surface. - - -Do the identified residues form a well defined patch on the surface? - - -The answer to that question should be yes, but we can see some residues not colored that might also be involved in the binding - there are some white spots around/in the red surface. - -
-1 32 33 34 35 52 54 55 56 100 101 102 103 104 105 106 151 152 169 170 173 211 212 213 214 216 - -- -* For the antigen we will use the NMR-identified epitope as active and the surface neighbors as passive. The corresponding file can be found in the `restraints` directory as `antigen-NMR-epitope.act-pass`: - -
-72 73 74 75 81 83 84 89 90 92 94 96 97 98 115 116 117 -3 24 46 47 48 50 66 76 77 79 80 82 86 87 88 91 93 95 118 119 120 -- -Using those two files, we can generate the CNS-formatted AIR restraint files with the following command: - - -haddock3-restraints active_passive_to_ambig ./restraints/antibody-paratope.act-pass ./restraints/antigen-NMR-epitope.act-pass > ambig-paratope-NMR-epitope.tbl - - -This generates a file called `ambig-paratope-NMR-epitope.tbl` that contains the AIR -restraints. - - -Inspect the generated file and note how the ambiguous distances are defined. - - -
-assign (resi 31 and segid A) -( - (resi 72 and segid B) - or - (resi 73 and segid B) - or - (resi 74 and segid B) - or - (resi 75 and segid B) - or - (resi 81 and segid B) - or - (resi 83 and segid B) - or - (resi 84 and segid B) - or - (resi 89 and segid B) - or - (resi 90 and segid B) - or - (resi 92 and segid B) - or - (resi 94 and segid B) - or - (resi 96 and segid B) - or - (resi 97 and segid B) - or - (resi 98 and segid B) - or - (resi 115 and segid B) - or - (resi 116 and segid B) - or - (resi 117 and segid B) - or - (resi 3 and segid B) - or - (resi 24 and segid B) - or - (resi 46 and segid B) - or - (resi 47 and segid B) - or - (resi 48 and segid B) - or - (resi 50 and segid B) - or - (resi 66 and segid B) - or - (resi 76 and segid B) - or - (resi 77 and segid B) - or - (resi 79 and segid B) - or - (resi 80 and segid B) - or - (resi 82 and segid B) - or - (resi 86 and segid B) - or - (resi 87 and segid B) - or - (resi 88 and segid B) - or - (resi 91 and segid B) - or - (resi 93 and segid B) - or - (resi 95 and segid B) - or - (resi 118 and segid B) - or - (resi 119 and segid B) - or - (resi 120 and segid B) -) 2.0 2.0 0.0 -... --
- assign (segid A and resi 110 and name CA) (segid A and resi 132 and name CA) 47.578 0.0 0.0 - assign (segid A and resi 97 and name CA) (segid A and resi 204 and name CA) 33.405 0.0 0.0 -- -This file is also provided in the `restraints` directory. - - -
-model md5 caprieval_rank score irmsd fnat lrmsd ilrmsd dockq cluster-id cluster-ranking model-cluster-ranking air angles bonds bsa cdih coup dani desolv dihe elec improper rdcs rg sym total vdw vean xpcs -../06_emref/emref_4.pdb - 1 -145.275 1.034 0.879 2.856 1.929 0.819 - - - 69.699 0.000 0.000 1976.860 0.000 0.000 0.000 6.775 0.000 -493.142 0.000 0.000 0.000 0.000 -483.835 -60.391 0.000 0.000 -../06_emref/emref_6.pdb - 2 -132.350 1.255 0.828 2.269 2.166 0.783 - - - 67.962 0.000 0.000 1882.240 0.000 0.000 0.000 11.799 0.000 -510.162 0.000 0.000 0.000 0.000 -491.112 -48.912 0.000 0.000 -../06_emref/emref_1.pdb - 3 -131.650 0.822 0.862 2.060 1.269 0.858 - - - 79.712 0.000 0.000 1879.870 0.000 0.000 0.000 -2.596 0.000 -487.314 0.000 0.000 0.000 0.000 -447.165 -39.562 0.000 0.000 -../06_emref/emref_2.pdb - 4 -129.551 1.296 0.776 2.308 2.364 0.760 - - - 129.685 0.000 0.000 1862.120 0.000 0.000 0.000 1.240 0.000 -508.711 0.000 0.000 0.000 0.000 -421.043 -42.018 0.000 0.000 -... -- -If clustering was performed prior to calling the `caprieval` module the `capri_ss.tsv` file will also contain information about to which cluster the model belongs to and its ranking within the cluster. 
- - -The relevant statistics are: - -* **score**: *the HADDOCK score (arbitrary units)* -* **irmsd**: *the interface RMSD, calculated over the interfaces the molecules* -* **fnat**: *the fraction of native contacts* -* **lrmsd**: *the ligand RMSD, calculated on the ligand after fitting on the receptor (1st component)* -* **ilrmsd**: *the interface-ligand RMSD, calculated over the interface of the ligand after fitting on the interface of the receptor (more relevant for small ligands for example)* -* **dockq**: *the DockQ score, which is a combination of irmsd, lrmsd and fnat and provides a continuous scale between 1 (exactly equal to reference) and 0* - -Various other terms are also reported including: - -* **bsa**: *the buried surface area in squared Angstromn* -* **elec**: *the intermolecular electrostatic energy* -* **vdw**: *the intermolecular van der Waals energy* -* **desolv**: *the desolvation energy* - - -The iRMSD, lRMSD and Fnat metrics are the ones used in the blind protein-protein prediction experiment [CAPRI](https://capri.ebi.ac.uk/){:target="_blank"} (Critical PRediction of Interactions). - -In CAPRI the quality of a model is defined as (for protein-protein complexes): - -* **acceptable model**: i-RMSD < 4Å or l-RMSD<10Å and Fnat > 0.1 (0.23 < DOCKQ < 0.49) -* **medium quality model**: i-RMSD < 2Å or l-RMSD<5Å and Fnat > 0.3 (0.49 < DOCKQ < 0.8) -* **high quality model**: i-RMSD < 1Å or l-RMSD<1Å and Fnat > 0.5 (DOCKQ > 0.8) - - -What is based on this CAPRI criterion the quality of the best model listed above (emref_6.pdb)? - - -In case the `caprieval` module is called after a clustering step an additional file will be present in the directory: `capri_clt.tsv`. -This file contains the cluster ranking and score statistics, averaged over the minimum number of models defined for clustering -(4 by default), with their corresponding standard deviations. E.g.: - -
-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank -1 1 13 - -134.706 6.188 1.102 0.190 0.836 0.039 2.373 0.294 0.805 0.037 86.764 25.183 1900.272 44.896 4.305 5.461 -499.832 9.836 -460.789 28.355 -47.721 8.079 1 -2 2 10 - -103.801 6.412 5.072 0.072 0.125 0.007 11.718 0.592 0.184 0.008 119.323 25.301 1602.258 76.499 8.380 2.269 -300.700 36.597 -245.350 36.102 -63.973 3.596 2 -3 3 9 - -96.625 5.281 9.931 0.292 0.069 0.012 19.389 0.457 0.084 0.005 163.724 58.987 1525.055 31.387 2.803 1.489 -367.539 27.148 -246.109 63.575 -42.293 5.960 3 -4 4 4 - -96.089 15.346 14.725 0.026 0.090 0.007 23.152 0.311 0.073 0.003 174.066 46.134 1793.750 120.512 3.950 1.692 -332.615 23.798 -209.472 70.181 -50.923 6.901 4 -- - -In this file you find the cluster rank, the cluster ID (which is related to the size of the cluster, 1 being always the largest cluster), the number of models (n) in the cluster and the corresponding statistics (averages + standard deviations). The corresponding cluster PDB files will be found in the processing `09_seletopclusts` directory. - -While these simple text file can be easily checked from the command line already, they might be cumbersome to read. For that reason we have developed a post-processing analysis that automatically generates html reports for all `caprieval` steps in the workflow. These are located in the respective `analysis/XX_caprieval` directories and can be viewed using your favorite web browser. - - - -
-============================================== -== run1/02_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 16 out of 50 -Total number of medium or better models: 12 out of 50 -Total number of high quality models: 0 out of 50 - -First acceptable model - rank: 4 i-RMSD: 1.229 Fnat: 0.707 DockQ: 0.744 -First medium model - rank: 4 i-RMSD: 1.229 Fnat: 0.707 DockQ: 0.744 -Best model - rank: 21 i-RMSD: 1.154 Fnat: 0.828 DockQ: 0.795 -============================================== -== run1/05_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 16 out of 50 -Total number of medium or better models: 12 out of 50 -Total number of high quality models: 2 out of 50 - -First acceptable model - rank: 1 i-RMSD: 0.778 Fnat: 0.897 DockQ: 0.877 -First medium model - rank: 1 i-RMSD: 0.778 Fnat: 0.897 DockQ: 0.877 -Best model - rank: 1 i-RMSD: 0.778 Fnat: 0.897 DockQ: 0.877 -============================================== -== run1/07_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 16 out of 50 -Total number of medium or better models: 12 out of 50 -Total number of high quality models: 2 out of 50 - -First acceptable model - rank: 1 i-RMSD: 1.034 Fnat: 0.879 DockQ: 0.819 -First medium model - rank: 1 i-RMSD: 1.034 Fnat: 0.879 DockQ: 0.819 -Best model - rank: 3 i-RMSD: 0.822 Fnat: 0.862 DockQ: 0.858 -============================================== --
- In terms of iRMSD values we only observe very small differences in the best model. The fraction of native contacts and the DockQ scores are however improving much more after flexible refinement, but increases again slightly after final minimisation. All this will of course depend on how different are the bound and unbound conformations and the amount of data used to drive the docking process. In general, from our experience, the more and better data at hand, the larger the conformational changes that can be induced. -
-- This is not the case. The scoring function is not perfect, but does a reasonable job in ranking models of acceptable or better quality on top in this case. -
-Top-ranked model of the top cluster superimposed onto the reference crystal structure (in yellow)
-- 4G6K_abb_clean RMSD = 0.428 Å - 4G6K_af2_clean RMSD = 0.765 Å --
- 4G6K_abb_clean RMSD = 0.330 Å - 4G6K_af2_clean RMSD = 0.675 Å - 4G6K_clean RMSD = 0.393 Å --
-============================================== -== run1/10_caprieval/capri_clt.tsv -============================================== -Total number of acceptable or better clusters: 1 out of 4 -Total number of medium or better clusters: 1 out of 4 -Total number of high quality clusters: 0 out of 4 - -First acceptable cluster - rank: 1 i-RMSD: 1.102 Fnat: 0.836 DockQ: 0.805 -First medium cluster - rank: 1 i-RMSD: 1.102 Fnat: 0.836 DockQ: 0.805 -Best cluster - rank: 1 i-RMSD: 1.102 Fnat: 0.836 DockQ: 0.805 - -============================================== -== run1-abb/10_caprieval/capri_clt.tsv -============================================== -Total number of acceptable or better clusters: 1 out of 5 -Total number of medium or better clusters: 1 out of 5 -Total number of high quality clusters: 0 out of 5 - -First acceptable cluster - rank: 1 i-RMSD: 1.103 Fnat: 0.832 DockQ: 0.797 -First medium cluster - rank: 1 i-RMSD: 1.103 Fnat: 0.832 DockQ: 0.797 -Best cluster - rank: 1 i-RMSD: 1.103 Fnat: 0.832 DockQ: 0.797 - -============================================== -== run1-af2/10_caprieval/capri_clt.tsv -============================================== -Total number of acceptable or better clusters: 2 out of 5 -Total number of medium or better clusters: 0 out of 5 -Total number of high quality clusters: 0 out of 5 - -First acceptable cluster - rank: 1 i-RMSD: 2.956 Fnat: 0.375 DockQ: 0.413 -Best cluster - rank: 3 i-RMSD: 2.903 Fnat: 0.375 DockQ: 0.327 --
-============================================== -== run1/07_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 16 out of 50 -Total number of medium or better models: 12 out of 50 -Total number of high quality models: 2 out of 50 - -First acceptable model - rank: 1 i-RMSD: 1.034 Fnat: 0.879 DockQ: 0.819 -First medium model - rank: 1 i-RMSD: 1.034 Fnat: 0.879 DockQ: 0.819 -Best model - rank: 3 i-RMSD: 0.822 Fnat: 0.862 DockQ: 0.858 - -============================================== -== run1-abb/07_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 13 out of 50 -Total number of medium or better models: 10 out of 50 -Total number of high quality models: 2 out of 50 - -First acceptable model - rank: 1 i-RMSD: 1.249 Fnat: 0.793 DockQ: 0.773 -First medium model - rank: 1 i-RMSD: 1.249 Fnat: 0.793 DockQ: 0.773 -Best model - rank: 4 i-RMSD: 0.901 Fnat: 0.862 DockQ: 0.857 - -============================================== -== run1-af2/07_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 23 out of 50 -Total number of medium or better models: 1 out of 50 -Total number of high quality models: 0 out of 50 - -First acceptable model - rank: 2 i-RMSD: 2.492 Fnat: 0.466 DockQ: 0.508 -First medium model - rank: 22 i-RMSD: 1.780 Fnat: 0.448 DockQ: 0.592 -Best model - rank: 22 i-RMSD: 1.780 Fnat: 0.448 DockQ: 0.592 --
-============================================== -== run1-ens-clst//12_caprieval/capri_clt.tsv -============================================== -Total number of acceptable or better clusters: 3 out of 7 -Total number of medium or better clusters: 1 out of 7 -Total number of high quality clusters: 0 out of 7 - -First acceptable cluster - rank: 1 i-RMSD: 1.276 Fnat: 0.828 DockQ: 0.779 -First medium cluster - rank: 1 i-RMSD: 1.276 Fnat: 0.828 DockQ: 0.779 -Best cluster - rank: 1 i-RMSD: 1.276 Fnat: 0.828 DockQ: 0.779 --
-============================================== -== run1-ens-clst//02_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 50 out of 150 -Total number of medium or better models: 25 out of 150 -Total number of high quality models: 0 out of 150 - -First acceptable model - rank: 3 i-RMSD: 1.273 Fnat: 0.672 DockQ: 0.716 -First medium model - rank: 3 i-RMSD: 1.273 Fnat: 0.672 DockQ: 0.716 -Best model - rank: 46 i-RMSD: 1.154 Fnat: 0.828 DockQ: 0.795 -============================================== -== run1-ens-clst//05_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 28 out of 68 -Total number of medium or better models: 10 out of 68 -Total number of high quality models: 0 out of 68 - -First acceptable model - rank: 3 i-RMSD: 1.273 Fnat: 0.672 DockQ: 0.716 -First medium model - rank: 3 i-RMSD: 1.273 Fnat: 0.672 DockQ: 0.716 -Best model - rank: 15 i-RMSD: 1.229 Fnat: 0.707 DockQ: 0.744 -============================================== -== run1-ens-clst//07_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 27 out of 68 -Total number of medium or better models: 10 out of 68 -Total number of high quality models: 3 out of 68 - -First acceptable model - rank: 1 i-RMSD: 1.454 Fnat: 0.759 DockQ: 0.720 -First medium model - rank: 1 i-RMSD: 1.454 Fnat: 0.759 DockQ: 0.720 -Best model - rank: 22 i-RMSD: 0.908 Fnat: 0.776 DockQ: 0.803 --
-QVQLQESGPGLVKPSQTLSLTCSFSGFSLSTSGMGVGWIRQPSGKGLEWLAHIWWDGDES -YNPSLKSRLTISKDTSKNQVSLKITSVTAADTAVYFCARNRYDPPWFVDWGQGTLVTVSS -- -* Antibody light chain: -
-DIQMTQSTSSLSASVGDRVTITCRASQDISNYLSWYQQKPGKAVKLLIYYTSKLHSGVPS -RFSGSGSGTDYTLTISSLQQEDFATYFCLQGKMLPWTFGQGTKLEIK -- -* Antigen: -
-VRSLNCTLRDSQQKSLVMSGPYELKALHLQGQDMEQQVVFSMSFVQGEESNDKIPVALGL -KEKNLYLSCVLKDDKPTLQLESVDPKNYPKKKMEKRFVFNKIEINNKLEFESAQFPNWYI -STSQAENMPVFLGGTKGGQDITDFTMQFVSS --
-QVQLQESGPGLVKPSQTLSLTCSFSGFSLSTSGMGVGWIRQPSGKGLEWLAHIWWDGDESYNPSLKSRLTISKDTSKNQVSLKITSVTAADTAVYFCARNRYDPPWFVDWGQGTLVTVSS:DIQMTQSTSSLSASVGDRVTITCRASQDISNYLSWYQQKPGKAVKLLIYYTSKLHSGVPSRFSGSGSGTDYTLTISSLQQEDFATYFCLQGKMLPWTFGQGTKLEIK:VRSLNCTLRDSQQKSLVMSGPYELKALHLQGQDMEQQVVFSMSFVQGEESNDKIPVALGLKEKNLYLSCVLKDDKPTLQLESVDPKNYPKKKMEKRFVFNKIEINNKLEFESAQFPNWYISTSQAENMPVFLGGTKGGQDITDFTMQFVSS -- - - -Define the _jobname_, e.g. Ab-Ag - - - -In the _Advanced settings_ block you can check the option to save the results to your Google Drive (if you have an account) - - - -In the top section of the Colab, click: _Runtime > Run All_ - - -(It may give a warning that this is not authored by Google, because it is pulling code from GitHub - you can ignore it). - -This will automatically install, configure and run AlphaFold for you - leave this window open. -After the prediction complete you will be asked to download a zip-archive with the results (if you configured it to use Google Drive, a result archive will be automatically saved to your Google Drive). - -
- Model1: pLDDT=90.4 pTM=0.654 ipTM=0.525 - Model2: pLDDT=88.0 pTM=0.65 ipTM=0.522 - Model3: pLDDT=88.2 pTM=0.647 ipTM=0.52 - Model4: pLDDT=88.0 pTM=0.644 ipTM=0.516 - Model5: pLDDT=88.1 pTM=0.641 ipTM=0.512 --
Analysis report of step 10_caprieval
-assign (selection1) (selection2) distance, lower-bound correction, upper-bound correction -- -The lower limit for the distance is calculated as: distance minus lower-bound -correction and the upper limit as: distance plus upper-bound correction. The -syntax for the selections can combine information about chainID - `segid` -keyword -, residue number - `resid` keyword -, atom name - `name` keyword. -Other keywords can be used in various combinations of OR and AND statements. -Please refer for that to the [online CNS manual](http://cns-online.org/v1.3/){:target="_blank"}. - -We will shortly explain in this section how to generate both ambiguous -interaction restraints (AIRs) and specific distance restraints for use in -HADDOCK illustrating a scenario in which no _a priori_ knowledge is available -about the antibody binding site, but in which the antigen epitope has been pinpointed -by an NMR chemical shift perturbation experiment. - -Information about various types of distance restraints in HADDOCK can also be -found in our [online manual][air-help]{:target="_blank"} pages. - -
-31,32,33,34,35,52,54,55,56,100,101,102,103,104,105,106,151,152,169,170,173,211,212,213,214,216 -- -The numbering corresponds to the numbering of the `4G6K_clean.pdb` PDB file. - -Let us visualize those onto the 3D structure. -For this start PyMOL and load `4G6K_clean.pdb` - - -File menu -> Open -> select 4G6K_clean.pdb - - -We will now highlight the predicted paratope. In PyMOL type the following commands: - - -color white, all - - -select paratope, (resi 31+32+33+34+35+52+54+55+56+100+101+102+103+104+105+106+151+152+169+170+173+211+212+213+214+216)
-72,73,74,75,81,83,84,89,90,92,94,96,97,98,115,116,117 -- -We will now visualize the epitope on Interleukin-1β. For this start PyMOL and from the PyMOL File menu open the provided PDB file of the antigen. - - -File menu -> Open -> select 4I1B_clean.pdb - - - -color white, all - - -show surface - - -select epitope, (resi 72+73+74+75+81+83+84+89+90+92+94+96+97+98+115+116+117) - - -color red, epitope - - -Inspect the surface. - - -Do the identified residues form a well defined patch on the surface? - - -The answer to that question should be yes, but we can see some residues not colored that might also be involved in the binding - there are some white spots around/in the red surface. - -
-1 32 33 34 35 52 54 55 56 100 101 102 103 104 105 106 151 152 169 170 173 211 212 213 214 216 - -- -* and for the antigen (the file called `antigen-NMR-epitope.act-pass` from the `restraints` directory): -
-72 73 74 75 81 83 84 89 90 92 94 96 97 98 115 116 117 -3 24 46 47 48 50 66 76 77 79 80 82 86 87 88 91 93 95 118 119 120 -- -Using those two files, we can generate the CNS-formatted AIR restraint files with the following command: - - -haddock3-restraints active_passive_to_ambig ./restraints/antibody-paratope.act-pass ./restraints/antigen-NMR-epitope.act-pass > ambig-paratope-NMR-epitope.tbl - - -This generates a file called `ambig-paratope-NMR-epitope.tbl` that contains the AIR -restraints. The default distance range for those is between 0 and 2Å, which -might seem short but makes senses because of the 1/r^6 summation in the AIR -energy function that makes the effective distance be significantly shorter than -the shortest distance entering the sum. - -The effective distance is calculated as the SUM over all pairwise atom-atom -distance combinations between an active residue and all the active+passive on -the other molecule: SUM[1/r^6]^(-1/6). - -If you modify manually this file, it is possible to quickly check if the format is valid. - - -haddock3-restraints validate_tbl ambig-paratope-NMR-epitope.tbl --silent - - -No output means that your TBL file is valid. - -
- assign (segid A and resi 110 and name CA) (segid A and resi 132 and name CA) 47.578 0.0 0.0 - assign (segid A and resi 97 and name CA) (segid A and resi 204 and name CA) 33.405 0.0 0.0 -- -This file is also provided in the `restraints` directory of the archive you downloaded. - -If you are considering Alphafold2 or ABodyBuilder2 antibodies you have to create the appropriate distance restraints: - - -haddock3-restraints restrain_bodies 4G6K_af2_clean.pdb > af2-antibody-unambig.tbl - - - -haddock3-restraints restrain_bodies 4G6K_abb_clean.pdb > abb-antibody-unambig.tbl - - -
-model md5 caprieval_rank score irmsd fnat lrmsd ilrmsd dockq air bsa desolv elec total vdw -../06_emref/emref_2.pdb - 1 -164.078 2.111 0.621 5.456 4.368 0.555 96.928 2077.920 6.253 -584.597 -550.774 -63.105 -../06_emref/emref_8.pdb - 2 -144.476 1.472 0.759 2.659 2.691 0.726 43.505 2018.670 5.884 -549.010 -550.413 -44.908 -../06_emref/emref_4.pdb - 3 -138.888 1.087 0.724 2.888 1.830 0.759 116.384 1817.670 2.875 -558.618 -483.913 -41.678 -../06_emref/emref_3.pdb - 4 -138.860 0.983 0.931 1.826 1.554 0.862 100.098 1822.150 1.606 -503.198 -452.935 -49.836 -../06_emref/emref_1.pdb - 5 -138.754 1.146 0.828 1.738 1.846 0.806 36.138 1924.090 5.316 -528.536 -534.375 -41.976 -../06_emref/emref_5.pdb - 6 -138.362 0.921 0.914 1.817 1.482 0.866 73.832 1897.950 4.279 -484.430 -463.736 -53.138 -../06_emref/emref_6.pdb - 7 -138.054 1.153 0.862 2.220 1.880 0.809 63.112 1958.060 5.507 -529.438 -510.311 -43.985 -../06_emref/emref_9.pdb - 8 -134.536 1.313 0.810 2.508 2.239 0.765 63.951 1862.230 7.050 -522.799 -502.269 -43.421 -../06_emref/emref_11.pdb - 9 -131.577 0.965 0.862 1.337 1.428 0.848 58.716 1905.400 9.684 -519.910 -504.344 -43.151 -.... -- -If clustering was performed prior to calling the `caprieval` module the `capri_ss.tsv` file will also contain information about to which cluster the model belongs to and its ranking within the cluster. 
- -The relevant statistics are: - -* **score**: *the HADDOCK score (arbitrary units)* -* **irmsd**: *the interface RMSD, calculated over the interfaces the molecules* -* **fnat**: *the fraction of native contacts* -* **lrmsd**: *the ligand RMSD, calculated on the ligand after fitting on the receptor (1st component)* -* **ilrmsd**: *the interface-ligand RMSD, calculated over the interface of the ligand after fitting on the interface of the receptor (more relevant for small ligands for example)* -* **dockq**: *the DockQ score, which is a combination of irmsd, lrmsd and fnat and provides a continuous scale between 1 (exactly equal to reference) and 0* - -The iRMSD, lRMSD and Fnat metrics are the ones used in the blind protein-protein prediction experiment [CAPRI](https://capri.ebi.ac.uk/) (Critical PRediction of Interactions). - -In CAPRI the quality of a model is defined as (for protein-protein complexes): - -* **acceptable model**: i-RMSD < 4Å or l-RMSD<10Å and Fnat > 0.1 (0.23 < DOCKQ < 0.49) -* **medium quality model**: i-RMSD < 2Å or l-RMSD<5Å and Fnat > 0.3 (0.49 < DOCKQ < 0.8) -* **high quality model**: i-RMSD < 1Å or l-RMSD<1Å and Fnat > 0.5 (DOCKQ > 0.8) - - -What is based on this CAPRI criterion the quality of the best model listed above (emref_6.pdb)? - - -In case the `caprieval` module is called after a clustering step an additional file will be present in the directory: `capri_clt.tsv`. -This file contains the cluster ranking and score statistics, averaged over the minimum number of models defined for clustering -(4 by default), with their corresponding standard deviations. E.g.: - -
-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank -1 2 10 - -139.014 7.386 1.426 0.182 0.746 0.081 3.235 0.650 0.715 0.056 131.826 51.848 2002.760 76.340 8.397 4.920 -584.336 90.832 -496.236 89.379 -43.727 11.464 1 -2 3 10 - -120.115 6.139 14.964 0.018 0.069 0.000 23.390 0.342 0.065 0.001 189.120 18.758 1998.883 56.075 4.601 5.111 -426.788 71.303 -295.939 64.795 -58.270 8.018 2 -3 1 19 - -86.814 2.027 8.747 0.451 0.112 0.019 16.725 0.548 0.115 0.010 203.898 11.457 1554.495 32.501 7.527 1.994 -355.098 23.298 -194.910 27.573 -43.710 4.911 3 -... -- - -In this file you find the cluster rank, the cluster ID (which is related to the size of the cluster, 1 being always the largest cluster), the number of models (n) in the cluster and the corresponding statistics (averages + standard deviations). The corresponding cluster PDB files will be found in the processing `09_seletopclusts` directory. - -
-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank -1 1 17 - -146.575 10.361 1.413 0.442 0.759 0.112 3.207 1.357 0.726 0.110 89.229 27.411 1934.102 116.109 4.155 1.970 -548.856 29.400 -509.509 42.520 -49.882 8.168 1 -2 2 15 - -108.943 2.131 4.978 0.092 0.134 0.014 11.239 0.427 0.194 0.009 158.010 45.857 1670.585 42.916 8.673 2.771 -344.907 20.265 -251.333 32.787 -64.436 2.947 2 -3 3 5 - -96.132 13.387 9.913 0.553 0.077 0.019 19.462 0.471 0.087 0.009 155.613 56.813 1460.395 15.293 1.130 1.740 -348.077 48.270 -235.672 68.325 -43.208 9.309 3 -4 4 4 - -87.709 10.400 14.477 0.276 0.073 0.026 23.422 0.445 0.067 0.010 99.097 12.356 1602.033 180.993 6.684 2.524 -311.045 11.794 -254.042 22.508 -42.094 8.503 4 --
-============================================== -== runs/run1-CDR-NMR-CSP/10_caprieval/capri_clt.tsv -============================================== -Total number of acceptable or better clusters: 1 out of 4 -Total number of medium or better clusters: 1 out of 4 -Total number of high quality clusters: 0 out of 4 - -First acceptable cluster - rank: 1 i-RMSD: 1.413 Fnat: 0.759 DockQ: 0.726 -First medium cluster - rank: 1 i-RMSD: 1.413 Fnat: 0.759 DockQ: 0.726 -Best cluster - rank: 1 i-RMSD: 1.413 Fnat: 0.759 DockQ: 0.726 --
-============================================== -== ./runs/run1-CDR-NMR-CSP/02_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 25 out of 96 -Total number of medium or better models: 15 out of 96 -Total number of high quality models: 0 out of 96 - -First acceptable model - rank: 1 i-RMSD: 2.504 Fnat: 0.328 DockQ: 0.405 -First medium model - rank: 5 i-RMSD: 1.169 Fnat: 0.828 DockQ: 0.788 -Best model - rank: 13 i-RMSD: 1.013 Fnat: 0.672 DockQ: 0.735 -============================================== -== ./runs/run1-CDR-NMR-CSP/05_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 18 out of 48 -Total number of medium or better models: 15 out of 48 -Total number of high quality models: 4 out of 48 - -First acceptable model - rank: 1 i-RMSD: 1.107 Fnat: 0.810 DockQ: 0.805 -First medium model - rank: 1 i-RMSD: 1.107 Fnat: 0.810 DockQ: 0.805 -Best model - rank: 10 i-RMSD: 0.857 Fnat: 0.810 DockQ: 0.848 -============================================== -== ./runs/run1-CDR-NMR-CSP/07_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 18 out of 48 -Total number of medium or better models: 15 out of 48 -Total number of high quality models: 5 out of 48 - -First acceptable model - rank: 1 i-RMSD: 2.111 Fnat: 0.621 DockQ: 0.555 -First medium model - rank: 2 i-RMSD: 1.472 Fnat: 0.759 DockQ: 0.726 -Best model - rank: 6 i-RMSD: 0.921 Fnat: 0.914 DockQ: 0.866 -============================================== -== ./runs/run1-CDR-NMR-CSP/10_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 17 out of 41 -Total number of medium or better models: 15 out of 41 -Total number of high quality models: 5 out of 41 - -First acceptable model - rank: 1 i-RMSD: 2.111 Fnat: 0.621 DockQ: 0.555 -First medium model - rank: 2 i-RMSD: 1.472 Fnat: 
0.759 DockQ: 0.726 -Best model - rank: 6 i-RMSD: 0.921 Fnat: 0.914 DockQ: 0.866 --
- In terms of iRMSD values we only observe very small differences in the best model. The fraction of native contacts and the DockQ scores are however improving much more after flexible refinement. All this will of course depend on how different are the bound and unbound conformations and the amount of data used to drive the docking process. In general, from our experience, the more and better data at hand, the larger the conformational changes that can be induced. -
-- This is clearly not the case. The scoring function is not perfect, but does a reasonable job in ranking models of acceptable or better quality on top in this case. -
-Top4 models of the top cluster superimposed onto the reference crystal structure (in yellow)
--QVQLQESGPGLVKPSQTLSLTCSFSGFSLSTSGMGVGWIRQPSGKGLEWLAHIWWDGDES -YNPSLKSRLTISKDTSKNQVSLKITSVTAADTAVYFCARNRYDPPWFVDWGQGTLVTVSS -- -* Antibody light chain: -
-DIQMTQSTSSLSASVGDRVTITCRASQDISNYLSWYQQKPGKAVKLLIYYTSKLHSGVPS -RFSGSGSGTDYTLTISSLQQEDFATYFCLQGKMLPWTFGQGTKLEIK -- -* Antigen: -
-VRSLNCTLRDSQQKSLVMSGPYELKALHLQGQDMEQQVVFSMSFVQGEESNDKIPVALGL -KEKNLYLSCVLKDDKPTLQLESVDPKNYPKKKMEKRFVFNKIEINNKLEFESAQFPNWYI -STSQAENMPVFLGGTKGGQDITDFTMQFVSS --
- Model1: pLDDT=90.4 pTM=0.654 ipTM=0.525 - Model2: pLDDT=88.0 pTM=0.65 ipTM=0.522 - Model3: pLDDT=88.2 pTM=0.647 ipTM=0.52 - Model4: pLDDT=88.0 pTM=0.644 ipTM=0.516 - Model5: pLDDT=88.1 pTM=0.641 ipTM=0.512 -- -
Analysis report of step 10_caprieval
-+(haddock3)$ haddock3 -h +usage: haddock3 [-h] [--restart RESTART] [--extend-run EXTEND_RUN] [--setup] + [--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [-v] + recipe + +positional arguments: + recipe The input recipe file path + +optional arguments: + -h, --help show this help message and exit + --restart RESTART Restart the run from a given step. Previous folders from the + selected step onward will be deleted. + --extend-run EXTEND_RUN + Start a run from a run directory previously prepared with the + `haddock3-copy` CLI. Provide the run directory created with + `haddock3-copy` CLI. + --setup Only setup the run, do not execute + --log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL} + -v, --version show version ++
assign (selection1) (selection2) distance, lower-bound correction, upper-bound correction-The lower limit for the distance is calculated as: distance minus lower-bound -correction and the upper limit as: distance plus upper-bound correction. The -syntax for the selections can combine information about chainID - `segid` -keyword -, residue number - `resid` keyword -, atom name - `name` keyword. -Other keywords can be used in various combinations of OR and AND statements. -Please refer for that to the [online CNS manual](http://cns-online.org/v1.3/){:target="_blank"}. +The lower limit for the distance is calculated as: distance minus lower-bound correction +and the upper limit as: distance plus upper-bound correction. + +The syntax for the selections can combine information about: + +* chainID - `segid` keyword +* residue number - `resid` keyword +* atom name - `name` keyword. + +Other keywords can be used in various combinations of OR and AND statements. Please refer for that to the [online CNS manual][link-cns]{:target="_blank"}. -We will shortly explain in this section how to generate both ambiguous -interaction restraints (AIRs) and specific distance restraints for use in -HADDOCK illustrating two scenarios: +E.g.: a distance restraint between the CB carbons of residues 10 and 200 in chains A and B with an +allowed distance range between 10Å and 20Å would be defined as follows: -* **HV loops on the antibody, full surface on the antigen** -* **HV loops on the antibody, NMR interface mapping on the antigen** +
+assign (segid A and resid 10 and name CB) (segid B and resid 200 and name CB) 20.0 10.0 0.0 ++ + +Can you think of a different way of defining the distance and lower and upper corrections while maintaining the same +allowed range? + -Information about various types of distance restraints in HADDOCK can also be -found in our [online manual][air-help]{:target="_blank"} pages.
-31,32,33,34,35,52,54,55,56,100,101,102,103,104,105,106,1031,1032,1049,1050,1053,1091,1092,1093,1094,1096 +31,32,33,34,35,52,54,55,56,100,101,102,103,104,105,106,151,152,169,170,173,211,212,213,214,216The numbering corresponds to the numbering of the `4G6K_clean.pdb` PDB file. @@ -426,22 +427,18 @@ For this start PyMOL and load `4G6K_clean.pdb` File menu -> Open -> select 4G6K_clean.pdb -or from the command line: +Alternatively, if PyMOL is accessible from the command line, simply type: pymol 4G6K_clean.pdb -We will now highlight the predicted paratope. In PyMOL type the following commands: +We will now highlight the predicted paratope residues in red. In PyMOL type the following commands: -color white, all - - -select paratope, (resi 31+32+33+34+35+52+54+55+56+100+101+102+103+104+105+106+1031+1032+1049+1050+1053+1091+1092+1093+1094+1096)
-REM FreeSASA 2.0.3 -REM Absolute and relative SASAs for 4I1B_clean.pdb -REM Atomic radii and reference values for relative SASA: ProtOr -REM Chains: A -REM Algorithm: Lee & Richards -REM Probe-radius: 1.40 -REM Slices: 20 -REM RES _ NUM All-atoms Total-Side Main-Chain Non-polar All polar -REM ABS REL ABS REL ABS REL ABS REL ABS REL -RES VAL A 3 84.83 55.8 13.08 11.8 71.76 172.9 30.45 26.5 54.38 147.5 -RES ARG A 4 200.36 84.1 192.85 98.3 7.51 17.9 71.92 98.3 128.44 77.8 -RES SER A 5 48.69 41.1 25.55 34.1 23.14 53.3 22.44 47.8 26.25 36.8 -RES LEU A 6 71.91 40.0 70.87 50.7 1.04 2.6 71.91 50.5 0.00 0.0 -RES ASN A 7 31.01 21.4 25.87 25.0 5.14 12.4 0.00 0.0 31.01 30.0 -... -- -The following command will return all residues with a relative SASA for either -the backbone or the side-chain > 15% (we use 15% to limit the number of surface residues selected as their -number does increase the computational requirements) - - - awk \'{if (NF==13 && ($7>15 || $9>15)) printf \"\%d \",$3; if (NF==14 && ($8>15 || $10>15)) print $0}\' 4I1B_clean.rsa - - -The resulting list of residues can be found in the `restraints/antigen-surface.act-pass` file. Note in this file the empty first line. The file consists -of two lines, with the first one defining the `active` residues and the second line the `passive` ones, in this case the solvent accessible residues. -We will use later this file to generate the ambiguous distance restraints for HADDOCK. - -If you want to generate the same file, first create an empty line and then use the `awk` command, piping the results to an output file, e.g.: - - - echo \" \" \> antigen-surface.pass
-14/03/2023 13:15:20 L157 INFO - Calculate accessibility... -14/03/2023 13:15:20 L228 INFO - Chain: B - 151 residues -14/03/2023 13:15:20 L234 INFO - Applying cutoff to side_chain_rel - 0.15 -14/03/2023 13:15:20 L244 INFO - Chain B - 3,4,5,6,7,11,13,14,15,20,21,22,23,24,25,27,29,30,32,33,34,35,36,37,38,41,43,46,48,49,50,51,52,53,54,55,56,63,64,65,66,72,73,74,75,76,77,79,81,83,84,86,87,88,89,91,92,93,94,96,97,98,105,106,107,108,109,115,116,117,118,119,120,125,126,127,128,129,130,131,133,135,137,138,139,140,141,142,145,147,149,150,151,152,153 -- -We can visualize the selected surface residues of Interleukin-1β. - -For this start PyMOL and from the PyMOL File menu open the PDB file of the antigen. - - -color white, all - - -show surface - - -select surface15, (resi 3+4+5+6+13+14+15+20+21+22+23+24+25+30+32+33+34+35+37+38+48+49+50+51+52+53+54+55+61+63+64+65+66+73+74+75+76+77+80+84+86+87+88+89+90+91+93+94+96+97+105+106+107+108+109+118+119+126+127+128+129+130+135+136+137+138+139+140+141+142+147+148+150+151+152+153) - - -color green, surface40 - - - -color white, all - - -show surface - - -select surface40, (resi 3+4+5+6+13+14+15+20+21+22+23+24+25+30+32+33+34+35+37+38+48+49+50+51+52+53+54+55+61+63+64+65+66+73+74+75+76+77+80+84+86+87+88+89+90+91+93+94+96+97+105+106+107+108+109+118+119+126+127+128+129+130+135+136+137+138+139+140+141+142+147+148+150+151+152+153) - - -color green, surface40 - - -
-31 32 33 34 35 52 54 55 56 100 101 102 103 104 105 106 1031 1032 1049 1050 1053 1091 1092 1093 1094 1096 -+*__Active residues__*: These residues are "forced" to be at the interface. If they are not part of the interface in the final models, an energetic penalty will be applied. The interface in this context is defined by the union of active and passive residues on the partner molecules. -* and for the antigen (the file called `antigen-surface.pass` from the `restraints` directory): -
-3 4 5 6 13 14 15 20 21 22 23 24 25 30 32 33 34 35 37 38 48 49 50 51 52 53 54 55 61 63 64 65 66 73 74 75 76 77 80 84 86 87 88 89 90 91 93 94 96 97 105 106 107 108 109 118 119 126 127 128 129 130 135 136 137 138 139 140 141 142 147 148 150 151 152 153 -+*__Passive residues__*: These residues are expected to be at the interface. However, if they are not, no energetic penalty is applied. -Using those two files, we can generate the CNS-formatted AIR restraint files -with the following command: - -haddock3-restraints active_passive_to_ambig ./restraints/antibody-paratope.act-pass ./restraints/antigen-surface.pass > ambig-paratope-surface.tbl - +In general, it is better to be too generous rather than too strict in the definition of passive residues. +An important aspect is to filter both the active (the residues identified from your mapping experiment) and passive residues by their solvent accessibility. +This is done automatically when using the `haddock3-restraints passive_from_active` command: residues with less than 15% relative solvent accessibility (same cutoff as the default in the HADDOCK server) are discarded. +This is, however, not a hard limit, and you might consider including even more buried residues if some important chemical group seems solvent accessible from a visual inspection. -This generates a file called `ambig-paratope-surface.tbl` that contains the AIR -restraints. The default distance range for those is between 0 and 2Å, which -might seem short but makes senses because of the 1/r^6 summation in the AIR -energy function that makes the effective distance be significantly shorter than -the shortest distance entering the sum. -The effective distance is calculated as the SUM over all pairwise atom-atom -distance combinations between an active residue and all the active+passive on -the other molecule: SUM[1/r^6]^(-1/6). +
+1 32 33 34 35 52 54 55 56 100 101 102 103 104 105 106 151 152 169 170 173 211 212 213 214 216 --### Defining ambiguous restraints for scenario 2b +* For the antigen we will use the NMR-identified epitope as active and the surface neighbors as passive. +The corresponding file can be found in the `restraints` directory as `antigen-NMR-epitope.act-pass`: -In this scenario the NMR epitope is defined as active (meaning ambiguous distance restraints will be defined from the NMR epitope residues) and the surface neighbors are used as passive residues in HADDOCK. +
+
+72 73 74 75 81 83 84 89 90 92 94 96 97 98 115 116 117 +3 24 46 47 48 50 66 76 77 79 80 82 86 87 88 91 93 95 118 119 120 +-The creation of the AIR tbl file for scenario 2b is similar to scenario 1, but instead using the `antigen-NMR-epitope.act-pass` file for the antigen: +Using those two files, we can generate the CNS-formatted Ambiguous Interaction Restraints (AIRs) file with the following command: -haddock3-restraints active_passive_to_ambig ./restraints/antibody-paratope.act-pass ./restraints/antigen-NMR-epitope.act-pass > ambig-paratope-NMR-epitope.tbl +haddock3-restraints active_passive_to_ambig ./restraints/antibody-paratope.act-pass ./restraints/antigen-NMR-epitope.act-pass \-\-segid-one A \-\-segid-two B > ambig-paratope-NMR-epitope.tbl -
- assign (segid A and resi 220 and name CA) (segid A and resi 1018 and name CA) 47.578 0.0 0.0 - assign (segid A and resi 193 and name CA) (segid A and resi 1014 and name CA) 33.405 0.0 0.0 -- -This file is also provided in the `restraints` directory of the archive you downloaded. - -
+assign (resi 31 and segid A) +( + (resi 72 and segid B) + or + (resi 73 and segid B) + or + (resi 74 and segid B) + or + (resi 75 and segid B) + or + (resi 81 and segid B) + or + (resi 83 and segid B) + or + (resi 84 and segid B) + or + (resi 89 and segid B) + or + (resi 90 and segid B) + or + (resi 92 and segid B) + or + (resi 94 and segid B) + or + (resi 96 and segid B) + or + (resi 97 and segid B) + or + (resi 98 and segid B) + or + (resi 115 and segid B) + or + (resi 116 and segid B) + or + (resi 117 and segid B) + or + (resi 3 and segid B) + or + (resi 24 and segid B) + or + (resi 46 and segid B) + or + (resi 47 and segid B) + or + (resi 48 and segid B) + or + (resi 50 and segid B) + or + (resi 66 and segid B) + or + (resi 76 and segid B) + or + (resi 77 and segid B) + or + (resi 79 and segid B) + or + (resi 80 and segid B) + or + (resi 82 and segid B) + or + (resi 86 and segid B) + or + (resi 87 and segid B) + or + (resi 88 and segid B) + or + (resi 91 and segid B) + or + (resi 93 and segid B) + or + (resi 95 and segid B) + or + (resi 118 and segid B) + or + (resi 119 and segid B) + or + (resi 120 and segid B) +) 2.0 2.0 0.0 +... ++
+ assign (segid A and resi 110 and name CA) (segid A and resi 132 and name CA) 26.326 0.0 0.0 + assign (segid A and resi 97 and name CA) (segid A and resi 204 and name CA) 19.352 0.0 0.0 +-#### 2. batch mode +This file is also provided in the `restraints` directory. -In this mode HADDOCK3 will typically be started on your local server (e.g. the login node) and will dispatch jobs to the batch system of your cluster. -Two batch systems are currently supported: `slurm` and `torque` (defined by the `batch_type` parameter). In the configuration file you will -have to define the `queue` name and the maximum number of concurrent jobs sent to the queue (`queue_limit`). Since HADDOCK3 single model -calculations are quite fast, it is recommended to calculate multiple models within one job submitted to the batch system. -The number of model per job is defined by the `concat` parameter in the configuration file. -You want to avoid sending thousands of very short jobs to the batch system if you want to remain friend with your system administrators... -An example of the relevant parameters to be defined in the first section of the config file is: +
-mmodel md5 caprieval_rank score irmsd fnat lrmsd ilrmsd dockq cluster-id cluster-ranking model-cluster-ranking air angles bonds bsa cdih coup dani desolv dihe elec improper rdcs rg total vdw vean xpcs -../6_emref/emref_11.pdb - 1 -151.136 1.261 0.741 2.673 2.192 0.746 2 1 1 79.990 0.000 0.000 2072.710 0.000 0.000 0.000 9.960 0.000 -598.859 0.000 0.000 0.000 -568.192 -49.323 0.000 0.000 -../6_emref/emref_15.pdb - 2 -137.252 1.237 0.845 2.713 2.253 0.783 2 1 2 83.274 0.000 0.000 2058.800 0.000 0.000 0.000 12.576 0.000 -584.402 0.000 0.000 0.000 -542.402 -41.275 0.000 0.000 -../6_emref/emref_19.pdb - 3 -136.527 1.550 0.621 4.283 3.353 0.634 2 1 3 200.318 0.000 0.000 1879.180 0.000 0.000 0.000 11.023 0.000 -704.878 0.000 0.000 0.000 -531.166 -26.606 0.000 0.000 -../6_emref/emref_14.pdb - 4 -131.142 1.658 0.776 3.271 3.005 0.699 2 1 4 163.724 0.000 0.000 2000.350 0.000 0.000 0.000 0.028 0.000 -449.205 0.000 0.000 0.000 -343.183 -57.702 0.000 0.000 -../6_emref/emref_1.pdb - 5 -128.501 14.936 0.069 22.861 21.984 0.067 3 2 1 159.850 0.000 0.000 1975.260 0.000 0.000 0.000 7.691 0.000 -451.593 0.000 0.000 0.000 -353.602 -61.859 0.000 0.000 -.... -+
-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank -1 2 10 - -139.014 7.386 1.426 0.182 0.746 0.081 3.235 0.650 0.715 0.056 131.826 51.848 2002.760 76.340 8.397 4.920 -584.336 90.832 -496.236 89.379 -43.727 11.464 1 -2 3 10 - -120.115 6.139 14.964 0.018 0.069 0.000 23.390 0.342 0.065 0.001 189.120 18.758 1998.883 56.075 4.601 5.111 -426.788 71.303 -295.939 64.795 -58.270 8.018 2 -3 1 19 - -86.814 2.027 8.747 0.451 0.112 0.019 16.725 0.548 0.115 0.010 203.898 11.457 1554.495 32.501 7.527 1.994 -355.098 23.298 -194.910 27.573 -43.710 4.911 3 -... -+source /vol0601/data/hp240465/Materials/Life_Science/20250312_Bonvin/haddock3/.venv/bin/activate +haddock3 ./workflows/docking-antibody-antigen.cfg -In this file you find the cluster rank, the cluster ID (which is related to the size of the cluster, 1 being always the largest cluster), the number of models (n) in the cluster and the corresponding statistics (averages + standard deviations). The corresponding cluster PDB files will be found in the processing `X_seletopclusts` directory. +{% endhighlight %} -
-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank -1 12 20 - -148.831 5.410 1.678 0.214 0.733 0.106 3.865 0.862 0.669 0.076 185.424 46.938 2111.405 29.031 7.232 1.046 -570.066 22.622 -445.234 30.130 -60.592 5.236 1 -2 18 18 - -123.766 3.115 13.094 0.125 0.000 0.000 21.834 0.146 0.048 0.001 183.704 11.225 2181.110 90.094 -10.254 2.067 -332.751 46.979 -214.379 31.846 -65.332 7.781 2 -3 88 10 - -123.360 3.041 9.176 0.141 0.017 0.000 21.413 0.684 0.060 0.003 121.738 16.249 1813.925 34.884 -5.384 0.512 -436.619 11.830 -357.708 16.713 -42.827 3.868 3 -4 13 20 - -118.282 5.208 10.944 0.299 0.120 0.021 21.415 0.619 0.092 0.006 152.420 10.986 1808.007 32.866 6.465 1.325 -496.468 28.628 -384.743 24.947 -40.696 2.465 4 -5 89 10 - -116.454 3.801 14.020 0.134 0.017 0.000 24.744 0.187 0.045 0.001 195.446 27.781 1889.682 79.773 -4.616 1.591 -417.563 20.307 -269.986 30.222 -47.870 1.270 5 -6 61 10 - -115.229 3.326 5.378 0.020 0.069 0.012 16.802 0.629 0.115 0.005 88.131 9.931 1852.945 23.752 6.616 0.785 -369.303 8.252 -337.969 11.465 -56.798 1.243 6 -7 3 27 - -114.186 5.971 13.496 0.080 0.000 0.000 20.665 0.102 0.052 0.001 165.852 1.993 1707.362 39.298 -3.782 2.991 -376.041 22.069 -261.970 27.164 -51.781 5.373 7 -8 72 10 - -112.872 6.827 13.981 0.020 0.000 0.000 21.801 0.126 0.048 0.000 132.974 16.434 1966.793 52.013 2.544 3.066 -294.085 43.518 -231.007 42.693 -69.896 9.485 8 -9 1 36 - -109.409 4.762 14.617 0.071 0.000 0.000 23.183 0.063 0.043 0.000 197.155 20.227 1744.597 41.866 2.528 2.065 -328.410 33.521 -197.225 28.906 -65.970 1.565 9 -10 17 18 - -108.195 7.362 11.031 0.082 0.000 0.000 20.012 0.260 0.057 0.001 164.234 10.186 1831.495 37.648 1.541 1.670 -396.469 47.982 -279.100 40.077 -46.865 5.102 10 -11 96 10 - -107.334 2.166 10.667 0.151 0.000 0.000 21.692 0.437 0.051 0.002 142.746 7.402 1610.415 71.508 -1.450 
3.091 -388.339 21.616 -288.084 14.372 -42.491 6.894 11 -12 14 20 - -107.155 1.167 13.907 0.198 0.000 0.000 23.257 0.230 0.043 0.001 177.685 34.304 1817.573 49.504 -3.932 4.818 -359.962 72.737 -231.276 34.880 -48.999 7.524 12 -13 58 10 - -105.459 2.853 9.418 0.041 0.017 0.000 14.704 0.253 0.098 0.002 142.162 20.088 1801.787 41.822 7.304 1.091 -390.603 6.402 -297.299 14.909 -48.858 1.289 13 -14 7 22 - -104.950 8.949 12.591 0.206 0.000 0.000 21.517 0.421 0.050 0.002 137.010 17.810 1981.173 62.496 3.391 2.312 -294.790 53.286 -220.864 60.327 -63.084 12.713 14 -15 110 8 - -104.523 5.407 15.786 0.141 0.000 0.000 24.742 0.292 0.038 0.001 188.247 28.369 1656.015 29.305 -7.098 2.026 -309.368 15.169 -175.498 17.510 -54.376 4.423 15 -16 90 10 - -102.191 10.542 14.905 0.036 0.060 0.009 23.103 0.131 0.064 0.003 216.193 11.357 1942.820 85.100 5.473 2.317 -341.726 40.261 -186.471 38.069 -60.938 6.555 16 -17 104 9 - -102.002 3.200 14.681 0.078 0.095 0.009 23.567 0.190 0.074 0.003 282.735 23.431 1944.375 30.920 5.480 0.963 -406.682 30.081 -178.366 9.480 -54.419 6.790 17 -18 100 10 - -101.698 3.868 14.309 0.064 0.000 0.000 23.266 0.038 0.043 0.000 199.056 7.430 1988.773 23.017 -8.379 2.438 -240.464 33.134 -106.540 25.499 -65.132 4.179 18 -19 98 10 - -100.603 4.480 12.350 0.118 0.000 0.000 21.122 0.363 0.051 0.001 252.315 14.795 1677.152 43.166 -3.298 0.773 -394.031 29.076 -185.446 16.087 -43.730 1.316 19 -20 102 10 - -100.156 1.566 13.879 0.194 0.000 0.000 22.984 0.407 0.044 0.001 251.103 18.039 1770.800 64.193 -11.348 2.940 -282.111 7.081 -88.504 22.525 -57.496 2.031 20 -21 16 19 - -100.048 2.675 12.712 0.077 0.000 0.000 21.116 0.748 0.051 0.003 141.121 20.656 1614.205 84.277 -7.610 2.379 -245.708 28.106 -161.995 21.053 -57.408 1.913 21 -22 95 10 - -97.056 5.290 12.727 0.079 0.000 0.000 20.681 0.123 0.053 0.000 174.836 11.509 1808.197 27.762 7.899 2.831 -370.697 37.713 -244.160 41.611 -48.299 8.029 22 -23 40 10 - -96.143 7.294 3.996 0.063 0.151 0.023 7.960 0.413 0.269 0.007 268.064 
6.044 1733.595 64.045 5.918 1.741 -440.012 18.438 -212.812 23.620 -40.865 2.978 23 -24 87 10 - -95.919 10.319 10.335 0.290 0.073 0.026 18.886 0.434 0.087 0.011 214.010 12.675 1786.825 65.807 -0.359 4.436 -345.955 25.147 -179.715 31.282 -47.770 4.496 24 -25 29 13 - -94.903 5.148 11.976 0.456 0.000 0.000 20.189 0.481 0.056 0.002 180.302 16.905 1590.420 71.694 3.421 0.674 -407.384 36.274 -261.960 42.426 -34.878 4.826 25 -26 82 10 - -94.568 1.367 13.254 0.087 0.021 0.007 23.375 0.213 0.050 0.002 139.468 19.278 1733.650 35.788 -7.849 1.524 -244.905 20.819 -157.122 27.990 -51.684 5.303 26 -27 74 10 - -93.734 3.985 11.956 0.081 0.099 0.007 22.498 0.236 0.080 0.002 301.590 4.206 1764.012 79.875 -3.076 3.370 -299.142 56.102 -58.541 44.764 -60.989 9.410 27 -28 19 17 - -92.956 3.507 13.908 0.268 0.026 0.015 23.921 0.385 0.050 0.004 266.721 14.416 1827.332 46.565 -0.139 2.193 -381.233 53.830 -157.755 45.492 -43.242 7.955 28 -29 122 6 - -92.745 8.304 12.279 0.037 0.000 0.000 19.752 0.060 0.057 0.000 181.589 15.741 1752.342 47.374 1.028 1.586 -279.507 47.440 -153.948 58.429 -56.030 3.553 29 -30 6 22 - -90.730 14.656 15.634 0.186 0.000 0.000 24.460 0.283 0.039 0.001 184.504 30.414 1646.735 13.700 -4.965 3.284 -279.679 35.172 -143.454 66.675 -48.279 4.415 30 -31 9 20 - -90.251 2.575 13.154 0.202 0.017 0.000 20.607 0.234 0.058 0.001 245.319 26.351 1504.862 38.200 10.189 2.033 -416.678 30.459 -212.996 51.332 -41.636 4.161 31 -32 22 16 - -90.038 7.776 13.627 0.190 0.021 0.014 24.214 0.496 0.048 0.006 184.562 6.921 1689.253 49.205 -3.136 2.159 -268.492 14.412 -135.590 22.903 -51.660 5.142 32 -33 26 14 - -89.241 3.176 12.877 0.056 0.073 0.014 20.812 0.219 0.077 0.005 219.898 7.800 1397.388 66.657 9.547 1.503 -442.622 29.172 -254.977 24.167 -32.253 2.642 33 -34 115 7 - -88.870 18.331 12.362 0.132 0.000 0.000 18.819 0.127 0.061 0.001 204.335 46.807 1699.523 139.984 12.768 1.118 -465.403 38.996 -290.059 88.987 -28.991 5.403 34 -35 31 13 - -88.274 9.696 11.822 0.027 0.000 0.000 20.745 
0.055 0.053 0.000 212.259 7.756 1703.915 75.126 7.349 1.187 -401.980 29.718 -226.174 34.586 -36.453 5.963 35 -36 51 10 - -88.060 5.613 14.394 0.033 0.013 0.007 23.261 0.216 0.047 0.003 186.626 10.473 1530.995 13.755 -0.823 1.396 -285.016 40.961 -147.287 46.363 -48.896 3.714 36 -37 4 27 - -87.381 3.159 14.366 0.075 0.000 0.000 23.345 0.143 0.043 0.000 262.042 8.675 1685.735 33.973 2.679 1.701 -375.458 19.104 -154.588 19.121 -41.172 3.024 37 -38 66 10 - -86.618 5.301 14.360 0.065 0.065 0.007 22.073 0.117 0.068 0.002 175.771 18.244 1654.713 31.971 9.575 0.751 -391.041 27.344 -250.832 30.637 -35.561 2.215 38 -39 105 9 - -86.351 7.447 12.441 0.119 0.000 0.000 21.061 0.886 0.052 0.003 203.393 38.498 1621.940 67.036 11.485 4.238 -397.087 49.285 -232.452 34.907 -38.758 6.513 39 -40 2 28 - -85.386 3.721 8.292 0.100 0.060 0.009 14.033 0.238 0.120 0.004 223.683 20.263 1444.425 18.443 16.980 1.268 -531.053 52.923 -325.894 45.805 -18.524 8.161 40 -41 99 10 - -85.318 3.371 14.167 0.142 0.000 0.000 23.073 0.125 0.044 0.000 210.252 30.361 1688.745 36.455 -3.802 1.262 -232.285 17.646 -78.117 29.587 -56.084 2.159 41 -42 103 10 - -84.777 7.347 9.713 0.045 0.000 0.000 15.319 0.158 0.086 0.001 211.814 17.902 1762.062 62.020 -2.237 1.474 -302.213 19.881 -133.678 10.735 -43.278 8.354 42 -43 11 20 - -84.755 3.002 10.202 0.038 0.000 0.000 16.040 0.210 0.080 0.001 242.024 11.938 1664.300 32.341 -1.048 2.058 -320.478 20.571 -122.268 20.568 -43.814 3.539 43 -44 27 14 - -84.385 4.142 14.452 0.099 0.000 0.000 22.747 0.072 0.044 0.001 192.791 15.745 1680.112 60.521 8.681 0.414 -322.106 22.136 -177.239 23.588 -47.924 4.456 44 -45 30 13 - -83.349 7.710 12.527 0.328 0.000 0.000 20.534 1.002 0.054 0.004 205.630 34.703 1630.215 82.132 0.730 2.859 -288.909 10.434 -130.138 41.408 -46.861 4.465 45 -46 15 19 - -82.368 3.129 13.418 0.325 0.043 0.009 23.280 0.386 0.058 0.004 242.751 6.286 1658.195 38.410 5.431 0.414 -341.193 32.419 -142.278 23.070 -43.836 6.151 46 -47 20 16 - -81.607 6.919 12.495 0.107 
0.000 0.000 19.252 0.363 0.059 0.002 266.161 42.181 1672.205 55.410 5.343 4.413 -414.589 26.369 -179.076 49.862 -30.648 5.816 47 -48 76 10 - -81.449 9.534 13.188 0.037 0.000 0.000 21.421 0.074 0.050 0.001 218.734 15.090 1736.692 89.208 -0.001 1.199 -290.595 36.010 -117.063 50.613 -45.203 3.614 48 -49 8 20 - -81.182 4.279 10.455 0.122 0.000 0.000 16.979 0.225 0.073 0.002 202.172 4.326 1751.410 32.215 6.297 1.591 -296.855 38.493 -143.008 36.221 -48.325 6.151 49 -50 83 10 - -80.925 4.370 12.827 0.095 0.000 0.000 18.830 0.175 0.061 0.001 178.048 13.548 1484.358 38.444 5.855 0.509 -387.956 12.917 -236.901 11.192 -26.994 3.744 50 -51 80 10 - -80.920 1.233 8.605 0.110 0.000 0.000 14.204 0.254 0.098 0.003 263.125 9.132 1680.088 44.302 -2.533 1.044 -276.380 9.676 -62.678 11.913 -49.423 2.206 51 -52 65 10 - -80.630 3.310 14.620 0.066 0.000 0.000 22.569 0.112 0.045 0.000 288.688 2.621 1667.402 19.562 6.148 0.292 -359.050 7.320 -114.200 9.850 -43.837 2.804 52 -53 78 10 - -80.553 1.319 15.253 0.064 0.000 0.000 23.967 0.193 0.041 0.001 243.820 10.141 1722.265 32.414 -6.757 0.875 -233.628 32.754 -41.260 21.926 -51.452 6.462 53 -54 91 10 - -80.434 7.690 12.606 0.433 0.065 0.007 22.027 0.766 0.070 0.005 239.985 31.342 1715.950 113.990 -2.244 1.763 -276.599 35.568 -83.483 44.904 -46.869 5.835 54 -55 86 10 - -79.938 1.932 14.133 0.054 0.000 0.000 21.594 0.090 0.049 0.001 234.653 13.341 1560.172 37.779 -2.434 3.230 -216.869 17.452 -39.812 24.206 -57.595 0.798 55 -56 94 10 - -79.785 3.074 14.163 0.044 0.000 0.000 23.755 0.224 0.042 0.001 233.620 10.459 1617.775 27.712 -4.985 3.008 -246.368 23.556 -61.636 15.735 -48.888 3.624 56 -57 57 10 - -79.647 3.136 14.334 0.035 0.000 0.000 22.584 0.139 0.045 0.001 206.786 21.758 1752.880 43.951 4.484 1.474 -292.769 26.191 -132.238 35.347 -46.255 2.074 57 -58 109 8 - -79.637 4.090 14.374 0.029 0.000 0.000 23.226 0.171 0.043 0.001 179.447 18.968 1587.280 48.041 -6.736 4.312 -216.970 21.146 -84.975 34.722 -47.452 4.918 58 -59 125 4 - -79.181 3.093 
13.707 0.110 0.013 0.007 25.049 0.264 0.043 0.002 208.704 5.337 1603.477 18.923 0.803 3.084 -320.220 17.996 -148.327 16.357 -36.811 2.829 59 -60 75 10 - -79.050 2.188 9.582 0.072 0.013 0.007 17.607 0.404 0.075 0.002 255.051 11.542 1487.710 19.121 -3.626 3.331 -272.006 25.775 -63.483 15.928 -46.528 1.228 60 -61 21 16 - -78.910 7.107 12.204 0.791 0.000 0.000 19.571 0.565 0.058 0.003 265.529 23.762 1628.423 100.079 -7.391 2.450 -259.371 19.026 -40.040 31.465 -46.198 2.505 61 -62 119 6 - -78.891 12.708 12.225 0.073 0.000 0.000 21.918 0.277 0.049 0.001 183.873 60.583 1612.245 43.225 5.173 2.252 -342.645 40.522 -192.694 94.271 -33.921 1.601 62 -63 32 12 - -78.111 4.787 11.640 0.490 0.030 0.007 19.381 0.419 0.070 0.005 224.618 24.572 1458.675 59.540 12.585 2.659 -404.922 39.771 -212.478 43.741 -32.174 7.745 63 -64 93 10 - -75.558 0.893 15.617 0.035 0.000 0.000 25.553 0.142 0.036 0.001 234.040 18.302 1612.077 87.261 -2.881 1.880 -222.506 13.334 -40.046 22.117 -51.580 3.451 64 -65 53 10 - -74.878 0.925 11.095 0.055 0.000 0.000 19.669 0.309 0.058 0.001 217.684 13.625 1737.645 54.714 4.811 2.265 -267.514 39.563 -97.784 28.401 -47.954 8.106 65 -66 50 10 - -74.229 3.215 9.398 0.118 0.000 0.000 15.157 0.226 0.088 0.002 201.585 13.616 1307.077 34.282 7.107 1.138 -346.429 18.226 -177.052 16.240 -32.209 2.692 66 -67 46 10 - -74.057 6.696 13.393 0.171 0.038 0.008 20.998 0.234 0.064 0.004 182.613 28.046 1532.910 44.960 11.778 4.762 -283.146 29.545 -148.000 30.970 -47.467 2.189 67 -68 10 20 - -73.940 3.036 10.540 0.046 0.017 0.000 19.176 0.153 0.067 0.001 356.795 15.142 1492.230 74.353 15.509 2.170 -510.698 9.877 -176.892 19.108 -22.989 0.857 68 -69 44 10 - -73.811 2.120 13.513 0.025 0.000 0.000 20.546 0.111 0.052 0.001 210.162 18.014 1370.822 23.324 5.688 1.348 -373.221 16.893 -188.930 22.084 -25.871 3.664 69 -70 101 10 - -73.605 5.664 7.585 0.291 0.194 0.007 15.971 0.612 0.151 0.003 239.400 28.980 1437.820 17.631 2.496 1.252 -306.286 30.593 -105.669 8.236 -38.784 7.589 70 -71 73 10 
- -73.330 14.683 8.421 0.080 0.142 0.022 16.234 0.195 0.129 0.007 291.068 13.141 1524.905 74.286 7.236 1.570 -318.544 28.529 -73.439 41.719 -45.964 8.000 71 -72 79 10 - -72.710 2.560 11.891 0.070 0.017 0.000 21.817 0.101 0.055 0.000 267.791 10.125 1643.050 30.536 -1.153 0.712 -316.054 21.622 -83.389 26.219 -35.125 3.184 72 -73 108 8 - -72.053 6.427 14.141 0.080 0.000 0.000 22.606 0.088 0.045 0.000 274.130 28.215 1575.818 26.120 2.867 4.129 -353.149 37.289 -110.722 50.901 -31.703 6.123 73 -74 116 7 - -71.931 3.431 14.263 0.020 0.021 0.007 21.881 0.092 0.055 0.002 239.220 23.289 1488.585 51.307 11.289 2.327 -337.511 20.455 -137.931 34.222 -39.639 2.696 74 -75 23 15 - -71.317 1.735 9.651 0.111 0.000 0.000 15.522 0.401 0.085 0.003 194.466 13.162 1484.050 42.048 9.548 1.288 -290.001 20.663 -137.847 15.853 -42.312 1.866 75 -76 24 15 - -71.030 2.953 16.556 0.115 0.000 0.000 28.355 0.305 0.030 0.000 205.405 8.091 1334.508 11.170 -5.633 2.051 -183.413 17.299 -27.264 20.045 -49.255 2.861 76 -77 28 13 - -70.176 3.460 14.159 0.213 0.004 0.007 22.016 0.254 0.048 0.003 212.614 35.492 1491.645 40.439 8.647 0.432 -294.032 36.700 -122.695 8.604 -41.277 2.477 77 -78 37 11 - -69.964 1.337 15.376 0.133 0.000 0.000 25.078 0.326 0.037 0.001 271.813 14.529 1713.650 51.673 -7.231 2.128 -241.582 17.736 -11.367 27.829 -41.599 3.838 78 -79 123 5 - -69.304 4.503 16.450 0.148 0.000 0.000 27.833 0.537 0.031 0.001 210.556 19.322 1326.200 45.711 -5.965 1.162 -161.157 6.948 -2.764 25.378 -52.163 3.012 79 -80 106 9 - -67.503 6.037 12.789 0.289 0.026 0.015 23.061 1.336 0.053 0.002 296.781 34.983 1620.858 52.591 5.560 2.969 -348.713 38.348 -84.930 63.453 -32.998 4.669 80 -81 120 6 - -67.302 2.532 14.358 0.041 0.000 0.000 22.837 0.023 0.044 0.000 271.706 12.518 1634.723 24.273 0.998 1.097 -227.589 9.829 -5.836 14.119 -49.953 3.620 81 -82 55 10 - -67.097 3.840 6.036 0.861 0.090 0.014 12.039 0.749 0.162 0.019 384.023 42.496 1531.295 87.955 3.794 4.479 -454.914 59.765 -89.202 76.199 -18.311 9.697 82 -83 
124 4 - -66.655 5.062 6.249 0.264 0.224 0.012 11.025 0.671 0.217 0.008 235.183 27.050 1650.928 104.661 12.126 3.442 -328.671 31.748 -130.053 21.804 -36.565 3.933 83 -84 34 11 - -66.442 5.053 14.336 0.479 0.056 0.023 24.830 1.011 0.057 0.010 253.232 30.608 1472.758 125.167 2.441 1.679 -290.510 38.258 -73.383 33.664 -36.105 2.888 84 -85 38 10 - -64.966 3.389 12.351 0.042 0.000 0.000 18.220 0.062 0.065 0.001 237.737 22.406 1463.210 47.096 18.877 2.622 -395.482 9.668 -186.266 30.185 -28.521 1.362 85 -86 62 10 - -64.964 5.998 7.746 0.112 0.004 0.007 15.178 0.486 0.093 0.004 271.487 9.093 1577.678 76.023 -0.556 4.196 -206.951 36.880 14.369 36.913 -50.167 1.787 86 -87 25 15 - -64.334 4.335 13.027 0.411 0.021 0.007 23.541 0.557 0.050 0.001 185.122 8.359 1583.560 19.045 -1.219 6.525 -176.576 111.800 -37.766 99.230 -46.312 12.209 87 -88 42 10 - -62.875 14.148 14.485 0.024 0.000 0.000 23.143 0.209 0.043 0.001 179.610 11.087 1721.772 71.776 12.319 3.484 -243.120 35.868 -108.042 47.241 -44.532 3.820 88 -89 84 10 - -62.085 1.306 14.514 0.132 0.000 0.000 22.149 0.316 0.046 0.001 296.997 14.696 1383.428 30.064 1.898 3.669 -292.144 12.479 -30.401 25.823 -35.254 1.893 89 -90 121 6 - -61.957 4.116 12.331 0.311 0.000 0.000 19.451 0.605 0.058 0.003 270.992 23.909 1274.112 82.006 4.029 2.380 -266.295 41.053 -35.130 24.369 -39.827 4.542 90 -91 36 11 - -61.811 2.141 14.445 0.058 0.000 0.000 22.627 0.070 0.045 0.000 270.514 14.399 1609.382 28.876 1.751 1.481 -254.673 33.427 -23.837 34.147 -39.679 5.666 91 -92 77 10 - -61.079 5.725 5.780 0.082 0.039 0.015 10.799 0.259 0.162 0.009 240.534 37.298 1483.005 50.398 -0.002 2.077 -182.518 25.073 9.390 46.237 -48.626 2.883 92 -93 117 6 - -60.777 11.533 9.801 0.191 0.013 0.007 21.006 0.640 0.059 0.004 246.636 8.427 1268.660 71.298 6.936 2.487 -314.050 22.326 -96.981 32.734 -29.567 6.294 93 -94 85 10 - -60.209 2.297 11.213 0.218 0.000 0.000 18.816 0.542 0.062 0.003 381.891 10.325 1459.902 67.889 -2.759 1.863 -266.267 22.682 73.237 24.125 -42.386 
2.780 94 -95 126 4 - -59.397 3.161 13.514 0.088 0.000 0.000 20.613 0.206 0.052 0.001 340.899 11.341 1457.237 38.500 0.742 1.761 -285.606 3.678 18.186 8.403 -37.107 1.557 95 -96 63 10 - -59.291 7.296 9.014 0.111 0.000 0.000 16.822 0.663 0.077 0.005 280.676 22.422 1318.048 29.172 9.234 1.737 -325.262 28.578 -76.127 49.934 -31.541 1.854 96 -97 70 10 - -58.877 4.462 13.014 0.056 0.000 0.000 22.464 0.254 0.046 0.001 228.543 28.692 1422.508 65.438 -4.012 3.320 -200.927 16.946 -9.918 31.989 -37.534 1.424 97 -98 45 10 - -58.029 6.480 7.814 0.140 0.017 0.000 13.251 0.317 0.115 0.004 229.389 29.836 1404.900 46.790 11.103 1.955 -327.900 39.785 -125.001 51.497 -26.490 2.094 98 -99 68 10 - -55.807 3.727 16.510 0.066 0.000 0.000 25.624 0.263 0.036 0.001 318.869 16.264 1213.652 9.474 -5.589 2.224 -229.506 11.490 53.160 7.966 -36.203 3.035 99 -100 67 10 - -55.715 3.802 14.121 0.078 0.073 0.007 23.021 0.127 0.068 0.003 364.598 7.533 1424.082 38.150 0.216 3.221 -301.127 9.515 31.305 11.925 -32.166 2.426 100 -101 43 10 - -55.357 4.538 13.771 0.174 0.000 0.000 22.382 0.222 0.046 0.001 315.525 17.911 1224.170 95.586 15.578 3.930 -449.307 38.562 -146.408 51.664 -12.626 5.604 101 -102 60 10 - -55.224 0.578 14.483 0.242 0.000 0.000 26.049 0.668 0.036 0.002 237.155 10.847 1474.113 27.846 0.259 1.804 -152.957 35.644 35.591 28.921 -48.607 4.912 102 -103 59 10 - -55.141 2.610 14.717 0.093 0.034 0.000 23.704 0.289 0.053 0.001 298.384 26.262 1406.250 66.647 9.341 3.393 -316.841 23.707 -49.410 30.705 -30.952 4.042 103 -104 112 7 - -54.921 9.461 10.685 0.208 0.073 0.026 18.506 0.476 0.089 0.007 219.840 44.971 1521.922 81.424 5.709 7.046 -214.847 15.424 -34.651 49.783 -39.644 4.737 104 -105 5 22 - -54.314 4.331 12.385 0.086 0.021 0.014 22.595 0.342 0.053 0.006 233.002 17.524 1358.430 35.130 1.471 0.185 -207.591 22.952 -12.155 32.463 -37.567 1.570 105 -106 52 10 - -53.491 15.207 12.353 0.103 0.000 0.000 22.051 0.367 0.048 0.001 233.007 31.498 1414.820 102.212 3.750 1.618 -249.119 20.721 -46.830 
57.818 -30.718 7.272 106 -107 111 7 - -53.239 5.226 13.791 0.212 0.000 0.000 23.428 0.626 0.043 0.002 268.166 12.080 1414.135 56.300 7.739 1.257 -222.832 3.164 2.106 14.206 -43.228 4.768 107 -108 54 10 - -52.971 3.488 14.200 0.033 0.000 0.000 23.847 0.087 0.041 0.000 334.087 9.226 1546.130 36.750 -6.005 1.905 -190.261 19.194 101.503 17.352 -42.322 4.900 108 -109 56 10 - -51.490 5.020 13.521 0.052 0.000 0.000 22.416 0.243 0.046 0.001 293.306 19.777 1477.102 25.522 5.609 1.461 -279.596 12.796 -16.800 25.540 -30.510 5.347 109 -110 118 6 - -51.117 4.561 12.365 0.051 0.000 0.000 23.714 0.346 0.043 0.001 278.889 18.649 1432.265 9.470 0.370 2.751 -214.671 17.337 27.776 25.570 -36.442 3.072 110 -111 107 9 - -49.501 5.059 14.835 0.038 0.000 0.000 23.410 0.057 0.042 0.000 344.941 19.284 1343.577 12.807 -3.153 1.679 -264.398 20.388 52.580 29.967 -27.963 9.609 111 -112 35 11 - -49.340 2.447 11.317 0.928 0.039 0.015 20.295 1.572 0.069 0.011 235.385 37.942 1409.605 87.748 4.069 3.360 -185.705 44.350 9.873 43.861 -39.807 8.325 112 -113 33 12 - -46.737 4.308 11.174 0.816 0.000 0.000 19.641 1.416 0.059 0.007 310.128 20.042 1238.068 110.488 4.568 2.574 -256.976 22.111 22.230 37.127 -30.923 2.384 113 -114 113 7 - -45.800 5.982 10.348 0.124 0.000 0.000 18.316 0.481 0.066 0.003 282.571 18.197 1311.367 56.380 4.791 1.847 -211.064 18.902 34.872 19.595 -36.635 3.931 114 -115 81 10 - -45.354 6.929 16.636 0.096 0.000 0.000 25.863 0.726 0.035 0.002 308.236 11.719 1275.400 52.716 -5.692 4.714 -181.522 49.756 92.533 51.322 -34.181 9.503 115 -116 49 10 - -44.705 1.959 10.852 0.035 0.000 0.000 16.845 0.062 0.074 0.000 358.788 5.112 1341.040 72.642 10.412 2.121 -347.849 36.059 -10.488 33.632 -21.427 6.467 116 -117 39 10 - -44.261 4.620 14.087 0.056 0.000 0.000 22.835 0.013 0.044 0.000 295.495 17.162 1234.227 27.363 4.174 3.430 -210.269 16.733 49.294 18.637 -35.931 4.941 117 -118 92 10 - -44.053 1.106 15.813 0.128 0.000 0.000 25.745 0.368 0.036 0.001 283.916 13.486 1275.445 30.036 -6.639 1.857 
-143.568 20.594 103.256 10.722 -37.092 1.745 118 -119 114 7 - -43.996 7.676 13.036 0.151 0.000 0.000 21.041 0.248 0.051 0.001 318.293 22.689 1397.472 69.100 6.502 2.294 -242.837 58.413 41.697 35.569 -33.760 4.092 119 -120 127 4 - -42.868 12.804 10.782 0.266 0.021 0.007 18.935 0.277 0.070 0.003 378.484 32.539 1279.013 79.482 -2.333 3.174 -249.324 69.688 100.641 96.319 -28.519 2.196 120 -121 97 10 - -41.988 1.493 15.101 0.025 0.000 0.000 23.745 0.034 0.041 0.000 301.125 3.112 1269.653 33.464 -8.743 1.470 -129.712 11.741 133.997 11.234 -37.416 1.471 121 -122 71 10 - -41.763 0.892 8.507 0.097 0.039 0.015 14.685 0.147 0.107 0.006 239.057 9.121 1327.050 24.875 8.577 4.145 -218.493 30.755 -9.983 30.699 -30.547 4.870 122 -123 47 10 - -36.220 3.928 15.277 0.106 0.000 0.000 23.010 0.110 0.043 0.000 284.950 8.282 1175.388 69.795 5.948 1.847 -213.173 34.270 43.748 38.868 -28.029 3.546 123 -124 41 10 - -32.454 10.820 12.104 0.204 0.000 0.000 20.116 0.705 0.056 0.003 396.786 30.865 1066.531 89.515 12.815 3.725 -377.880 29.248 9.535 54.759 -9.372 2.922 124 -125 64 10 - -27.162 1.992 10.301 0.345 0.000 0.000 18.657 0.861 0.065 0.005 402.122 49.749 1030.695 25.469 4.338 0.861 -257.608 45.825 124.323 22.168 -20.191 4.685 125 -126 48 10 - -14.746 4.336 10.121 0.049 0.000 0.000 18.586 0.361 0.065 0.002 357.317 19.257 1018.395 44.464 1.525 1.784 -147.322 10.412 187.457 30.279 -22.539 2.476 126 -127 69 10 - -13.047 2.110 13.381 0.036 0.000 0.000 23.537 0.088 0.043 0.000 326.021 21.144 1056.715 43.823 -2.112 1.084 -98.899 7.148 203.364 15.655 -23.757 1.618 127 -
-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank -1 48 10 - -13.844 0.900 13.404 0.035 0.000 0.000 22.237 0.123 0.046 0.001 1166.617 117.075 1315.115 40.932 -9.803 2.749 -3.493 0.991 1256.845 137.335 93.721 19.507 1 -9 138 10 - -10.454 0.674 13.607 0.000 0.000 0.000 22.064 0.000 0.047 0.000 1204.100 0.000 1665.775 24.438 -3.712 0.820 -2.350 0.000 1224.210 0.000 22.460 0.002 2 -3 11 10 - -10.412 0.432 15.857 0.000 0.000 0.000 24.564 0.000 0.039 0.000 1384.043 0.004 1245.633 13.154 -6.563 0.429 -5.648 0.000 1419.910 0.000 41.513 0.001 3 -2 15 10 - -10.340 0.286 16.017 0.000 0.000 0.000 24.570 0.000 0.039 0.000 1333.983 0.004 1253.492 15.726 -1.585 0.327 -9.873 0.000 1355.403 0.004 31.294 0.002 4 -4 106 10 - -9.829 0.222 15.391 0.000 0.000 0.000 24.525 0.000 0.039 0.000 1552.235 0.005 1383.528 17.921 -7.734 0.159 -4.185 0.000 1588.340 0.000 40.293 0.003 5 -6 69 10 - -9.533 1.575 12.950 0.269 0.000 0.000 20.490 1.095 0.054 0.005 1276.435 67.292 1199.227 109.102 -8.069 0.667 -2.824 1.663 1332.377 61.750 58.763 7.369 6 -5 83 10 - -9.012 0.282 14.254 0.000 0.000 0.000 22.921 0.000 0.044 0.000 1276.120 0.010 1183.360 18.970 -3.152 0.213 -7.525 0.000 1342.325 0.005 73.727 0.006 7 -7 80 10 - -7.775 0.494 9.909 0.113 0.000 0.000 15.681 0.333 0.083 0.002 1542.185 35.608 1324.515 18.563 -4.813 0.320 -5.803 0.471 1602.840 32.869 66.456 2.271 8 -12 146 10 - -7.423 0.848 14.398 0.000 0.000 0.000 23.944 0.000 0.041 0.000 1442.685 0.005 1299.652 32.980 -7.305 1.130 -1.911 0.000 1476.980 0.000 36.202 0.001 9 -8 103 10 - -7.333 0.221 14.981 0.000 0.000 0.000 24.008 0.000 0.040 0.000 1543.658 0.004 1030.420 14.645 -10.900 0.124 -2.262 0.000 1610.960 0.000 69.566 0.001 10 -10 57 10 - -6.935 0.458 14.379 0.000 0.000 0.000 23.639 0.000 0.042 0.000 1222.643 0.004 1453.100 18.107 -6.100 0.353 0.439 0.000 1326.150 0.000 103.066 
0.004 11 -19 143 10 - -6.913 0.973 15.057 0.055 0.000 0.000 23.498 0.016 0.042 0.000 1637.182 34.931 1089.412 27.157 -8.432 1.560 -4.361 0.102 1673.073 22.235 40.253 12.796 12 -11 46 10 - -6.612 0.137 7.774 0.000 0.138 0.000 16.369 0.000 0.129 0.000 1427.530 0.000 1264.045 12.113 -2.676 0.088 -6.756 0.000 1539.297 0.004 118.522 0.001 13 -13 74 10 - -6.451 0.515 12.795 0.110 0.000 0.000 21.337 0.231 0.050 0.001 1227.858 0.583 1320.807 16.754 -3.158 0.556 -2.694 0.171 1258.178 3.958 33.012 4.713 14 -14 40 10 - -6.057 0.570 14.552 0.000 0.000 0.000 23.108 0.000 0.043 0.000 1325.322 0.004 1317.838 17.495 -3.007 0.666 -3.439 0.000 1353.310 0.000 31.430 0.001 15 -17 54 10 - -5.998 0.121 12.456 0.000 0.000 0.000 19.696 0.000 0.057 0.000 1314.395 0.005 1310.680 23.796 -1.208 0.317 -5.261 0.000 1352.630 0.000 43.495 0.001 16 -15 32 10 - -5.964 0.425 14.596 0.115 0.000 0.000 23.358 0.122 0.043 0.000 1277.912 7.923 1285.055 82.743 -3.391 0.456 -2.900 0.806 1314.905 3.200 39.893 5.527 17 -16 12 10 - -5.777 0.104 13.551 0.000 0.017 0.000 23.658 0.000 0.048 0.000 1198.952 0.008 1149.155 9.000 -1.300 0.161 -5.170 0.000 1213.340 0.000 19.559 0.004 18 -18 124 10 - -5.691 0.215 14.454 0.000 0.017 0.000 23.355 0.000 0.048 0.000 1416.720 0.007 1374.565 18.449 -4.243 0.272 -2.310 0.000 1458.433 0.004 44.024 0.004 19 -21 56 10 - -5.412 0.747 15.959 0.000 0.000 0.000 25.689 0.001 0.036 0.000 1217.487 0.004 1439.070 19.619 -0.762 0.765 -2.668 0.000 1238.170 0.000 23.352 0.001 20 -29 104 10 - -5.375 0.124 13.872 0.000 0.000 0.000 20.781 0.000 0.052 0.000 1406.888 0.015 1020.282 14.806 -3.770 0.202 -6.285 0.000 1481.987 0.004 81.386 0.009 21 -20 49 10 - -5.250 0.321 15.360 0.000 0.000 0.000 23.819 0.000 0.041 0.000 1494.370 0.000 1010.559 24.070 -8.385 0.381 -2.102 0.000 1532.178 0.004 39.910 0.001 22 -23 14 10 - -4.989 1.157 10.910 0.000 0.000 0.000 21.035 0.000 0.053 0.000 1197.435 0.015 1289.438 22.539 -1.583 1.033 -2.975 0.000 1243.383 0.004 48.924 0.011 23 -24 44 10 - -4.755 0.228 
11.912 0.000 0.034 0.000 22.612 0.000 0.058 0.000 1538.758 0.004 1152.660 13.464 -4.545 0.280 -4.549 0.000 1582.030 0.000 47.828 0.000 24 -25 29 10 - -4.750 0.679 10.824 0.420 0.009 0.009 18.404 1.004 0.068 0.006 1371.130 157.122 1214.263 97.267 -3.579 1.850 -3.541 1.647 1447.700 178.555 80.112 41.852 25 -22 23 10 - -4.670 0.217 13.640 0.000 0.000 0.000 20.717 0.000 0.052 0.000 1074.330 0.000 1374.390 4.007 0.869 0.209 -2.729 0.000 1090.750 0.000 19.152 0.001 26 -28 38 10 - -4.363 0.605 16.033 0.155 0.000 0.000 25.480 0.271 0.036 0.001 1238.952 29.106 1052.057 45.106 -7.738 1.761 1.048 2.035 1285.750 39.525 45.747 12.456 27 -27 1 10 - -4.137 0.568 10.316 0.088 0.000 0.000 16.498 0.391 0.076 0.003 1721.757 67.381 1135.003 53.658 -4.618 0.422 -5.766 0.216 1754.023 66.472 38.026 0.691 28 -26 27 10 - -4.112 0.152 12.983 0.000 0.000 0.000 20.773 0.000 0.052 0.000 1139.570 0.000 1462.320 16.198 1.793 0.279 -2.812 0.000 1150.210 0.000 13.452 0.001 29 -30 61 10 - -4.099 0.842 13.370 0.239 0.000 0.000 21.815 0.084 0.048 0.000 1308.007 135.944 1550.715 205.819 1.005 1.959 -2.987 1.590 1336.082 149.353 31.061 14.996 30 -37 112 10 - -3.676 0.381 14.796 0.000 0.000 0.000 22.998 0.000 0.043 0.000 1486.800 0.000 1227.990 15.606 -0.708 0.396 -6.135 0.000 1538.500 0.000 57.833 0.002 31 -31 34 10 - -3.597 0.147 14.799 0.061 0.000 0.000 23.015 0.136 0.043 0.001 1310.543 23.128 1183.075 91.732 -0.950 0.684 -4.455 0.136 1359.398 1.033 53.309 21.961 32 -32 7 10 - -3.508 0.513 16.858 0.000 0.000 0.000 28.409 0.000 0.030 0.000 1301.480 0.000 1196.672 8.578 -4.923 0.571 -0.218 0.000 1359.760 0.000 58.499 0.001 33 -33 20 10 - -2.773 0.374 14.904 0.000 0.000 0.000 23.214 0.000 0.043 0.000 1183.130 0.000 1460.190 23.915 1.641 0.539 -1.872 0.000 1204.100 0.000 22.843 0.001 34 -35 77 10 - -2.739 0.764 12.238 0.036 0.000 0.000 19.323 0.055 0.059 0.000 1282.035 90.578 1268.540 64.375 -0.585 2.814 -2.576 0.397 1308.220 104.985 28.759 14.014 35 -42 42 10 - -2.495 1.064 9.577 0.000 0.017 0.000 
21.186 0.000 0.060 0.000 1085.117 0.004 1410.815 38.489 0.948 0.957 -0.636 0.000 1129.460 0.000 44.976 0.002 36 -40 62 10 - -2.017 0.480 14.651 0.000 0.000 0.000 22.953 0.000 0.044 0.000 1362.965 0.005 1379.763 20.658 2.498 0.488 -4.718 0.000 1395.325 0.005 37.078 0.003 37 -34 64 10 - -2.009 0.327 12.646 0.167 0.065 0.007 21.928 0.363 0.070 0.004 1227.983 17.299 1275.485 30.604 -2.275 0.709 0.190 0.874 1283.332 19.239 55.160 2.813 38 -39 105 10 - -1.975 0.713 11.651 0.000 0.086 0.000 20.866 0.000 0.082 0.000 1426.785 0.005 1445.892 18.713 -1.552 0.833 -0.564 0.000 1459.400 0.000 33.182 0.001 39 -36 100 10 - -1.974 0.614 14.641 0.375 0.017 0.000 25.137 0.604 0.044 0.001 1613.605 50.415 1285.878 28.846 -1.852 1.404 -4.480 2.223 1717.162 105.853 108.035 57.663 40 -56 88 10 - -1.877 1.153 14.595 0.000 0.017 0.000 22.930 0.000 0.050 0.000 1377.500 0.000 1256.900 19.112 1.216 1.144 -4.825 0.000 1425.230 0.000 52.558 0.001 41 -41 36 10 - -1.834 0.548 13.132 0.000 0.000 0.000 21.975 0.000 0.048 0.000 972.241 0.007 1488.515 12.437 5.426 0.465 -2.704 0.000 1030.163 0.004 60.625 0.003 42 -38 30 10 - -1.684 0.302 14.753 0.000 0.069 0.000 23.211 0.000 0.066 0.000 1193.767 0.018 1714.410 16.251 4.303 0.358 -0.954 0.000 1210.110 0.007 17.296 0.011 43 -43 85 10 - -1.494 0.422 2.535 0.000 0.345 0.000 6.023 0.000 0.423 0.000 1955.515 0.005 1609.628 8.936 11.827 0.474 -17.319 0.000 1992.133 0.004 53.936 0.002 44 -48 75 10 - -1.190 1.574 12.890 0.000 0.000 0.000 21.606 0.000 0.049 0.000 1678.058 0.004 1003.288 20.621 -3.349 1.741 -5.239 0.000 1737.880 0.000 65.063 0.003 45 -44 4 10 - -1.162 0.159 13.134 0.000 0.017 0.000 20.540 0.000 0.059 0.000 1402.763 0.004 1128.918 30.713 4.675 0.392 -8.789 0.000 1415.280 0.000 21.304 0.001 46 -45 17 10 - -1.158 0.437 14.586 0.000 0.017 0.000 22.061 0.000 0.052 0.000 1350.850 0.000 1109.115 12.543 4.974 0.472 -9.271 0.000 1413.800 0.000 72.221 0.002 47 -46 127 10 - -1.143 0.190 14.426 0.000 0.000 0.000 22.311 0.000 0.046 0.000 1326.025 0.005 
1255.963 21.347 -0.151 0.291 -2.092 0.000 1363.835 0.005 39.904 0.003 48 -58 149 10 - -0.788 0.547 14.657 0.000 0.000 0.000 23.481 0.001 0.042 0.000 1230.210 0.012 1123.400 25.376 0.072 0.545 -2.759 0.000 1310.590 0.007 83.137 0.006 49 -47 47 10 - -0.634 0.202 10.615 0.000 0.000 0.000 17.214 0.000 0.072 0.000 1582.995 0.005 1101.400 8.505 -3.618 0.282 -2.392 0.000 1636.595 0.005 55.990 0.002 50 -50 65 10 - -0.544 0.694 14.396 0.000 0.000 0.000 21.711 0.000 0.048 0.000 1204.223 0.004 1317.295 29.407 1.424 0.855 -1.151 0.000 1234.443 0.004 31.369 0.004 51 -49 123 10 - -0.413 0.261 14.733 0.000 0.000 0.000 22.393 0.000 0.045 0.000 1690.878 0.013 1054.225 25.599 -2.721 0.379 -4.661 0.000 1746.453 0.008 60.234 0.010 52 -53 41 10 - -0.301 0.901 11.978 0.000 0.000 0.000 20.686 0.000 0.053 0.000 1281.405 0.005 1249.838 29.204 4.705 0.853 -6.289 0.001 1371.910 0.000 96.793 0.001 53 -52 72 10 - 0.004 0.778 11.393 0.000 0.000 0.000 18.815 0.000 0.062 0.000 1841.592 0.004 1184.938 24.043 -2.198 0.990 -4.820 0.000 1882.420 0.000 45.649 0.002 54 -55 60 10 - 0.009 0.555 12.540 0.000 0.000 0.000 22.058 0.000 0.048 0.000 1472.052 0.008 1173.470 32.950 1.109 0.517 -4.536 0.001 1512.588 0.004 45.071 0.002 55 -51 33 10 - 0.107 0.243 13.624 0.000 0.000 0.000 22.164 0.000 0.047 0.000 1015.085 0.011 1247.485 25.589 6.940 0.203 -4.666 0.000 1026.035 0.005 15.620 0.008 56 -64 107 10 - 0.205 0.370 10.381 0.000 0.017 0.000 18.922 0.000 0.069 0.000 1627.175 0.009 1123.535 23.337 -0.720 0.382 -4.857 0.000 1696.867 0.004 74.549 0.006 57 -54 66 10 - 0.360 0.318 1.684 0.402 0.526 0.181 3.075 0.744 0.622 0.116 1597.793 284.803 1513.723 249.385 8.918 5.003 -10.031 0.661 1650.948 320.308 63.184 36.167 58 -57 111 10 - 0.766 0.550 14.904 0.000 0.000 0.000 22.475 0.000 0.045 0.000 1626.950 0.007 1014.321 27.012 -1.440 0.342 -4.577 0.000 1688.060 0.000 65.685 0.004 59 -59 55 10 - 0.858 0.540 13.865 0.000 0.017 0.000 23.838 0.000 0.047 0.000 1807.443 0.004 1378.845 3.558 8.995 0.538 -12.902 0.000 
1842.455 0.005 47.919 0.003 60 -62 147 10 - 0.956 0.701 15.581 0.054 0.000 0.000 24.005 0.203 0.041 0.001 1597.950 3.608 1254.088 53.590 -0.842 0.535 -1.931 0.759 1624.987 4.291 28.966 0.072 61 -60 71 10 - 1.057 0.728 13.646 0.029 0.034 0.000 23.918 0.334 0.052 0.001 1631.675 116.506 1124.720 88.785 -3.940 2.101 -0.651 0.045 1688.805 101.923 57.782 14.540 62 -109 113 10 - 1.060 0.681 12.545 0.000 0.000 0.000 19.286 0.000 0.059 0.000 1484.197 0.004 920.633 17.258 -2.448 0.516 -2.606 0.000 1529.520 0.000 47.929 0.002 63 -80 91 10 - 1.140 0.825 5.592 0.000 0.069 0.000 17.456 0.000 0.109 0.000 1030.130 0.012 1601.830 27.680 5.160 0.616 0.752 0.000 1125.423 0.004 94.540 0.008 64 -61 22 10 - 1.547 0.489 12.884 0.000 0.000 0.000 18.658 0.000 0.062 0.000 1287.670 0.000 1227.920 26.361 2.087 0.630 -1.563 0.000 1328.707 0.004 42.597 0.002 65 -65 142 10 - 1.609 0.465 13.598 0.001 0.000 0.000 20.523 0.000 0.053 0.000 1812.180 0.007 1242.125 9.410 4.169 0.487 -8.600 0.001 1837.472 0.004 33.893 0.006 66 -63 2 10 - 1.618 0.620 11.163 0.000 0.052 0.000 19.921 0.000 0.074 0.000 1612.010 0.007 1142.242 17.089 0.629 0.472 -4.038 0.000 1640.850 0.000 32.880 0.004 67 -71 79 10 - 1.719 0.983 12.761 0.000 0.000 0.000 18.973 0.000 0.060 0.000 1428.622 0.004 1202.068 29.316 6.850 1.082 -7.824 0.000 1463.520 0.000 42.716 0.002 68 -69 53 10 - 1.771 1.118 14.068 0.000 0.000 0.000 22.330 0.000 0.046 0.000 1316.773 0.004 1285.473 14.217 6.192 1.087 -4.974 0.001 1335.790 0.000 23.991 0.001 69 -67 21 10 - 1.840 0.428 10.784 0.000 0.034 0.000 19.322 0.000 0.072 0.000 1285.688 0.008 1117.495 10.149 -0.691 0.439 0.523 0.001 1318.855 0.005 32.646 0.006 70 -70 134 10 - 1.845 0.422 11.423 0.000 0.000 0.000 19.694 0.000 0.058 0.000 1223.665 0.005 1143.160 12.317 3.324 0.532 -2.622 0.000 1254.850 0.000 33.809 0.003 71 -66 28 10 - 2.156 0.092 16.901 0.145 0.000 0.000 25.946 0.295 0.035 0.000 1664.168 128.023 953.917 39.846 -2.267 2.191 -3.344 0.114 1727.315 172.192 66.488 44.052 72 -72 51 10 - 2.177 0.345 
14.563 0.000 0.052 0.000 24.598 0.000 0.056 0.000 1316.715 0.009 1570.730 16.814 6.253 0.181 -2.070 0.000 1368.102 0.004 53.459 0.004 73 -68 5 10 - 2.186 0.209 10.791 0.000 0.000 0.000 17.361 0.000 0.071 0.000 1320.465 0.009 1352.238 23.403 4.872 0.278 -2.775 0.000 1358.330 0.000 40.637 0.005 74 -73 52 10 - 2.462 0.437 11.857 0.000 0.034 0.000 21.481 0.000 0.062 0.000 1332.390 0.007 1331.270 18.518 3.857 0.469 -2.157 0.000 1405.307 0.004 75.072 0.005 75 -75 117 10 - 2.591 1.501 5.582 0.000 0.017 0.000 10.747 0.000 0.156 0.000 1471.223 0.004 1131.087 27.013 -0.164 1.301 -1.468 0.000 1551.900 0.000 82.145 0.003 76 -76 121 10 - 2.757 0.190 15.413 0.030 0.000 0.000 23.407 0.072 0.042 0.000 1441.680 23.042 1181.880 26.999 2.544 0.494 -2.763 0.841 1476.762 16.185 37.844 6.016 77 -74 8 10 - 2.804 0.346 9.774 0.000 0.000 0.000 15.587 0.000 0.084 0.000 1031.230 0.012 1151.565 3.228 4.639 0.312 -0.954 0.002 1062.565 0.005 32.287 0.005 78 -77 125 10 - 2.956 0.265 9.059 0.000 0.000 0.000 14.582 0.000 0.093 0.000 1332.722 0.004 1325.645 17.543 -0.222 0.167 2.522 0.000 1393.822 0.004 58.576 0.004 79 -79 148 10 - 3.338 0.337 9.501 0.000 0.017 0.000 16.813 0.000 0.082 0.000 1373.653 0.004 1326.715 10.338 2.499 0.388 0.027 0.000 1407.895 0.005 34.216 0.003 80 -78 43 10 - 3.420 0.161 13.876 0.062 0.000 0.000 24.966 0.181 0.038 0.000 1203.815 13.490 1353.628 21.704 3.673 0.639 0.968 0.680 1232.450 11.316 27.667 1.490 81 -85 90 10 - 3.499 0.582 11.694 0.000 0.000 0.000 19.420 0.000 0.059 0.000 1524.062 0.008 1254.383 26.891 2.957 0.549 -2.600 0.000 1566.020 0.000 44.559 0.004 82 -96 144 10 - 3.736 1.983 14.597 0.155 0.000 0.000 23.244 0.913 0.043 0.003 1638.918 27.535 1089.932 148.738 -1.437 2.850 -0.736 0.937 1680.150 26.411 41.970 7.550 83 -83 81 10 - 4.181 0.676 10.556 0.000 0.017 0.000 19.500 0.000 0.066 0.000 1679.600 0.000 1209.035 23.242 8.004 0.495 -8.994 0.001 1717.150 0.000 46.546 0.001 84 -86 126 10 - 4.231 0.820 13.402 0.000 0.000 0.000 22.864 0.000 0.045 0.000 1437.102 
0.004 1009.162 9.760 -0.405 0.755 -0.675 0.000 1539.537 0.004 103.107 0.001 85 -82 35 10 - 4.429 0.243 13.516 0.086 0.000 0.000 21.373 0.051 0.050 0.001 1422.540 1.230 1373.582 45.688 4.605 0.121 -1.357 0.944 1490.320 18.770 69.143 18.482 86 -108 84 10 - 4.431 2.539 5.560 0.856 0.060 0.015 12.118 0.730 0.154 0.019 2052.713 82.831 1181.928 181.411 2.306 1.076 -7.277 0.965 2114.883 74.846 69.446 7.018 87 -81 10 10 - 4.533 0.147 13.434 0.000 0.000 0.000 20.796 0.000 0.052 0.000 1313.870 0.017 1106.747 9.549 4.643 0.101 -2.606 0.002 1353.747 0.004 42.483 0.009 88 -84 118 10 - 4.597 0.401 14.224 0.000 0.000 0.000 21.784 0.000 0.048 0.000 812.933 0.006 1561.932 19.206 6.094 0.589 5.630 0.001 854.980 0.002 36.417 0.005 89 -87 133 10 - 4.605 0.650 12.937 0.000 0.000 0.000 23.370 0.000 0.043 0.000 1314.115 0.005 955.893 27.114 0.978 0.632 -0.502 0.000 1368.153 0.004 54.539 0.003 90 -106 18 10 - 4.806 0.311 13.463 0.000 0.000 0.000 24.373 0.001 0.040 0.000 1207.367 0.004 1226.390 11.365 7.546 0.398 -2.879 0.000 1237.415 0.005 32.926 0.001 91 -88 9 10 - 4.811 1.015 17.516 0.000 0.000 0.000 26.890 0.000 0.033 0.000 1593.650 0.007 852.707 14.236 -3.510 1.086 0.360 0.000 1649.222 0.004 55.215 0.003 92 -89 76 10 - 4.921 0.828 8.479 0.001 0.121 0.000 16.155 0.009 0.123 0.000 1675.845 1.210 1128.490 24.540 5.340 0.543 -6.340 0.197 1714.182 0.917 44.677 0.098 93 -92 135 10 - 5.104 0.801 12.373 0.000 0.000 0.000 21.291 0.001 0.051 0.000 1682.297 0.008 879.111 26.874 0.911 0.548 -4.271 0.002 1721.285 0.005 43.257 0.003 94 -91 101 10 - 5.127 0.529 14.134 0.000 0.069 0.000 22.851 0.000 0.067 0.000 1774.452 0.004 1143.035 25.663 5.251 0.622 -6.708 0.000 1794.785 0.005 27.042 0.002 95 -93 68 10 - 5.267 0.116 13.506 0.000 0.000 0.000 23.168 0.000 0.044 0.000 1270.545 0.042 903.954 16.269 3.013 0.114 -3.875 0.000 1513.010 0.016 246.341 0.028 96 -90 98 10 - 5.488 0.197 12.076 0.000 0.052 0.000 22.598 0.000 0.064 0.000 1417.480 0.000 1486.365 17.352 4.645 0.302 1.068 0.000 1464.918 0.004 
46.366 0.002 97 -95 78 10 - 5.560 0.457 16.706 0.008 0.000 0.000 25.451 0.140 0.036 0.000 1842.725 31.353 944.442 50.103 0.728 0.730 -4.713 0.093 1894.173 29.224 56.160 2.221 98 -94 24 10 - 5.636 0.329 8.495 0.000 0.034 0.000 14.545 0.000 0.106 0.000 1438.640 0.007 1052.645 15.771 1.940 0.237 -0.598 0.000 1481.457 0.004 43.412 0.003 99 -98 82 10 - 5.912 1.108 9.062 0.000 0.000 0.000 15.621 0.000 0.085 0.000 1579.757 0.022 1004.099 15.653 5.426 1.168 -6.184 0.000 1665.025 0.011 91.451 0.016 100 -100 102 10 - 6.178 0.304 7.429 0.009 0.000 0.000 13.326 0.125 0.110 0.002 1579.557 20.718 876.049 15.882 3.925 2.192 -5.419 1.923 1637.915 20.785 63.778 1.855 101 -101 73 10 - 6.491 1.286 5.274 1.121 0.112 0.009 9.331 1.965 0.220 0.043 1572.305 148.870 1132.720 36.265 8.968 1.519 -7.591 0.267 1636.495 138.775 71.783 10.363 102 -97 95 10 - 6.500 0.357 14.891 0.012 0.000 0.000 22.738 0.016 0.044 0.000 1324.383 41.675 1347.323 23.736 7.302 0.979 -1.935 0.308 1458.680 26.090 136.237 15.891 103 -103 16 10 - 6.598 0.717 13.543 0.000 0.000 0.000 25.565 0.000 0.037 0.000 1435.965 0.023 965.739 17.475 4.647 0.755 -3.499 0.000 1507.220 0.007 74.752 0.014 104 -113 131 10 - 6.748 0.706 11.362 0.097 0.000 0.000 21.172 0.035 0.052 0.000 1272.642 14.128 1152.102 20.187 2.181 0.890 2.808 0.199 1330.730 20.670 55.278 6.741 105 -99 26 10 - 6.777 0.221 14.549 0.000 0.034 0.000 22.246 0.000 0.057 0.000 1337.785 0.005 1237.665 9.368 12.623 0.281 -7.317 0.000 1377.372 0.004 46.903 0.001 106 -111 140 10 - 6.779 0.467 13.408 0.000 0.000 0.000 21.842 0.000 0.048 0.000 1701.625 0.009 1130.092 12.459 7.382 0.510 -6.848 0.000 1747.780 0.000 53.005 0.004 107 -102 70 10 - 6.859 0.368 14.104 0.000 0.034 0.000 23.831 0.000 0.053 0.000 1421.310 0.007 1245.168 28.046 7.072 0.205 -2.606 0.000 1481.862 0.004 63.157 0.003 108 -104 128 10 - 6.979 0.272 7.750 0.000 0.000 0.000 15.894 0.000 0.086 0.000 1565.898 0.004 1165.102 17.917 2.422 0.296 0.114 0.000 1609.468 0.004 43.454 0.002 109 -105 37 10 - 7.074 0.273 
11.212 0.000 0.017 0.000 19.176 0.000 0.066 0.000 1435.567 0.004 1069.815 30.137 5.507 0.352 -2.716 0.000 1495.420 0.000 62.566 0.003 110 -107 63 10 - 7.255 0.593 12.974 0.000 0.017 0.000 23.438 0.000 0.049 0.000 1090.135 0.005 1330.148 10.453 2.694 0.583 6.481 0.000 1144.622 0.004 48.006 0.003 111 -110 141 10 - 7.577 0.087 14.993 0.000 0.000 0.000 26.718 0.000 0.034 0.000 1348.737 0.004 1125.025 15.059 3.369 0.154 1.314 0.001 1415.803 0.004 65.755 0.004 112 -112 39 10 - 7.717 1.437 13.076 0.000 0.034 0.000 21.292 0.000 0.062 0.000 1418.793 0.004 1031.657 36.860 7.200 1.546 -3.905 0.000 1469.900 0.000 55.008 0.004 113 -115 87 10 - 7.749 0.454 13.090 0.000 0.000 0.000 23.222 0.000 0.044 0.000 1476.158 0.011 1272.345 14.120 8.203 0.331 -3.187 0.001 1542.362 0.004 69.394 0.007 114 -114 19 10 - 8.156 0.557 9.610 0.000 0.000 0.000 14.939 0.000 0.089 0.000 1217.220 0.000 1305.247 14.882 12.752 0.458 -4.164 0.000 1257.840 0.000 44.788 0.001 115 -121 145 10 - 8.234 0.328 10.233 0.000 0.000 0.000 18.503 0.000 0.065 0.000 1947.057 0.004 867.776 31.685 3.537 0.526 -6.582 0.000 1988.970 0.000 48.495 0.001 116 -119 136 10 - 8.348 0.416 14.705 0.000 0.034 0.000 25.924 0.000 0.047 0.000 1386.100 0.007 1184.423 17.051 4.270 0.435 1.673 0.001 1426.608 0.004 38.834 0.003 117 -122 108 10 - 8.381 0.460 14.681 0.085 0.000 0.000 23.063 0.127 0.043 0.001 1232.655 19.625 1256.318 24.006 9.329 0.902 -1.541 0.488 1314.030 2.905 82.916 16.234 118 -118 59 10 - 8.410 0.499 14.607 0.000 0.000 0.000 22.480 0.000 0.045 0.000 1369.202 0.004 1270.800 13.880 9.935 0.543 -3.034 0.000 1418.590 0.000 52.421 0.003 119 -117 6 10 - 8.515 0.753 15.939 0.155 0.000 0.000 24.412 0.259 0.039 0.000 1449.795 8.149 1040.126 30.813 3.445 1.306 -0.055 0.320 1552.490 52.943 102.751 44.470 120 -116 25 10 - 8.519 0.328 14.667 0.000 0.034 0.000 24.106 0.000 0.052 0.000 1754.810 0.000 914.418 13.386 7.544 0.397 -8.388 0.000 1842.332 0.004 95.912 0.001 121 -123 67 10 - 8.816 0.536 10.832 0.000 0.000 0.000 18.596 0.000 
0.064 0.000 1463.985 0.005 1034.602 12.469 6.496 0.480 -2.527 0.000 1516.725 0.005 55.269 0.003 122 -120 109 10 - 8.886 0.324 13.897 0.000 0.000 0.000 22.887 0.000 0.044 0.000 1739.505 0.005 1021.742 10.450 10.462 0.263 -9.293 0.000 1784.128 0.004 53.917 0.000 123 -126 116 10 - 9.037 0.868 13.219 0.286 0.000 0.000 25.431 1.049 0.038 0.003 1717.020 26.917 1053.345 24.494 5.122 1.014 -4.705 1.799 1910.625 57.426 198.310 32.310 124 -127 110 10 - 9.102 0.544 12.595 0.000 0.000 0.000 20.109 0.000 0.055 0.000 1458.440 0.000 1326.795 8.218 10.158 0.491 -2.662 0.000 1484.760 0.000 28.977 0.001 125 -128 94 10 - 9.273 0.182 11.377 0.000 0.000 0.000 20.295 0.000 0.055 0.000 1350.680 0.007 1263.432 15.692 4.961 0.186 3.037 0.000 1394.020 0.000 40.300 0.003 126 -124 13 10 - 9.465 0.169 13.992 0.000 0.000 0.000 23.157 0.000 0.043 0.000 1146.433 0.013 1120.465 15.556 10.227 0.270 -1.592 0.000 1201.948 0.004 57.108 0.007 127 -130 45 10 - 9.550 0.386 9.814 0.000 0.017 0.000 16.010 0.001 0.087 0.000 1554.797 0.004 1074.615 29.539 8.300 0.409 -4.112 0.000 1606.680 0.000 55.995 0.002 128 -135 152 10 - 9.610 0.259 15.446 0.000 0.000 0.000 23.011 0.000 0.043 0.000 1665.850 0.000 946.076 8.652 5.728 0.268 -4.098 0.000 1739.930 0.000 78.180 0.001 129 -125 93 10 - 9.689 0.119 13.345 0.366 0.000 0.000 23.264 0.111 0.043 0.000 1423.375 94.377 1040.887 98.513 5.578 0.171 -0.319 2.153 1483.550 109.385 60.492 17.162 130 -133 115 10 - 9.787 0.539 13.502 0.000 0.034 0.000 22.814 0.000 0.056 0.000 1637.325 0.005 1220.587 16.638 9.947 0.458 -4.921 0.001 1691.818 0.004 59.413 0.004 131 -131 114 10 - 9.826 0.114 13.465 0.000 0.000 0.000 20.399 0.000 0.053 0.000 1818.070 0.000 1053.033 18.751 9.771 0.258 -8.246 0.000 1874.900 0.000 65.072 0.001 132 -129 3 10 - 10.018 0.127 14.428 0.000 0.034 0.000 22.032 0.000 0.058 0.000 1549.895 0.005 1118.065 42.148 12.787 0.454 -7.537 0.001 1587.460 0.000 45.103 0.003 133 -132 119 10 - 10.143 0.318 14.591 0.000 0.000 0.000 23.723 0.000 0.041 0.000 2030.400 0.000 
983.656 8.263 4.708 0.325 -5.477 0.000 2069.310 0.000 44.387 0.001 134 -136 150 10 - 10.431 0.253 14.509 0.338 0.073 0.007 23.445 0.046 0.067 0.003 2007.825 335.507 1296.773 80.914 10.058 1.357 -7.287 5.448 2055.438 339.336 54.902 9.277 135 -150 137 10 - 10.486 0.981 13.674 0.276 0.026 0.009 21.535 0.203 0.057 0.004 1802.435 67.949 850.777 19.785 5.939 1.057 -6.012 0.908 1900.545 36.937 104.120 32.144 136 -134 96 10 - 10.666 0.361 14.905 0.000 0.000 0.000 23.611 0.000 0.042 0.000 1180.222 0.008 1197.192 20.594 9.497 0.535 0.430 0.001 1271.465 0.005 90.810 0.004 137 -145 130 10 - 10.686 0.215 10.825 0.001 0.069 0.000 18.924 0.000 0.085 0.000 1267.947 0.004 1066.520 11.839 6.859 0.271 0.757 0.000 1374.340 0.000 105.638 0.005 138 -137 97 10 - 10.744 0.356 11.161 0.121 0.000 0.000 17.162 0.102 0.071 0.000 1500.715 119.809 1067.945 5.794 12.386 1.759 -6.461 0.315 1543.383 131.415 49.125 11.293 139 -138 89 10 - 10.796 0.255 8.739 0.000 0.017 0.000 14.675 0.000 0.099 0.000 1540.630 0.000 1060.565 11.528 12.264 0.289 -6.895 0.000 1596.385 0.005 62.650 0.001 140 -141 122 10 - 10.930 0.437 14.198 0.141 0.000 0.000 23.447 0.395 0.043 0.001 1390.735 77.045 1002.527 29.545 8.394 0.193 -1.835 0.884 1437.828 84.728 48.932 8.567 141 -139 50 10 - 10.969 0.619 10.602 0.000 0.000 0.000 18.906 0.000 0.063 0.000 1799.200 0.000 766.240 37.212 1.470 0.550 -1.686 0.000 1882.990 0.000 85.470 0.001 142 -147 120 10 - 11.521 0.159 4.285 0.000 0.121 0.000 8.527 0.000 0.243 0.000 1526.737 0.004 1528.830 15.598 15.387 0.301 -5.049 0.000 1642.110 0.000 120.418 0.003 143 -140 58 10 - 11.638 0.280 11.900 0.000 0.017 0.000 18.863 0.000 0.067 0.000 1899.500 0.000 1081.518 33.994 5.711 0.435 -2.672 0.000 1938.750 0.000 41.925 0.000 144 -142 31 10 - 12.238 0.461 13.303 0.000 0.000 0.000 21.021 0.000 0.051 0.000 1406.755 0.009 1096.230 19.810 11.836 0.584 -3.236 0.000 1456.838 0.004 53.317 0.006 145 -143 151 10 - 12.714 0.279 7.749 0.000 0.000 0.000 13.417 0.000 0.108 0.000 1964.388 0.004 896.694 19.473 
6.427 0.348 -4.948 0.001 2015.227 0.004 55.788 0.002 146 -144 129 10 - 12.743 0.384 13.927 0.169 0.000 0.000 20.877 0.157 0.051 0.001 1557.322 37.718 937.363 18.414 7.410 1.237 -1.420 1.619 1611.295 33.980 55.392 2.120 147 -146 132 10 - 12.926 0.195 13.916 0.000 0.000 0.000 22.376 0.000 0.046 0.000 1836.933 0.004 878.615 11.559 11.218 0.259 -8.685 0.000 1909.210 0.000 80.959 0.002 148 -148 99 10 - 13.625 0.374 9.809 0.371 0.000 0.000 21.326 1.560 0.054 0.006 1819.340 7.577 948.136 21.599 10.081 0.705 -5.718 0.542 1868.615 8.389 54.990 1.051 149 -149 86 10 - 14.127 0.142 12.241 0.000 0.000 0.000 20.385 0.001 0.054 0.000 1876.680 0.000 793.395 14.295 7.999 0.164 -5.168 0.000 1917.780 0.000 46.262 0.001 150 -151 139 10 - 14.803 0.643 14.398 0.000 0.000 0.000 22.842 0.000 0.044 0.000 1558.020 0.007 877.430 6.677 7.620 0.660 -0.349 0.000 1630.265 0.005 72.595 0.004 151 -152 92 10 - 16.300 0.353 12.552 0.000 0.000 0.000 18.676 0.000 0.062 0.000 1163.448 0.008 1243.952 7.434 15.781 0.397 1.090 0.000 1187.940 0.000 23.400 0.002 152 --
- After rigid body docking the first acceptable cluster is at rank 41. After refinement it scores at the top with score significantly better than the second-ranked cluster! -
--============================================== -== ./runs/scenario1-surface//4_caprieval/capri_clt.tsv -============================================== -Total number of acceptable or better clusters: 2 out of 152 -Total number of medium or better clusters: 1 out of 152 -Total number of high quality clusters: 0 out of 152 + # execute + haddock3-+ {% endhighlight %} +
-First acceptable cluster - rank: 43 i-RMSD: 2.535 Fnat: 0.345 DockQ: 0.423 -First medium cluster - rank: 54 i-RMSD: 1.684 Fnat: 0.526 DockQ: 0.622 -Best cluster - rank: 54 i-RMSD: 1.684 Fnat: 0.526 DockQ: 0.622 -============================================== -== ./runs/scenario1-surface//9_caprieval/capri_clt.tsv -============================================== -Total number of acceptable or better clusters: 2 out of 127 -Total number of medium or better clusters: 1 out of 127 -Total number of high quality clusters: 0 out of 127 -First acceptable cluster - rank: 1 i-RMSD: 1.678 Fnat: 0.733 DockQ: 0.669 -First medium cluster - rank: 1 i-RMSD: 1.678 Fnat: 0.733 DockQ: 0.669 -Best cluster - rank: 1 i-RMSD: 1.678 Fnat: 0.733 DockQ: 0.669 -
-============================================== -== ./runs/scenario1-surface/4_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 20 out of 1520 -Total number of medium or better models: 8 out of 1520 -Total number of high quality models: 0 out of 1520 +-+ Execution in MPI mode expand_more +
-First acceptable model - rank: 344 i-RMSD: 2.535 Fnat: 0.345 DockQ: 0.423 -First medium model - rank: 491 i-RMSD: 1.282 Fnat: 0.707 DockQ: 0.738 -Best model - rank: 559 i-RMSD: 1.282 Fnat: 0.707 DockQ: 0.738 -============================================== -== ./runs/scenario1-surface/9_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 22 out of 1475 -Total number of medium or better models: 13 out of 1475 -Total number of high quality models: 0 out of 1475 -First acceptable model - rank: 1 i-RMSD: 1.518 Fnat: 0.810 DockQ: 0.722 -First medium model - rank: 1 i-RMSD: 1.518 Fnat: 0.810 DockQ: 0.722 -Best model - rank: 17 i-RMSD: 1.197 Fnat: 0.879 DockQ: 0.811 -
- In terms of iRMSD values we only observe very small differences in the best models, but the change in ranking is impressive! - The fraction of native contacts and the DockQ scores are however improving much more after flexible refinement. - All this will of course depend on how different are the bound and unbound conformations and the amount of data - used to drive the docking process. In general, from our experience, the more and better data at hand, - the larger the conformational changes that can be induced. -
-- This is clearly not the case. The scoring function is not perfect, but does a reasonable job in ranking models of acceptable or better quality on top in this case. -
+ # go to the run directory + # edit if needed to specify the correct location + cd $HOME/HADDOCK3-antibody-antigen + + # execute + haddock3 \+ model md5 caprieval_rank score irmsd fnat lrmsd ilrmsd dockq rmsd cluster_id cluster_ranking model-cluster_ranking air angles bonds bsa cdih coup dani desolv dihe elec improper rdcs rg sym total vdw vean xpcs +../09_seletopclusts/cluster_1_model_1.pdb - 1 -140.319 0.908 0.897 2.205 1.451 0.855 1.016 3 1 1 133.760 0.000 0.000 2010.880 0.000 0.000 0.000 7.010 0.000 -605.174 0.000 0.000 0.000 0.000 -511.084 -39.671 0.000 0.000 +../09_seletopclusts/cluster_1_model_2.pdb - 2 -137.507 0.879 0.948 1.951 1.354 0.881 0.989 3 1 2 189.059 0.000 0.000 1913.390 0.000 0.000 0.000 3.243 0.000 -521.143 0.000 0.000 0.000 0.000 -387.512 -55.428 0.000 0.000 +../09_seletopclusts/cluster_1_model_3.pdb - 3 -126.481 1.052 0.914 3.038 1.958 0.824 1.293 3 1 3 127.044 0.000 0.000 1816.780 0.000 0.000 0.000 -2.884 0.000 -426.677 0.000 0.000 0.000 0.000 -350.599 -50.966 0.000 0.000 +../09_seletopclusts/cluster_1_model_4.pdb - 4 -102.227 1.334 0.793 2.331 2.292 0.760 1.341 3 1 4 128.628 0.000 0.000 1837.970 0.000 0.000 0.000 12.344 0.000 -410.669 0.000 0.000 0.000 0.000 -327.341 -45.299 0.000 0.000 +../09_seletopclusts/cluster_2_model_1.pdb - 5 -102.077 14.789 0.103 23.359 22.787 0.077 14.405 2 2 1 163.844 0.000 0.000 1888.310 0.000 0.000 0.000 2.575 0.000 -348.025 0.000 0.000 0.000 0.000 -235.613 -51.431 0.000 0.000 +... ++ +If clustering was performed prior to calling the `caprieval` module, the `capri_ss.tsv` file will also contain information about to which cluster the model belongs to and its ranking within the cluster. 
+ +The relevant statistics are: + +* **score**: *the HADDOCK score (arbitrary units)* +* **irmsd**: *the interface RMSD, calculated over the interfaces of the molecules* +* **fnat**: *the fraction of native contacts* +* **lrmsd**: *the ligand RMSD, calculated on the ligand after fitting on the receptor (1st component)* +* **ilrmsd**: *the interface-ligand RMSD, calculated over the interface of the ligand after fitting on the interface of the receptor (more relevant for small ligands for example)* +* **dockq**: *the DockQ score, which is a combination of irmsd, lrmsd and fnat and provides a continuous scale between 1 (exactly equal to reference) and 0* + +Various other terms are also reported including: + +* **bsa**: *the buried surface area (in squared angstroms)* +* **elec**: *the intermolecular electrostatic energy* +* **vdw**: *the intermolecular van der Waals energy* +* **desolv**: *the desolvation energy* + + +The iRMSD, lRMSD and Fnat metrics are the ones used in the blind protein-protein prediction experiment [CAPRI](https://capri.ebi.ac.uk/){:target="_blank"} (Critical Assessment of PRedicted Interactions). + +In CAPRI the quality of a model is defined as (for protein-protein complexes): + +* **acceptable model**: i-RMSD < 4Å or l-RMSD < 10Å and Fnat > 0.1 (0.23 < DOCKQ < 0.49) +* **medium quality model**: i-RMSD < 2Å or l-RMSD < 5Å and Fnat > 0.3 (0.49 < DOCKQ < 0.8) +* **high quality model**: i-RMSD < 1Å or l-RMSD < 1Å and Fnat > 0.5 (DOCKQ > 0.8) + + +Based on these CAPRI criteria, what is the quality of the best model listed above (_cluster_1_model_1.pdb_)? -Models statistics: +In cases where the `caprieval` module is called after a clustering step, an additional `capri_clt.tsv` file will be present in the directory. +This file contains the cluster ranking and score statistics, averaged over the minimum number of models defined for clustering +(4 by default), with their corresponding standard deviations. 
E.g.: -* [iRMSD versus HADDOCK score](plots/scenario1-surface/irmsd_score.html){:target="_blank"} -* [DockQ versus HADDOCK score](plots/scenario1-surface/dockq_score.html){:target="_blank"} -* [DockQ versus van der Waals energy](plots/scenario1-surface/dockq_vdw.html){:target="_blank"} -* [DockQ versus electrostatic energy](plots/scenario1-surface/dockq_elec.html){:target="_blank"} -* [DockQ versus ambiguous restraints energy](plots/scenario1-surface/dockq_air.html){:target="_blank"} -* [DockQ versus desolvation energy](plots/scenario1-surface/dockq_desolv.html){:target="_blank"} +
+cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std ilrmsd ilrmsd_std rmsd rmsd_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank + 1 3 4 - -126.634 15.010 1.044 0.180 0.888 0.058 2.381 0.403 0.830 0.045 1.764 0.382 1.160 0.159 144.623 25.775 1894.755 76.054 4.928 5.550 -490.916 78.318 -394.134 70.848 -47.841 5.927 1 + 2 2 4 - -98.425 2.624 14.572 0.524 0.095 0.009 23.293 0.233 0.074 0.002 22.593 0.371 14.300 0.194 159.227 8.415 1781.358 114.002 2.706 2.898 -340.312 32.395 -230.077 26.771 -48.992 5.015 2 + 3 1 4 - -91.137 1.918 10.249 0.530 0.056 0.007 19.692 0.505 0.078 0.005 18.190 0.649 10.554 0.495 173.598 42.201 1441.505 77.296 4.873 4.329 -389.212 18.467 -251.141 40.747 -35.527 5.170 3 +... +-Cluster statistics (distributions of values per cluster ordered according to their HADDOCK rank): -* [HADDOCK scores](plots/scenario1-surface/score_clt.html){:target="_blank"} -* [van der Waals energies](plots/scenario1-surface/vdw_clt.html){:target="_blank"} -* [electrostatic energies](plots/scenario1-surface/elec_clt.html){:target="_blank"} -* [ambiguous restraints energies](plots/scenario1-surface/air_clt.html){:target="_blank"} -* [desolvation energies](plots/scenario1-surface/desolv_clt.html){:target="_blank"} +In this file you find the cluster rank (which corresponds to the naming of the clusters in the previous `seletop` directory), the cluster ID (which is related to the size of the cluster, 1 being always the largest cluster), the number of models (n) in the cluster and the corresponding statistics (averages + standard deviations). The corresponding cluster PDB files will be found in the preceding `09_seletopclusts` directory. +While these simple text files can be easily checked from the command line already, they might be cumbersome to read. 
+For that reason, we have developed a post-processing analysis that automatically generates html reports for all `caprieval` steps in the workflow. +These are located in the respective `analysis/XX_caprieval` directories and can be viewed using your favorite web browser. -For this antibody-antigen case, which of the score component is correlating best with the quality of the models?.
-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank -1 7 10 - -138.617 3.623 1.302 0.095 0.759 0.042 3.654 0.845 0.723 0.042 106.923 15.255 2007.338 29.881 4.998 2.064 -535.045 31.312 -475.421 23.968 -47.299 4.337 1 -2 2 17 - -114.732 8.026 14.934 0.048 0.069 0.000 23.085 0.282 0.066 0.001 160.053 15.937 1925.930 68.200 4.355 4.005 -383.893 65.058 -282.153 44.813 -58.313 7.769 2 -3 6 10 - -94.885 3.118 5.158 0.190 0.142 0.014 12.677 0.697 0.177 0.010 195.712 3.139 1678.185 4.775 8.844 0.929 -326.919 4.813 -189.123 5.020 -57.916 4.522 3 -4 1 20 - -85.733 5.280 8.889 0.683 0.116 0.049 17.160 0.949 0.115 0.023 212.853 21.459 1542.527 57.393 6.435 2.591 -328.754 32.768 -163.602 15.475 -47.702 7.750 4 -5 5 10 - -77.835 3.974 4.405 0.344 0.207 0.086 10.394 1.197 0.239 0.050 177.651 29.362 1644.388 54.484 11.269 2.084 -365.089 43.590 -221.289 30.831 -33.852 8.784 5 -6 4 10 - -69.846 2.776 6.854 0.076 0.142 0.007 14.212 0.179 0.150 0.004 305.336 14.743 1491.412 41.939 4.152 1.676 -302.880 31.549 -41.500 23.024 -43.955 4.249 6 -7 3 10 - -52.284 6.453 4.930 0.161 0.125 0.033 12.482 0.704 0.176 0.018 356.568 28.814 1228.580 69.244 5.359 1.823 -299.986 38.202 23.279 48.112 -33.303 2.374 7 --
-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank -1 1 10 - -7.065 0.204 14.798 0.000 0.069 0.000 23.304 0.000 0.066 0.000 800.213 0.006 1705.220 15.183 3.034 0.336 -1.039 0.001 798.194 0.002 -0.980 0.003 1 -2 4 10 - -4.905 0.322 1.247 0.000 0.690 0.000 2.258 0.000 0.738 0.000 830.946 0.006 1750.490 10.557 12.935 0.295 -8.707 0.001 828.476 0.002 6.237 0.004 2 -3 5 10 - -3.218 0.321 12.988 0.000 0.069 0.000 21.389 0.000 0.073 0.000 1250.720 0.007 1281.875 12.910 -2.070 0.282 -1.521 0.000 1317.727 0.004 68.529 0.003 3 -4 6 10 - -2.754 0.136 5.104 0.000 0.138 0.000 12.428 0.000 0.179 0.000 1109.805 0.005 1381.213 18.673 3.933 0.265 -4.214 0.000 1129.680 0.000 24.088 0.003 4 -5 3 10 - -2.534 0.181 8.639 0.000 0.121 0.000 16.823 0.000 0.118 0.000 1115.398 0.004 1139.723 20.158 4.226 0.286 -6.851 0.000 1141.970 0.000 33.428 0.001 5 -6 2 10 - 0.099 0.314 9.991 0.000 0.052 0.000 18.505 0.001 0.083 0.000 1069.392 0.061 1148.727 27.538 7.879 0.518 -7.227 0.001 1086.138 0.023 23.972 0.039 6 -7 8 10 - 3.994 0.158 4.033 0.001 0.121 0.000 10.360 0.000 0.215 0.000 1343.840 0.000 1164.535 21.845 7.108 0.196 -5.423 0.000 1390.040 0.000 51.623 0.001 7 -8 9 10 - 4.619 0.267 7.100 0.000 0.121 0.000 14.167 0.000 0.143 0.000 1523.870 0.007 1134.912 25.058 6.563 0.123 -6.186 0.000 1552.900 0.000 35.217 0.004 8 -9 7 10 - 10.174 0.445 4.776 0.000 0.086 0.000 11.416 0.000 0.178 0.000 1954.290 0.000 937.232 7.379 10.179 0.376 -10.952 0.000 2021.017 0.004 77.678 0.002 9 --
- After rigid body docking the first acceptable cluster is at rank 2. After refinement it scores at the top with score significantly better than the second-ranked cluster. -
-============================================== -== ./runs/scenario2a-NMR-epitope-pass/4_caprieval/capri_clt.tsv +== runs/run1/02_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better clusters: 1 out of 9 -Total number of medium or better clusters: 1 out of 9 -Total number of high quality clusters: 0 out of 9 +Total number of acceptable or better models: 25 out of 100 +Total number of medium or better models: 15 out of 100 +Total number of high quality models: 1 out of 100 -First acceptable cluster - rank: 2 i-RMSD: 1.247 Fnat: 0.690 DockQ: 0.738 -First medium cluster - rank: 2 i-RMSD: 1.247 Fnat: 0.690 DockQ: 0.738 -Best cluster - rank: 2 i-RMSD: 1.247 Fnat: 0.690 DockQ: 0.738 +First acceptable model - rank: 1 i-RMSD: 1.196 Fnat: 0.672 DockQ: 0.741 +First medium model - rank: 1 i-RMSD: 1.196 Fnat: 0.672 DockQ: 0.741 +Best model - rank: 17 i-RMSD: 0.982 Fnat: 0.759 DockQ: 0.774 ============================================== -== ./runs/scenario2a-NMR-epitope-pass/9_caprieval/capri_clt.tsv +== runs/run1/05_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better clusters: 1 out of 7 -Total number of medium or better clusters: 1 out of 7 -Total number of high quality clusters: 0 out of 7 - -First acceptable cluster - rank: 1 i-RMSD: 1.302 Fnat: 0.759 DockQ: 0.723 -First medium cluster - rank: 1 i-RMSD: 1.302 Fnat: 0.759 DockQ: 0.723 -Best cluster - rank: 1 i-RMSD: 1.302 Fnat: 0.759 DockQ: 0.723 --
+First acceptable model - rank: 1 i-RMSD: 0.992 Fnat: 0.897 DockQ: 0.834 +First medium model - rank: 1 i-RMSD: 0.992 Fnat: 0.897 DockQ: 0.834 +Best model - rank: 11 i-RMSD: 0.789 Fnat: 0.776 DockQ: 0.842 ============================================== -== ./runs/scenario2a-NMR-epitope-pass/4_caprieval/capri_ss.tsv +== runs/run1/07_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 10 out of 90 -Total number of medium or better models: 10 out of 90 -Total number of high quality models: 1 out of 90 +Total number of acceptable or better models: 14 out of 40 +Total number of medium or better models: 14 out of 40 +Total number of high quality models: 3 out of 40 -First acceptable model - rank: 11 i-RMSD: 1.247 Fnat: 0.690 DockQ: 0.738 -First medium model - rank: 11 i-RMSD: 1.247 Fnat: 0.690 DockQ: 0.738 -Best model - rank: 16 i-RMSD: 0.980 Fnat: 0.586 DockQ: 0.726 +First acceptable model - rank: 1 i-RMSD: 1.037 Fnat: 0.931 DockQ: 0.841 +First medium model - rank: 1 i-RMSD: 1.037 Fnat: 0.931 DockQ: 0.841 +Best model - rank: 11 i-RMSD: 0.841 Fnat: 0.897 DockQ: 0.875 ============================================== -== ./runs/scenario2a-NMR-epitope-pass/9_caprieval/capri_ss.tsv +== runs/run1/10_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 12 out of 87 -Total number of medium or better models: 10 out of 87 -Total number of high quality models: 0 out of 87 +Total number of acceptable or better models: 4 out of 12 +Total number of medium or better models: 4 out of 12 +Total number of high quality models: 1 out of 12 -First acceptable model - rank: 1 i-RMSD: 1.300 Fnat: 0.793 DockQ: 0.730 -First medium model - rank: 1 i-RMSD: 1.300 Fnat: 0.793 DockQ: 0.730 -Best model - rank: 5 i-RMSD: 1.029 Fnat: 0.810 DockQ: 0.811 +First acceptable model - rank: 1 i-RMSD: 1.037 Fnat: 0.931 DockQ: 0.841 +First medium model - rank: 1 i-RMSD: 1.037 Fnat: 0.931 
DockQ: 0.841 +Best model - rank: 3 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855
   - In terms of iRMSD values we only observe very small differences with a slight increase. - The fraction of native contacts and the DockQ scores are however improving much more after flexible refinement. - All this will of course depend on how different are the bound and unbound conformations and the amount of data - used to drive the docking process. In general, from our experience, the more and better data at hand, - the larger the conformational changes that can be induced. + In terms of iRMSD values, we only observe very small differences in the best model. + The fraction of native contacts and the DockQ scores are however improving much more after flexible refinement and increase again slightly after final minimisation. + All this will of course depend on how different the bound and unbound conformations are and on the amount of data used to drive the docking process. + In general, from our experience, the more and better data at hand, the larger the conformational changes that can be induced.
- This is clearly not the case. The scoring function is not perfect, but does a reasonable job in ranking models of acceptable or better quality on top in this case. + This is not the case. The scoring function is not perfect, but does a reasonable job at ranking models of acceptable or better quality on top in this case.
Top-ranked model of the top cluster (cluster1_model_1) superimposed onto the reference crystal structure (in yellow)
++########################################################## +# `alascan` results for 4G6M_matched_haddock.pdb +# +# native score = -145.5891 +# +# z_score is calculated with respect to the other residues +########################################################## +chain res ori_resname end_resname score vdw elec desolv bsa delta_score delta_vdw delta_elec delta_desolv delta_bsa z_score +A 212 LYS ALA -136.33 -66.16 -367.66 3.37 1660.53 -9.26 2.52 -75.12 3.24 37.57 -0.48 +A 103 ASP ALA -129.64 -59.93 -365.23 3.34 1677.97 -15.95 -3.71 -77.56 3.27 20.13 -1.41 +A 54 TRP ALA -138.18 -58.34 -435.53 7.27 1690.80 -7.41 -5.30 -7.26 -0.66 7.30 -0.22 +A 32 SER ALA -143.66 -60.55 -447.37 6.36 1691.72 -1.93 -3.09 4.59 0.24 6.38 0.55 +A 58 ASP ALA -121.65 -63.49 -306.77 3.20 1639.20 -23.94 -0.15 -136.01 3.41 58.90 -2.52 +A 33 GLY ALA -148.50 -61.56 -473.22 7.71 1693.18 2.91 -2.08 30.43 -1.10 4.92 1.22 +... ++ + +Can you identify the most enriching/depleting mutation of each chain? +Take a look at _scan_clt_-.tsv_ and open its visualisation _scan_clt_-.html_ in the web browser. + + +You can use an additional script `/scripts/get-alascan-extrema.sh` to check your answer: + +bash scripts/get-alascan-extrema.sh run-energetics-alascan/1_alascan/scan_4G6M_matched_haddock.tsv + + +Mutation of the residue ASP58 turned out to be the most depleting within chain A. +Let us visualise it in PyMol to analyse its contribution to the binding: + +File menu -> Open -> 4G6M_matched.pdb + + +Display ASP58 as sticks and colour it by atom: + +util.cbc
+ 4G6K_abb_clean RMSD = 0.428 Å + 4G6K_af2_clean RMSD = 0.765 Å ++
+ 4G6K_abb_clean RMSD = 0.330 Å + 4G6K_af2_clean RMSD = 0.675 Å + 4G6K_clean RMSD = 0.393 Å ++
+============================================== +== run1/10_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 1 out of 3 +Total number of medium or better clusters: 1 out of 3 +Total number of high quality clusters: 0 out of 3 + +First acceptable cluster - rank: 1 i-RMSD: 1.049 Fnat: 0.879 DockQ: 0.815 +First medium cluster - rank: 1 i-RMSD: 1.049 Fnat: 0.879 DockQ: 0.815 +Best cluster - rank: 1 i-RMSD: 1.049 Fnat: 0.879 DockQ: 0.815 + +============================================== +== run1-abb/10_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 1 out of 5 +Total number of medium or better clusters: 1 out of 5 +Total number of high quality clusters: 0 out of 5 + +First acceptable cluster - rank: 1 i-RMSD: 1.134 Fnat: 0.841 DockQ: 0.796 +First medium cluster - rank: 1 i-RMSD: 1.134 Fnat: 0.841 DockQ: 0.796 +Best cluster - rank: 1 i-RMSD: 1.134 Fnat: 0.841 DockQ: 0.796 + +============================================== +== run1-af2/10_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 2 out of 3 +Total number of medium or better clusters: 0 out of 3 +Total number of high quality clusters: 0 out of 3 + +First acceptable cluster - rank: 1 i-RMSD: 3.974 Fnat: 0.289 DockQ: 0.239 +First medium cluster - rank: i-RMSD: Fnat: DockQ: +Best cluster - rank: 3 i-RMSD: 3.305 Fnat: 0.302 DockQ: 0.290 ++
+============================================== +== run1/07_caprieval/capri_ss.tsv +============================================== +... +First acceptable model - rank: 1 i-RMSD: 1.037 Fnat: 0.931 DockQ: 0.841 +First medium model - rank: 1 i-RMSD: 1.037 Fnat: 0.931 DockQ: 0.841 +Best model - rank: 11 i-RMSD: 0.841 Fnat: 0.897 DockQ: 0.875 + +============================================== +== run1-abb/07_caprieval/capri_ss.tsv +============================================== +... +First acceptable model - rank: 1 i-RMSD: 0.990 Fnat: 0.931 DockQ: 0.860 +First medium model - rank: 1 i-RMSD: 0.990 Fnat: 0.931 DockQ: 0.860 +Best model - rank: 1 i-RMSD: 0.990 Fnat: 0.931 DockQ: 0.860 + +============================================== +== run1-af2/07_caprieval/capri_ss.tsv +============================================== +... +First acceptable model - rank: 1 i-RMSD: 3.246 Fnat: 0.362 DockQ: 0.389 +First medium model - rank: i-RMSD: Fnat: DockQ: +Best model - rank: 21 i-RMSD: 2.474 Fnat: 0.362 DockQ: 0.468 ++
+============================================== +== run1-ens//12_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 3 out of 11 +Total number of medium or better clusters: 1 out of 11 +Total number of high quality clusters: 1 out of 11 + +First acceptable cluster - rank: 1 i-RMSD: 0.981 Fnat: 0.918 DockQ: 0.850 +First medium cluster - rank: 1 i-RMSD: 0.981 Fnat: 0.918 DockQ: 0.850 +Best cluster - rank: 1 i-RMSD: 0.981 Fnat: 0.918 DockQ: 0.850 ++
+============================================== +== run1-ens//02_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 27 out of 150 +Total number of medium or better models: 11 out of 150 +Total number of high quality models: 1 out of 150 + +First acceptable model - rank: 2 i-RMSD: 1.422 Fnat: 0.586 DockQ: 0.631 +First medium model - rank: 2 i-RMSD: 1.422 Fnat: 0.586 DockQ: 0.631 +Best model - rank: 26 i-RMSD: 0.982 Fnat: 0.759 DockQ: 0.774 +============================================== +== run1-ens//05_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 16 out of 83 +Total number of medium or better models: 10 out of 83 +Total number of high quality models: 1 out of 83 + +First acceptable model - rank: 2 i-RMSD: 1.422 Fnat: 0.586 DockQ: 0.631 +First medium model - rank: 2 i-RMSD: 1.422 Fnat: 0.586 DockQ: 0.631 +Best model - rank: 24 i-RMSD: 0.982 Fnat: 0.759 DockQ: 0.774 +============================================== +== run1-ens//07_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 17 out of 83 +Total number of medium or better models: 9 out of 83 +Total number of high quality models: 4 out of 83 + +First acceptable model - rank: 1 i-RMSD: 0.836 Fnat: 0.931 DockQ: 0.878 +First medium model - rank: 1 i-RMSD: 0.836 Fnat: 0.931 DockQ: 0.878 +Best model - rank: 7 i-RMSD: 0.829 Fnat: 0.845 DockQ: 0.854 +============================================== +== run1-ens//09_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 16 out of 83 +Total number of medium or better models: 9 out of 83 +Total number of high quality models: 3 out of 83 + +First acceptable model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +First medium model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +Best model - rank: 12 i-RMSD: 0.851 
Fnat: 0.845 DockQ: 0.851 +============================================== +== run1-ens//12_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 10 out of 44 +Total number of medium or better models: 4 out of 44 +Total number of high quality models: 2 out of 44 + +First acceptable model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +First medium model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +Best model - rank: 2 i-RMSD: 0.879 Fnat: 0.948 DockQ: 0.881 ++
-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank -1 1 17 - -151.013 2.855 1.957 0.466 0.642 0.060 4.965 0.721 0.593 0.080 61.922 16.739 1986.370 29.972 6.632 4.202 -596.659 47.783 -579.241 41.814 -44.505 9.407 1 -2 5 10 - -144.671 4.375 15.023 0.056 0.073 0.007 24.092 0.434 0.065 0.003 82.575 31.344 2050.332 52.230 1.109 1.544 -454.861 68.487 -435.350 64.144 -63.065 9.264 2 -3 4 10 - -103.148 2.915 9.859 0.357 0.065 0.025 19.699 0.784 0.082 0.011 118.571 18.485 1422.492 48.831 1.077 3.734 -395.524 36.286 -313.930 36.565 -36.977 2.057 3 -4 2 11 - -101.775 9.988 14.811 0.042 0.082 0.014 23.773 0.258 0.069 0.005 112.762 35.542 1733.245 136.770 5.903 1.639 -369.718 13.140 -301.966 31.881 -45.010 9.758 4 -5 12 4 - -101.717 16.264 2.883 0.465 0.414 0.073 6.588 0.667 0.421 0.039 123.812 49.768 1620.057 171.832 8.321 3.108 -510.810 25.365 -407.254 30.156 -20.257 18.482 5 -6 6 10 - -100.286 5.972 5.029 0.078 0.121 0.012 12.377 0.431 0.175 0.007 81.077 18.925 1527.888 25.942 6.652 1.287 -297.121 20.305 -271.666 30.603 -55.622 4.563 6 -7 3 10 - -98.426 7.151 4.147 0.602 0.341 0.092 8.377 1.144 0.324 0.059 163.456 41.462 1654.400 57.204 -1.567 4.125 -331.139 27.013 -214.660 64.132 -46.976 4.973 7 -8 9 8 - -91.198 8.435 3.061 0.163 0.509 0.029 7.763 0.751 0.417 0.024 95.083 5.502 1636.765 120.832 7.456 2.015 -383.475 21.166 -319.859 23.245 -31.468 7.111 8 -9 10 8 - -89.350 5.243 14.907 0.197 0.095 0.019 24.223 0.525 0.072 0.006 108.123 28.325 1698.345 93.675 4.693 2.411 -294.506 48.615 -232.337 53.247 -45.954 8.366 9 -10 7 10 - -78.992 5.846 7.589 0.167 0.164 0.008 16.501 0.352 0.137 0.002 141.345 58.004 1386.070 33.470 -0.716 1.791 -253.702 31.000 -154.028 33.843 -41.671 4.896 10 -11 11 6 - -77.052 4.065 3.956 0.826 0.341 0.071 8.653 1.743 0.327 0.075 171.334 27.914 1501.757 87.948 6.200 3.838 -329.716 23.812 
-192.825 44.069 -34.442 5.191 11 -12 8 8 - -67.688 6.454 14.580 0.019 0.086 0.012 23.018 0.239 0.072 0.005 148.451 30.560 1652.513 97.272 3.496 1.602 -203.546 55.957 -100.415 43.751 -45.320 6.620 12 +* Antibody heavy chain: ++QVQLQESGPGLVKPSQTLSLTCSFSGFSLSTSGMGVGWIRQPSGKGLEWLAHIWWDGDES +YNPSLKSRLTISKDTSKNQVSLKITSVTAADTAVYFCARNRYDPPWFVDWGQGTLVTVSS-
+DIQMTQSTSSLSASVGDRVTITCRASQDISNYLSWYQQKPGKAVKLLIYYTSKLHSGVPS +RFSGSGSGTDYTLTISSLQQEDFATYFCLQGKMLPWTFGQGTKLEIK +-What is the rank of the best cluster generated? +* Antigen: +
+VRSLNCTLRDSQQKSLVMSGPYELKALHLQGQDMEQQVVFSMSFVQGEESNDKIPVALGL +KEKNLYLSCVLKDDKPTLQLESVDPKNYPKKKMEKRFVFNKIEINNKLEFESAQFPNWYI +STSQAENMPVFLGGTKGGQDITDFTMQFVSS ++
+QVQLQESGPGLVKPSQTLSLTCSFSGFSLSTSGMGVGWIRQPSGKGLEWLAHIWWDGDESYNPSLKSRLTISKDTSKNQVSLKITSVTAADTAVYFCARNRYDPPWFVDWGQGTLVTVSS:DIQMTQSTSSLSASVGDRVTITCRASQDISNYLSWYQQKPGKAVKLLIYYTSKLHSGVPSRFSGSGSGTDYTLTISSLQQEDFATYFCLQGKMLPWTFGQGTKLEIK:VRSLNCTLRDSQQKSLVMSGPYELKALHLQGQDMEQQVVFSMSFVQGEESNDKIPVALGLKEKNLYLSCVLKDDKPTLQLESVDPKNYPKKKMEKRFVFNKIEINNKLEFESAQFPNWYISTSQAENMPVFLGGTKGGQDITDFTMQFVSS ++ + + +Define the _jobname_, e.g. Ab-Ag + + + +In the _Advanced settings_ block you can check the option to save the results to your Google Drive (if you have an account) + + + +In the top section of the Colab, click: _Runtime > Run All_ + +(It may give a warning that this is not authored by Google because it is pulling code from GitHub - you can ignore it). -In this run we also had a `caprieval` after the clustering of the rigid body models (step 4 of our workflow). +This will automatically install, configure and run AlphaFold2 for you - leave this window open. +After the prediction completed, you will be asked to download a zip archive with the results (if you configured it to use Google Drive, a result archive will be automatically saved to your Google Drive). + +
-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank -1 6 10 - -14.099 0.485 2.534 0.060 0.328 0.000 6.091 0.186 0.416 0.006 497.929 84.662 1549.405 36.228 12.766 0.474 -16.378 0.493 484.394 89.658 2.844 5.623 1 -2 5 10 - -13.662 1.131 7.797 0.008 0.138 0.000 16.855 0.109 0.126 0.001 728.546 171.645 1173.928 15.754 -3.900 0.575 -5.855 0.132 777.477 174.737 54.786 8.386 2 -3 1 10 - -11.313 0.323 1.048 0.090 0.737 0.026 2.716 0.487 0.772 0.017 393.784 59.151 1658.795 79.415 11.225 0.683 -9.950 0.631 390.030 72.124 6.196 17.950 3 -4 3 10 - -9.839 0.266 5.023 0.016 0.168 0.007 12.101 0.046 0.194 0.003 408.853 105.262 1349.137 18.104 5.414 0.889 -5.624 0.400 380.563 108.924 -22.667 6.196 4 -5 4 10 - -9.326 1.678 14.812 0.010 0.069 0.000 23.503 0.056 0.065 0.000 380.995 98.521 1677.188 22.301 5.271 1.050 -1.377 0.204 353.849 106.067 -25.768 7.771 5 -7 11 10 - -9.256 0.749 4.689 0.384 0.237 0.019 10.175 0.794 0.248 0.021 997.843 119.928 1262.298 9.238 -4.968 0.214 -1.769 1.706 1008.617 125.287 12.542 8.265 6 -6 2 10 - -8.233 0.486 10.223 0.245 0.026 0.015 21.128 0.934 0.062 0.009 581.415 155.264 1036.009 53.300 1.424 0.492 -5.174 1.459 582.590 159.417 6.348 8.379 7 -8 7 10 - -3.262 0.604 14.741 0.012 0.069 0.000 23.565 0.052 0.065 0.000 676.043 49.459 1149.500 14.839 9.827 0.292 -8.472 0.095 679.402 52.646 11.831 3.553 8 -10 8 10 - -2.096 1.711 3.685 0.709 0.272 0.041 8.101 1.996 0.321 0.068 708.926 377.133 1167.398 7.469 11.135 1.233 -8.820 2.498 717.466 390.544 17.360 15.407 9 -9 9 10 - -1.996 0.489 3.210 0.084 0.332 0.026 8.203 0.501 0.343 0.009 398.137 131.501 1060.155 56.080 12.793 0.944 -8.149 1.179 387.985 134.607 -2.003 5.126 10 -12 10 10 - -1.280 2.048 14.353 0.513 0.038 0.008 22.816 0.551 0.057 0.003 869.164 86.949 1255.027 53.327 6.427 3.491 -4.390 2.183 918.969 91.260 54.194 7.974 11 -11 12 10 - 
-1.104 3.228 14.742 0.077 0.052 0.000 24.131 0.342 0.058 0.001 574.145 40.525 1146.227 48.875 4.858 2.295 -0.342 0.112 583.846 49.827 10.042 11.138 12 + ++ See the confidence statistics for the five generated models +
+ ++ Model1: pLDDT=90.4 pTM=0.654 ipTM=0.525 + Model2: pLDDT=88.0 pTM=0.65 ipTM=0.522 + Model3: pLDDT=88.2 pTM=0.647 ipTM=0.52 + Model4: pLDDT=88.0 pTM=0.644 ipTM=0.516 + Model5: pLDDT=88.1 pTM=0.641 ipTM=0.512+
+Note that if you performed a fresh run your results might well differ from those shown here. +
- After rigid body docking the first acceptable cluster is at rank 1 and the same is true after refinement, but the iRMSD values have improved. -
+-============================================== -== runs/scenario2b-NMR-epitope-act//4_caprieval/capri_clt.tsv -============================================== -Total number of acceptable or better clusters: 4 out of 12 -Total number of medium or better clusters: 1 out of 12 -Total number of high quality clusters: 0 out of 12 +File menu -> Open -> select abagtest_2d03e_unrelaxed_rank_001_alphafold2_multimer_v3_model_3_seed_000.pdb -First acceptable cluster - rank: 1 i-RMSD: 2.534 Fnat: 0.328 DockQ: 0.416 -First medium cluster - rank: 3 i-RMSD: 1.048 Fnat: 0.737 DockQ: 0.772 -Best cluster - rank: 3 i-RMSD: 1.048 Fnat: 0.737 DockQ: 0.772 -============================================== -== runs/scenario2b-NMR-epitope-act//9_caprieval/capri_clt.tsv -============================================== -Total number of acceptable or better clusters: 4 out of 12 -Total number of medium or better clusters: 1 out of 12 -Total number of high quality clusters: 0 out of 12 +Repeat this for each model (`abagtest_2d03e_unrelaxed_rank_X_alphafold2_multimer_v3_model_X_seed_000.pdb` or whatever the naming of your model is). -First acceptable cluster - rank: 1 i-RMSD: 1.957 Fnat: 0.642 DockQ: 0.593 -First medium cluster - rank: 1 i-RMSD: 1.957 Fnat: 0.642 DockQ: 0.593 -Best cluster - rank: 1 i-RMSD: 1.957 Fnat: 0.642 DockQ: 0.593 --
-============================================== -== runs/scenario2b-NMR-epitope-act//4_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 34 out of 120 -Total number of medium or better models: 10 out of 120 -Total number of high quality models: 2 out of 120 +**Note:** You can turn on and off a model by clicking on its name in the right panel of the PyMOL window. -First acceptable model - rank: 2 i-RMSD: 2.533 Fnat: 0.328 DockQ: 0.416 -First medium model - rank: 11 i-RMSD: 1.035 Fnat: 0.724 DockQ: 0.757 -Best model - rank: 19 i-RMSD: 0.978 Fnat: 0.741 DockQ: 0.779 -============================================== -== runs/scenario2b-NMR-epitope-act//9_caprieval/capri_ss.tsv -============================================== -Total number of acceptable or better models: 35 out of 112 -Total number of medium or better models: 11 out of 112 -Total number of high quality models: 4 out of 112 +--First acceptable model - rank: 1 i-RMSD: 2.431 Fnat: 0.586 DockQ: 0.515 -First medium model - rank: 3 i-RMSD: 1.922 Fnat: 0.638 DockQ: 0.597 -Best model - rank: 10 i-RMSD: 0.758 Fnat: 0.845 DockQ: 0.871 -
- In this case we observe a small improvement in terms of iRMSD values and quite some large improvement in - the fraction of native contacts and the DockQ scores. Also the single model rankings have improved, but the top ranked model is not the best one. -
+- This is clearly not the case. The scoring function is not perfect, but does a reasonable job in ranking models of acceptable or better quality on top in this case. -
++ model md5 caprieval_rank score irmsd fnat lrmsd ilrmsd dockq rmsd cluster_id cluster_ranking model-cluster_ranking air angles bonds bsa cdih coup dani desolv dihe elec improper rdcs rg sym total vdw vean xpcs +../1_emscoring/emscoring_82.pdb - 1 -157.149 0.910 0.897 2.201 1.456 0.855 1.016 3 1 1 0.000 0.000 0.000 2000.130 0.000 0.000 0.000 7.345 0.000 -599.183 0.000 0.000 0.000 0.000 -643.841 -44.658 0.000 0.000 +../1_emscoring/emscoring_2.pdb - 2 -156.452 0.880 0.948 1.949 1.355 0.881 0.989 3 1 2 0.000 0.000 0.000 1914.860 0.000 0.000 0.000 3.125 0.000 -504.372 0.000 0.000 0.000 0.000 -563.075 -58.703 0.000 0.000 +../1_emscoring/emscoring_64.pdb - 3 -138.214 1.052 0.914 3.039 1.955 0.824 1.294 3 1 3 0.000 0.000 0.000 1784.350 0.000 0.000 0.000 -2.359 0.000 -424.542 0.000 0.000 0.000 0.000 -475.489 -50.947 0.000 0.000 +../1_emscoring/emscoring_40.pdb - 4 -135.230 1.085 0.897 1.866 1.756 0.836 1.144 3 1 4 0.000 0.000 0.000 1875.210 0.000 0.000 0.000 3.490 0.000 -429.067 0.000 0.000 0.000 0.000 -481.973 -52.906 0.000 0.000 +../1_emscoring/emscoring_37.pdb - 5 -134.569 13.624 0.069 22.589 21.764 0.068 13.881 5 2 1 0.000 0.000 0.000 1802.890 0.000 0.000 0.000 6.081 0.000 -426.815 0.000 0.000 0.000 0.000 -482.102 -55.287 0.000 0.000 - -show cartoon - - -util.cbc - - -color yellow, 4G6M_matched - +... +-Let us then superimpose all models on the reference structure: + +Did the HADDOCK scoring do a good job at putting the best models on top (consider for example the DockQ score)? + +The `emscoring` module renames all models, which makes it difficult to know what was the original model. +You can however trace back a model to its original file by looking into the `traceback/traceback.tsv` file: - -alignto 4G6M_matched - +
   +00_topoaa 1_emscoring 1_emscoring_rank +emref_9_from_haddock3-ens-emref-ensemble_83_haddock.psf emscoring_82.pdb 1 +emref_10_from_haddock3-ens-emref-ensemble_2_haddock.psf emscoring_2.pdb 2 +emref_7_from_haddock3-ens-emref-ensemble_67_haddock.psf emscoring_64.pdb 3 +emref_5_from_haddock3-ens-emref-ensemble_45_haddock.psf emscoring_40.pdb 4 +... +  -How close are the top4 models to the reference? Did HADDOCK do a good job at ranking the best in the top? +Try to locate the AlphaFold2 and AlphaFold3 models (their filenames start with _abagtest_ and _af3server_, respectively) -Let’s now check if the active residues which we have defined (the paratope and epitope) are actually part of the interface. In the PyMOL command window type: +A simple way to extract this information is to use `grep`: - -select paratope, (resi 31+32+33+34+35+52+54+55+56+100+101+102+103+104+105+106+1031+1032+1049+1050+1053+1091+1092+1093+1094+1096 and chain A) - - -color red, paratope + +grep abag traceback.tsv - -select epitope, (resi 72+73+74+75+81+83+84+89+90+92+94+96+97+98+115+116+117 and chain B) + +
+> grep abag traceback.tsv +abagtest_2d03e_unrelaxed_rank_001_alphafold2_multimer_v3_model_3_seed_000_from_af2-models_1_haddock.psf emscoring_84.pdb 86 +abagtest_2d03e_unrelaxed_rank_005_alphafold2_multimer_v3_model_2_seed_000_from_af2-models_5_haddock.psf emscoring_88.pdb 90 +abagtest_2d03e_unrelaxed_rank_004_alphafold2_multimer_v3_model_4_seed_000_from_af2-models_4_haddock.psf emscoring_87.pdb 91 +abagtest_2d03e_unrelaxed_rank_003_alphafold2_multimer_v3_model_1_seed_000_from_af2-models_3_haddock.psf emscoring_86.pdb 92 +abagtest_2d03e_unrelaxed_rank_002_alphafold2_multimer_v3_model_5_seed_000_from_af2-models_2_haddock.psf emscoring_85.pdb 93 ++
   +> grep af3server traceback.tsv +af3server_15052024_2_ready_from_af3-models_2_haddock.psf emscoring_90.pdb 40 +af3server_15052024_1_ready_from_af3-models_1_haddock.psf emscoring_89.pdb 81 +af3server_15052024_4_ready_from_af3-models_4_haddock.psf emscoring_92.pdb 87 +af3server_15052024_3_ready_from_af3-models_3_haddock.psf emscoring_91.pdb 88 +af3server_15052024_5_ready_from_af3-models_5_haddock.psf emscoring_93.pdb 89 +
Top4 models of the top cluster of scenario2a superimposed onto the reference crystal structure (in yellow)
   -The bottom eight models (the worst ranking ones) are all AlphaFold3/2 models. Looking at the components of the score + (some were left out in the table below for simplicity) one can see that it is mainly the van der Waals energy that causes the high scores, + which is indicative of clashes in the models.
++model md5 caprieval_rank score irmsd fnat lrmsd ilrmsd dockq rmsd bsa desolv elec vdw vean xpcs +... +../1_emscoring/emscoring_84.pdb - 86 -67.914 11.123 0.000 22.413 18.626 0.048 12.213 3535.520 -67.537 -150.913 29.806 +../1_emscoring/emscoring_92.pdb - 87 -63.263 11.426 0.000 22.104 21.035 0.049 11.048 1383.920 -9.924 -88.656 -35.607 +../1_emscoring/emscoring_91.pdb - 88 -50.990 13.665 0.000 23.793 22.150 0.042 13.796 1492.150 -8.962 -167.236 -8.581 +../1_emscoring/emscoring_93.pdb - 89 -46.871 6.644 0.000 10.617 11.333 0.146 6.455 1740.990 -8.906 -35.623 -30.841 +../1_emscoring/emscoring_88.pdb - 90 48.283 12.919 0.000 20.484 19.885 0.053 14.706 3914.250 -68.786 -129.461 142.961 +../1_emscoring/emscoring_87.pdb - 91 180.468 12.447 0.000 22.153 19.299 0.048 14.160 3639.430 -66.857 -240.130 295.351 +../1_emscoring/emscoring_86.pdb - 92 240.307 12.572 0.000 21.662 19.799 0.049 14.187 3535.820 -69.380 -154.703 340.628 +../1_emscoring/emscoring_85.pdb - 93 781.210 15.174 0.000 23.497 24.993 0.042 17.151 3278.340 -61.261 -86.026 859.677 +
1 32 33 34 35 52 54 55 56 100 101 102 103 104 105 106 151 152 169 170 173 211 212 213 214 216-* For the antigen we will use the NMR-identified epitope as active and the surface neighbors as passive. The corresponding file can be found in the `restraints` directory as `antigen-NMR-epitope.act-pass`: +* For the antigen we will use the NMR-identified epitope as active and the surface neighbors as passive. +The corresponding file can be found in the `restraints` directory as `antigen-NMR-epitope.act-pass`:
   72 73 74 75 81 83 84 89 90 92 94 96 97 98 115 116 117 @@ -807,7 +809,8 @@ No output means that your TBL file is valid. ### Additional restraints for multi-chain proteins As an antibody consists of two separate chains, it is important to define a few distance restraints -to keep them together during the high temperature flexible refinement stage of HADDOCK otherwise they might slightly drift appart. This can easily be done using the `haddock3-restraints restrain_bodies` sub-command. +to keep them together during the high temperature flexible refinement stage of HADDOCK, otherwise they might slightly drift apart. +This can easily be done using the `haddock3-restraints restrain_bodies` sub-command. haddock3-restraints restrain_bodies 4G6K_clean.pdb > antibody-unambig.tbl @@ -828,7 +831,9 @@ This file is also provided in the `restraints` directory. ## Setting up and running the docking with HADDOCK3 -Now that we have all required files at hand (PDB and restraints files), it is time to setup our docking protocol. In this tutorial, considering we have rather good information about the paratope and epitope, we will execute a fast HADDOCK3 docking workflow, reducing the non-negligible computational cost of HADDOCK by decreasing the sampling, without impacting too much the accuracy of the resulting models. +Now that we have all required files at hand (PDB and restraints files), it is time to set up our docking protocol. +In this tutorial, considering we have rather good information about the paratope and epitope, we will execute a fast HADDOCK3 docking workflow, +reducing the non-negligible computational cost of HADDOCK by decreasing the sampling, without impacting too much the accuracy of the resulting models.
@@ -898,13 +903,14 @@ molecules = [ ambig_fname = "restraints/ambig-paratope-NMR-epitope.tbl" # Restraints to keep the antibody chains together unambig_fname = "restraints/antibody-unambig.tbl" +# Reduced sampling (50 instead of the default of 1000) sampling = 50 [caprieval] reference_fname = "pdbs/4G6M_matched.pdb" [seletop] -# Selection of the top 40 best scoring complexes +# Selection of the top 40 best scoring complexes (instead of the default of 200) select = 40 [flexref] @@ -944,15 +950,25 @@ reference_fname = "pdbs/4G6M_matched.pdb" {% endhighlight %} -In this case, since we have information for both interfaces we use a low-sampling configuration file, which takes only a small amount of computational resources to run. From the sampling parameters in the above config file, you can see we are sampling only 50 models at each stage of the docking: +In this case, since we have information for both interfaces we use a low-sampling configuration file, which takes only a small amount of computational resources to run. +The initial `sampling` parameter at the rigid-body energy minimization (`rigidbody`) module is set to 50 models, of which only the best 40 are passed to the flexible refinement (`flexref`) module with the `seletop` module. +The subsequent flexible refinement (`flexref` module) and energy minimisation (`emref`) modules will use all models passed by the `seletop` module. +FCC clustering (`clustfcc`) is then applied to group together models sharing a consistent fraction of the interface contacts. +The top 4 models of each cluster are saved to disk (`seletopclusts`). + +Multiple `caprieval` modules are executed at different stages of the workflow to check how the quality (and rankings) of the models change throughout the protocol. +In this case we are providing the known crystal structure of the complex as reference.
-The initial `sampling` parameter at the rigid-body energy minimization (*rigidbody*) module is set to 50 models, of which only best the 40 are passed to the flexible refinement (*flexref*) module with the *seletop* module. -The subsequence flexible refinement (*flexref* module) and energy minimisation (*emref*) modules will use all models passed by the *seletop* module. -FCC clustering (*clustfcc*) is then applied to group together models sharing a consistent fraction of the interface contacts. -The top 4 models of each cluster are saved to disk (*seletopclusts*). -Multiple *caprieval* modules are executed at different stages of the workflow to check how the quality (and rankings) of the models change throughout the protocol. +**_Note_**: In case no reference is available (the usual scenario), the best ranked model is used as reference for each stage. +Including `caprieval` at the various stages even when no reference is provided is useful to get the rankings and scores and visualise the results (see Analysis section below). -To get a list of all possible parameters that can be defined in a specific module (and their default values) you can use the following command: +**_Note_**: The default sampling would be 1000 models for `rigidbody` of which 200 are passed to the flexible refinement in `seletop`. +As an indication of the computational requirements, the default sampling workflow for this tutorial completes in about XX minutes using 10 cores on a MacOSX M2 processor. +In comparison, the reduced sampling run (50/40) takes about 6 1/2 minutes. + + + +**_Note_**: To get a list of all possible parameters that can be defined in a specific module (and their default values) you can use the following command: haddock3-cfg -m \@@ -987,7 +1003,7 @@ In in the first section of the workflow above we have a parameter `mode` definin -*__Hint__*: Use the `extract-capri-stats.sh` script to analyse the various runs and find the best (lowest i-RMSD or highest Dock-Q score).
+*__Hint__*: Use the `extract-capri-stats.sh` script to analyse the various runs and find the best (lowest i-RMSD or highest Dock-Q score) at the `emref` stage.- Execution of Fugaku using a full node (EU-ASEAN HPC School) expand_more + Execution on Fugaku using a full node (ASEAN HPC School) expand_more
To execute the workflow on Fugaku, we will create a job file that will execute HADDOCK3 on a node, with HADDOCK3 running in local mode (the setup in the above configuration file with `mode="local"`) and harvesting all core of that node (`ncores=50`). @@ -1117,7 +1133,8 @@ have to define the `queue` name and the maximum number of concurrent jobs sent t -HADDOCK3 supports a parallel pseudo-MPI implementation (functional but still very experimental at this stage). For this to work, the `mpi4py` library must have been installed at installation time. Refer to the [MPI-related instructions](https://www.bonvinlab.org/haddock3/tutorials/mpi.html){:target="_blank"}. +HADDOCK3 supports a parallel pseudo-MPI implementation. For this to work, the `mpi4py` library must have been installed at installation time. +Refer to the [MPI-related instructions](https://www.bonvinlab.org/haddock3/tutorials/mpi.html){:target="_blank"}. The execution mode should be set to `mpi` and the total number of cores should match the requested resources when submitting to the batch system. @@ -1204,14 +1221,15 @@ You can find information about the duration of the run at the bottom of the log For example, the `09_seletopclusts` directory contains the selected models from each cluster. The clusters in that directory are numbered based on their rank, i.e. `cluster_1` refers to the top-ranked cluster. Information about the origin of these files can be found in that directory in the `seletopclusts.txt` file. -The simplest way to extract ranking information and the corresponding HADDOCK scores is to look at the `10_caprieval` directories (which is why it is a good idea to have it as the final module, and possibly as intermediate steps). This directory will always contain a `capri_ss.tsv` single model statistics file, which contains the model names, rankings and statistics (score, iRMSD, Fnat, lRMSD, ilRMSD and dockq score). 
E.g.: +The simplest way to extract ranking information and the corresponding HADDOCK scores is to look at the `XX_caprieval` directories (which is why it is a good idea to have it as the final module, and possibly as intermediate steps). This directory will always contain a `capri_ss.tsv` single model statistics file, which contains the model names, rankings and statistics (score, iRMSD, Fnat, lRMSD, ilRMSD and dockq score). E.g. for `10_caprieval`:- model md5 caprieval_rank score irmsd fnat lrmsd ilrmsd dockq cluster_id cluster_ranking model-cluster_ranking air angles bonds bsa cdih coup dani desolv dihe elec improper rdcs rg sym total vdw vean xpcs -../06_emref/emref_1.pdb - 1 -142.417 1.193 0.862 2.242 2.261 0.803 - - - 61.388 0.000 0.000 1884.490 0.000 0.000 0.000 6.496 0.000 -546.456 0.000 0.000 0.000 0.000 -530.829 -45.760 0.000 0.000 -../06_emref/emref_2.pdb - 2 -142.268 0.957 0.948 1.681 1.512 0.874 - - - 78.754 0.000 0.000 1849.190 0.000 0.000 0.000 0.557 0.000 -497.733 0.000 0.000 0.000 0.000 -470.134 -51.154 0.000 0.000 -../06_emref/emref_3.pdb - 3 -142.107 1.040 0.931 1.985 1.675 0.852 - - - 44.821 0.000 0.000 1886.680 0.000 0.000 0.000 -0.829 0.000 -491.378 0.000 0.000 0.000 0.000 -494.041 -47.484 0.000 0.000 -../06_emref/emref_8.pdb - 4 -133.948 1.063 0.931 2.135 1.719 0.846 - - - 104.785 0.000 0.000 1746.970 0.000 0.000 0.000 3.183 0.000 -481.057 0.000 0.000 0.000 0.000 -427.670 -51.398 0.000 0.000 + model md5 caprieval_rank score irmsd fnat lrmsd ilrmsd dockq rmsd cluster_id cluster_ranking model-cluster_ranking air angles bonds bsa cdih coup dani desolv dihe elec improper rdcs rg sym total vdw vean xpcs +../09_seletopclusts/cluster_1_model_1.pdb - 1 -140.319 0.908 0.897 2.205 1.451 0.855 1.016 3 1 1 133.760 0.000 0.000 2010.880 0.000 0.000 0.000 7.010 0.000 -605.174 0.000 0.000 0.000 0.000 -511.084 -39.671 0.000 0.000 +../09_seletopclusts/cluster_1_model_2.pdb - 2 -137.507 0.879 0.948 1.951 1.354 0.881 0.989 3 1 2 189.059 0.000 0.000 1913.390 0.000 
0.000 0.000 3.243 0.000 -521.143 0.000 0.000 0.000 0.000 -387.512 -55.428 0.000 0.000 +../09_seletopclusts/cluster_1_model_3.pdb - 3 -126.481 1.052 0.914 3.038 1.958 0.824 1.293 3 1 3 127.044 0.000 0.000 1816.780 0.000 0.000 0.000 -2.884 0.000 -426.677 0.000 0.000 0.000 0.000 -350.599 -50.966 0.000 0.000 +../09_seletopclusts/cluster_1_model_4.pdb - 4 -102.227 1.334 0.793 2.331 2.292 0.760 1.341 3 1 4 128.628 0.000 0.000 1837.970 0.000 0.000 0.000 12.344 0.000 -410.669 0.000 0.000 0.000 0.000 -327.341 -45.299 0.000 0.000 +../09_seletopclusts/cluster_2_model_1.pdb - 5 -102.077 14.789 0.103 23.359 22.787 0.077 14.405 2 2 1 163.844 0.000 0.000 1888.310 0.000 0.000 0.000 2.575 0.000 -348.025 0.000 0.000 0.000 0.000 -235.613 -51.431 0.000 0.000 ...@@ -1243,7 +1261,7 @@ In CAPRI the quality of a model is defined as (for protein-protein complexes): * **high quality model**: i-RMSD < 1Å or l-RMSD < 1Å and Fnat > 0.5 (DOCKQ > 0.8) -Based on this CAPRI criterion, what is the quality of the best model listed above (emref_2.pdb)? +Based on these CAPRI criteria, what is the quality of the best model listed above (_cluster_1_model_1.pdb_)? In case where the `caprieval` module is called after a clustering step, an additional `capri_clt.tsv` file will be present in the directory. @@ -1251,16 +1269,15 @@ This file contains the cluster ranking and score statistics, averaged over the m (4 by default), with their corresponding standard deviations. 
E.g.:-cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank - 1 2 4 - -140.185 3.603 1.063 0.085 0.918 0.033 2.011 0.211 0.844 0.026 72.437 22.198 1841.833 56.754 2.352 2.793 -504.156 25.137 -480.668 37.466 -48.949 2.407 1 - 2 4 4 - -104.627 9.604 4.985 0.167 0.159 0.022 10.983 0.735 0.206 0.017 140.887 17.004 1599.765 101.246 3.738 2.425 -267.555 26.639 -195.611 16.008 -68.943 5.880 2 - 3 1 4 - -90.803 5.270 10.263 0.837 0.086 0.017 19.261 1.307 0.091 0.012 139.801 40.076 1431.878 53.377 3.217 6.569 -335.970 38.177 -236.975 36.344 -40.806 2.883 3 - 4 3 4 - -90.321 12.145 14.645 0.132 0.099 0.007 23.305 0.134 0.076 0.003 154.818 25.452 1792.695 68.993 5.937 1.759 -308.110 28.984 -203.410 46.861 -50.118 6.689 4 +cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std ilrmsd ilrmsd_std rmsd rmsd_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank + 1 3 4 - -126.634 15.010 1.044 0.180 0.888 0.058 2.381 0.403 0.830 0.045 1.764 0.382 1.160 0.159 144.623 25.775 1894.755 76.054 4.928 5.550 -490.916 78.318 -394.134 70.848 -47.841 5.927 1 + 2 2 4 - -98.425 2.624 14.572 0.524 0.095 0.009 23.293 0.233 0.074 0.002 22.593 0.371 14.300 0.194 159.227 8.415 1781.358 114.002 2.706 2.898 -340.312 32.395 -230.077 26.771 -48.992 5.015 2 + 3 1 4 - -91.137 1.918 10.249 0.530 0.056 0.007 19.692 0.505 0.078 0.005 18.190 0.649 10.554 0.495 173.598 42.201 1441.505 77.296 4.873 4.329 -389.212 18.467 -251.141 40.747 -35.527 5.170 3 ...-In this file you find the cluster rank, the cluster ID (which is related to the size of the cluster, 1 being always the largest cluster), the number of models (n) in the cluster and the corresponding statistics (averages + standard deviations). 
The corresponding cluster PDB files will be found in the preceeding `09_seletopclusts` directory. +In this file you find the cluster rank (which corresponds to the naming of the clusters in the previous `seletopclusts` directory), the cluster ID (which is related to the size of the cluster, 1 being always the largest cluster), the number of models (n) in the cluster and the corresponding statistics (averages + standard deviations). The corresponding cluster PDB files will be found in the preceding `09_seletopclusts` directory. While these simple text files can be easily checked from the command line already, they might be cumbersome to read. For that reason, we have developed a post-processing analysis that automatically generates html reports for all `caprieval` steps in the workflow. @@ -1281,7 +1298,7 @@ Simply click on the arrows of the term you want to use to sort the table (and yo A snapshot of this table is shown below:- You can also view this report online [here](plots/report.html){:target="_blank"} @@ -1316,7 +1333,7 @@ These are interactive plots. A menu on the top right of the first row (you might allows you to zoom in and out in the plots and turn on and off clusters.+
![]()
- As a reminder, you can also view this report online [here](plots/report.html){:target="_blank"} @@ -1329,7 +1346,7 @@ Examine the plots (remember here that higher DockQ values and lower i-RMSD value Finally, the report also shows plots of the cluster statistics (distributions of values per cluster ordered according to their HADDOCK rank):+
![]()
- For this antibody-antigen case, which of the score components correlates best with the quality of the models? @@ -1344,7 +1361,7 @@ Going back to command line analysis, we are providing in the `scripts` directory To use it, simply call the script with as argument the run directory you want to analyze, e.g.: -./scripts/extract-capri-stats.sh ./runs/run1-CDR-NMR-CSP +./scripts/extract-capri-stats.sh ./runs/run1+
@@ -1353,45 +1370,45 @@ To use it, simply call the script with as argument the run directory you want to============================================== -== runs/run1-CDR-NMR-CSP/02_caprieval/capri_ss.tsv +== run1/02_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 10 out of 50 -Total number of medium or better models: 7 out of 50 -Total number of high quality models: 0 out of 50 - -First acceptable model - rank: 1 i-RMSD: 1.181 Fnat: 0.690 DockQ: 0.749 -First medium model - rank: 1 i-RMSD: 1.181 Fnat: 0.690 DockQ: 0.749 -Best model - rank: 2 i-RMSD: 1.074 Fnat: 0.707 DockQ: 0.731 +Total number of acceptable or better models: 9 out of 50 +Total number of medium or better models: 4 out of 50 +Total number of high quality models: 1 out of 50 + +First acceptable model - rank: 1 i-RMSD: 1.034 Fnat: 0.707 DockQ: 0.744 +First medium model - rank: 1 i-RMSD: 1.034 Fnat: 0.707 DockQ: 0.744 +Best model - rank: 7 i-RMSD: 0.982 Fnat: 0.759 DockQ: 0.774 ============================================== -== runs/run1-CDR-NMR-CSP/05_caprieval/capri_ss.tsv +== run1/05_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 8 out of 40 -Total number of medium or better models: 7 out of 40 -Total number of high quality models: 1 out of 40 - -First acceptable model - rank: 1 i-RMSD: 1.145 Fnat: 0.828 DockQ: 0.798 -First medium model - rank: 1 i-RMSD: 1.145 Fnat: 0.828 DockQ: 0.798 -Best model - rank: 2 i-RMSD: 0.936 Fnat: 0.948 DockQ: 0.877 +Total number of acceptable or better models: 7 out of 40 +Total number of medium or better models: 4 out of 40 +Total number of high quality models: 2 out of 40 + +First acceptable model - rank: 1 i-RMSD: 0.836 Fnat: 0.931 DockQ: 0.878 +First medium model - rank: 1 i-RMSD: 0.836 Fnat: 0.931 DockQ: 0.878 +Best model - rank: 1 i-RMSD: 0.836 Fnat: 0.931 DockQ: 0.878 ============================================== -== 
runs/run1-CDR-NMR-CSP/07_caprieval/capri_ss.tsv +== run1/07_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 8 out of 40 -Total number of medium or better models: 7 out of 40 -Total number of high quality models: 1 out of 40 - -First acceptable model - rank: 1 i-RMSD: 1.193 Fnat: 0.862 DockQ: 0.803 -First medium model - rank: 1 i-RMSD: 1.193 Fnat: 0.862 DockQ: 0.803 -Best model - rank: 2 i-RMSD: 0.957 Fnat: 0.948 DockQ: 0.874 +Total number of acceptable or better models: 7 out of 40 +Total number of medium or better models: 4 out of 40 +Total number of high quality models: 2 out of 40 + +First acceptable model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +First medium model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +Best model - rank: 2 i-RMSD: 0.879 Fnat: 0.948 DockQ: 0.881 ============================================== -== runs/run1-CDR-NMR-CSP/10_caprieval/capri_ss.tsv +== run1/10_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 4 out of 16 -Total number of medium or better models: 4 out of 16 -Total number of high quality models: 1 out of 16 - -First acceptable model - rank: 1 i-RMSD: 1.193 Fnat: 0.862 DockQ: 0.803 -First medium model - rank: 1 i-RMSD: 1.193 Fnat: 0.862 DockQ: 0.803 -Best model - rank: 2 i-RMSD: 0.957 Fnat: 0.948 DockQ: 0.874 +Total number of acceptable or better models: 4 out of 12 +Total number of medium or better models: 4 out of 12 +Total number of high quality models: 2 out of 12 + +First acceptable model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +First medium model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +Best model - rank: 2 i-RMSD: 0.879 Fnat: 0.948 DockQ: 0.881
@@ -1427,7 +1444,7 @@ _**Note**_ that this kind of analysis only makes sense when we know the referenc
- +**_Note_**: A similar script to extract cluster statistics is available in the `scripts` directory as `extract-capri-stats-clt.sh`.
### Contacts analysis @@ -1452,26 +1469,24 @@ Can you identify which residue(s) make(s) the most intermolecular contacts? ### Visualization of the models -To visualize the models from the top cluster of your favorite run, start PyMOL and load the cluster representatives you want to view, e.g. this could be the top model from cluster1 for run `run1-CDR-NMR-CSP`. +To visualize the models from the top cluster of your favorite run, start PyMOL and load the cluster representatives you want to view, e.g. this could be the top model from cluster3 for run `run1`. These can be found in the `runs/run1/09_seletopclusts/` directory. -File menu -> Open -> select cluster_1_model_1.pdb +File menu -> Open -> select cluster_1_model_1.pdb *__Note__* that the PDB files are compressed (gzipped) by default at the end of a run. You can uncompress those with the `gunzip` command. PyMOL can directly read the gzipped files. If you want to get an impression of how well-defined a cluster is, repeat this for the best N models you want to view (`cluster_1_model_X.pdb`). Also load the reference structure from the `pdbs` directory, `4G6M-matched.pdb`. +File menu -> Open -> select 4G6M_matched.pdb + Once all files have been loaded, type in the PyMOL command window: -show cartoon - - -util.cbc - - -color yellow, 4G6M_matched +show cartoon
+util.cbc
+color yellow, 4G6M_matched
Let us then superimpose all models onto the reference structure: @@ -1487,16 +1502,10 @@ How close are the top4 models to the reference? Did HADDOCK do a good job at ran Let’s now check if the active residues which we have defined (the paratope and epitope) are actually part of the interface. In the PyMOL command window type: -select paratope, (resi 31+32+33+34+35+52+54+55+56+100+101+102+103+104+105+106+151+152+169+170+173+211+212+213+214+216 and chain A) - - -color red, paratope - - -select epitope, (resi 72+73+74+75+81+83+84+89+90+92+94+96+97+98+115+116+117 and chain B) - - -color orange, epitope +select paratope, (resi 31+32+33+34+35+52+54+55+56+100+101+102+103+104+105+106+151+152+169+170+173+211+212+213+214+216 and chain A)
+color red, paratope
+select epitope, (resi 72+73+74+75+81+83+84+89+90+92+94+96+97+98+115+116+117 and chain B)
+color orange, epitope
@@ -1509,9 +1518,9 @@ Are the residues of the paratope and NMR epitope at the interface?See the overlay of the top ranked model onto the reference structure expand_more -Top-ranked model of the top cluster superimposed onto the reference crystal structure (in yellow)
+Top-ranked model of the top cluster (cluster_1_model_1) superimposed onto the reference crystal structure (in yellow)
- +
![]()
@@ -1525,7 +1534,7 @@ Are the residues of the paratope and NMR epitope at the interface? We have demonstrated the usage of HADDOCK3 in an antibody-antigen docking scenario making use of the paratope information on the antibody side (i.e. no prior experimental information, but computational predictions) and an NMR-mapped epitope for the antigen. Compared to the static HADDOCK2.X workflow, the modularity and flexibility of HADDOCK3 allow to customise the docking protocols and to run a deeper analysis of the results. -While HADDOCK3 is still very much a work in progress, its intrinsic flexibility can be used to improve the performance of antibody-antigen modelling compared to the results we presented in our +HADDOCK3's intrinsic flexibility can be used to improve the performance of antibody-antigen modelling compared to the results we presented in our [Structure 2020](https://doi.org/10.1016/j.str.2019.10.011){:target="_blank"} article and in the [related HADDOCK2.4 tutorial](/education/HADDOCK24/HADDOCK24-antibody-antigen){:target="_blank"}. @@ -1559,7 +1568,7 @@ How many interface clusters were found for this protein? Once you download the output archive, you can find the clustering information presented in the dendrogram:- We can see how the two *4G6M* antibody chains are recognized as a unique cluster, clearly separated from the other binding surfaces and, in particular, from those proper to IL-1RI (uniprot ID P14778). @@ -1610,13 +1619,13 @@ As was demonstrated in the tutorial, those files must be preprocessed for their Load the experimental unbound structure (`4G6K_clean.pdb`) and the two AI models in PyMOL to see whether they resemble the experimental unbound structure. 
- + File menu -> Open -> select 4G6K_clean.pdb - + File menu -> Open -> select 4G6K_abb_clean.pdb - + File menu -> Open -> select 4G6K_af2_clean.pdb @@ -1646,12 +1655,12 @@ For docking purposes however, it might be more interesting to know how far are t The closer it is, the easier it should become to model the complex by docking. To assess this, we can load the structure of the complex in PyMOL and align all other structures/models to it. - + File menu -> Open -> select 4G6M_matched.pdb -File menu -> Open -> color yellow, 4G6M_matched +color yellow, 4G6M_matched Align now the models to the experimental bound structure @@ -1700,37 +1709,37 @@ Which starting structure of the antibody gives the best results in terms of clus+
============================================== -== runs/run1-CDR-NMR-CSP/10_caprieval/capri_clt.tsv +== run1/10_caprieval/capri_clt.tsv ============================================== -Total number of acceptable or better clusters: 1 out of 4 -Total number of medium or better clusters: 1 out of 4 -Total number of high quality clusters: 0 out of 4 - -First acceptable cluster - rank: 1 i-RMSD: 1.063 Fnat: 0.918 DockQ: 0.844 -First medium cluster - rank: 1 i-RMSD: 1.063 Fnat: 0.918 DockQ: 0.844 -Best cluster - rank: 1 i-RMSD: 1.063 Fnat: 0.918 DockQ: 0.844 +Total number of acceptable or better clusters: 1 out of 3 +Total number of medium or better clusters: 1 out of 3 +Total number of high quality clusters: 0 out of 3 + +First acceptable cluster - rank: 1 i-RMSD: 1.044 Fnat: 0.888 DockQ: 0.830 +First medium cluster - rank: 1 i-RMSD: 1.044 Fnat: 0.888 DockQ: 0.830 +Best cluster - rank: 1 i-RMSD: 1.044 Fnat: 0.888 DockQ: 0.830 ============================================== -== runs/run1-abb-CDR-NMR-CSP/10_caprieval/capri_clt.tsv +== run1-abb/10_caprieval/capri_clt.tsv ============================================== -Total number of acceptable or better clusters: 1 out of 2 -Total number of medium or better clusters: 1 out of 2 -Total number of high quality clusters: 0 out of 2 - -First acceptable cluster - rank: 1 i-RMSD: 1.197 Fnat: 0.845 DockQ: 0.796 -First medium cluster - rank: 1 i-RMSD: 1.197 Fnat: 0.845 DockQ: 0.796 -Best cluster - rank: 1 i-RMSD: 1.197 Fnat: 0.845 DockQ: 0.796 +Total number of acceptable or better clusters: 1 out of 5 +Total number of medium or better clusters: 1 out of 5 +Total number of high quality clusters: 0 out of 5 + +First acceptable cluster - rank: 1 i-RMSD: 1.134 Fnat: 0.841 DockQ: 0.796 +First medium cluster - rank: 1 i-RMSD: 1.134 Fnat: 0.841 DockQ: 0.796 +Best cluster - rank: 1 i-RMSD: 1.134 Fnat: 0.841 DockQ: 0.796 ============================================== -== runs/run1-CDR-NMR-CSP-af2/10_caprieval/capri_clt.tsv +== 
run1-af2/10_caprieval/capri_clt.tsv ============================================== -Total number of acceptable or better clusters: 3 out of 5 -Total number of medium or better clusters: 0 out of 5 -Total number of high quality clusters: 0 out of 5 - -First acceptable cluster - rank: 1 i-RMSD: 2.458 Fnat: 0.474 DockQ: 0.486 -First medium cluster - rank: - i-RMSD: - Fnat: - DockQ: - -Best cluster - rank: 1 i-RMSD: 2.458 Fnat: 0.474 DockQ: 0.486 +Total number of acceptable or better clusters: 1 out of 4 +Total number of medium or better clusters: 0 out of 4 +Total number of high quality clusters: 0 out of 4 + +First acceptable cluster - rank: 3 i-RMSD: 3.412 Fnat: 0.302 DockQ: 0.275 +First medium cluster - rank: i-RMSD: Fnat: DockQ: +Best cluster - rank: 3 i-RMSD: 3.412 Fnat: 0.302 DockQ: 0.275
@@ -1740,7 +1749,7 @@ Best cluster - rank: 1 i-RMSD: 2.458 Fnat: 0.474 DockQ: 0.486 Which starting structure of the antibody gives the best overall model (irrespective of the ranking)?@@ -1825,9 +1834,6 @@ run_dir = "run-ens-CDR-NMR-CSP" mode = "local" ncores = 50 -# Self contained rundir (to avoid problems with long filename paths) -self_contained = true - # Post-processing to generate statistics and plots postprocess = true clean = true @@ -1847,9 +1853,13 @@ molecules = [ ambig_fname = "restraints/ambig-paratope-NMR-epitope.tbl" # Restraints to keep the antibody chains together unambig_fname = "restraints/antibody-unambig.tbl" -# Increased sampling so each conformation is sampled 50 times +# Reduced sampling (150 instead of the default of 1000) +# Increased to 150 so that each conformation is sampled 50 times sampling = 150 +[caprieval] +reference_fname = "pdbs/4G6M_matched.pdb" + [clustfcc] plot_matrix = true @@ -1916,7 +1926,7 @@ Compared to the original workflow described in this tutorial we have added clust Run haddock3 with this configuration file as described above. -A pre-calculated run is provided in the `runs` directory as `run1-ens-clst`. +A pre-calculated run is provided in the `runs` directory as `run1-ens`. Analyse your run (or the pre-calculated ones) as described previously. @@ -1926,15 +1936,15 @@ Analyse your run (or the pre-calculated ones) as described previously.@@ -1748,37 +1757,37 @@ Which starting structure of the antibody gives the best overall model (irrespect
============================================== -== runs/run1-CDR-NMR-CSP/07_caprieval/capri_ss.tsv +== run1/07_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 8 out of 40 -Total number of medium or better models: 7 out of 40 -Total number of high quality models: 1 out of 40 - -First acceptable model - rank: 1 i-RMSD: 1.193 Fnat: 0.862 DockQ: 0.803 -First medium model - rank: 1 i-RMSD: 1.193 Fnat: 0.862 DockQ: 0.803 -Best model - rank: 2 i-RMSD: 0.957 Fnat: 0.948 DockQ: 0.874 +Total number of acceptable or better models: 7 out of 40 +Total number of medium or better models: 4 out of 40 +Total number of high quality models: 2 out of 40 + +First acceptable model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +First medium model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +Best model - rank: 2 i-RMSD: 0.879 Fnat: 0.948 DockQ: 0.881 ============================================== -== runs/run1-abb-CDR-NMR-CSP/07_caprieval/capri_ss.tsv +== run1-abb/07_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 13 out of 40 -Total number of medium or better models: 9 out of 40 -Total number of high quality models: 1 out of 40 - -First acceptable model - rank: 1 i-RMSD: 1.406 Fnat: 0.862 DockQ: 0.775 -First medium model - rank: 1 i-RMSD: 1.406 Fnat: 0.862 DockQ: 0.775 -Best model - rank: 4 i-RMSD: 0.862 Fnat: 0.879 DockQ: 0.870 +Total number of acceptable or better models: 5 out of 40 +Total number of medium or better models: 4 out of 40 +Total number of high quality models: 1 out of 40 + +First acceptable model - rank: 1 i-RMSD: 0.990 Fnat: 0.931 DockQ: 0.860 +First medium model - rank: 1 i-RMSD: 0.990 Fnat: 0.931 DockQ: 0.860 +Best model - rank: 1 i-RMSD: 0.990 Fnat: 0.931 DockQ: 0.860 ============================================== -== runs/run1-CDR-NMR-CSP-af2/07_caprieval/capri_ss.tsv +== run1-af2/07_caprieval/capri_ss.tsv 
============================================== -Total number of acceptable or better models: 15 out of 40 -Total number of medium or better models: 1 out of 40 -Total number of high quality models: 0 out of 40 - -First acceptable model - rank: 1 i-RMSD: 2.780 Fnat: 0.362 DockQ: 0.421 -First medium model - rank: 10 i-RMSD: 1.654 Fnat: 0.707 DockQ: 0.645 -Best model - rank: 10 i-RMSD: 1.654 Fnat: 0.707 DockQ: 0.645 +Total number of acceptable or better models: 10 out of 40 +Total number of medium or better models: 0 out of 40 +Total number of high quality models: 0 out of 40 + +First acceptable model - rank: 1 i-RMSD: 2.483 Fnat: 0.586 DockQ: 0.545 +First medium model - rank: i-RMSD: Fnat: DockQ: +Best model - rank: 21 i-RMSD: 2.396 Fnat: 0.448 DockQ: 0.513
============================================== -== runs/run-ens-CDR-NMR-CSP/11_caprieval/capri_clt.tsv +== run1-ens//11_caprieval/capri_clt.tsv ============================================== -Total number of acceptable or better clusters: 4 out of 11 +Total number of acceptable or better clusters: 3 out of 11 Total number of medium or better clusters: 1 out of 11 -Total number of high quality clusters: 0 out of 11 - -First acceptable cluster - rank: 1 i-RMSD: 1.188 Fnat: 0.862 DockQ: 0.795 -First medium cluster - rank: 1 i-RMSD: 1.188 Fnat: 0.862 DockQ: 0.795 -Best cluster - rank: 1 i-RMSD: 1.188 Fnat: 0.862 DockQ: 0.795 +Total number of high quality clusters: 1 out of 11 + +First acceptable cluster - rank: 1 i-RMSD: 0.981 Fnat: 0.918 DockQ: 0.850 +First medium cluster - rank: 1 i-RMSD: 0.981 Fnat: 0.918 DockQ: 0.850 +Best cluster - rank: 1 i-RMSD: 0.981 Fnat: 0.918 DockQ: 0.850
@@ -1947,35 +1957,45 @@ Best cluster - rank: 1 i-RMSD: 1.188 Fnat: 0.862 DockQ: 0.795============================================== -== runs/run-ens-CDR-NMR-CSP/04_caprieval/capri_ss.tsv +== run1-ens//04_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 25 out of 83 +Total number of acceptable or better models: 16 out of 83 Total number of medium or better models: 10 out of 83 -Total number of high quality models: 0 out of 83 - -First acceptable model - rank: 3 i-RMSD: 1.238 Fnat: 0.672 DockQ: 0.725 -First medium model - rank: 3 i-RMSD: 1.238 Fnat: 0.672 DockQ: 0.725 -Best model - rank: 6 i-RMSD: 1.074 Fnat: 0.707 DockQ: 0.731 +Total number of high quality models: 1 out of 83 + +First acceptable model - rank: 2 i-RMSD: 1.422 Fnat: 0.586 DockQ: 0.631 +First medium model - rank: 2 i-RMSD: 1.422 Fnat: 0.586 DockQ: 0.631 +Best model - rank: 24 i-RMSD: 0.982 Fnat: 0.759 DockQ: 0.774 ============================================== -== runs/run-ens-CDR-NMR-CSP/06_caprieval/capri_ss.tsv +== run1-ens//06_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 27 out of 83 -Total number of medium or better models: 10 out of 83 -Total number of high quality models: 5 out of 83 - -First acceptable model - rank: 1 i-RMSD: 1.492 Fnat: 0.741 DockQ: 0.697 -First medium model - rank: 1 i-RMSD: 1.492 Fnat: 0.741 DockQ: 0.697 -Best model - rank: 4 i-RMSD: 0.857 Fnat: 0.897 DockQ: 0.872 +Total number of acceptable or better models: 17 out of 83 +Total number of medium or better models: 9 out of 83 +Total number of high quality models: 4 out of 83 + +First acceptable model - rank: 1 i-RMSD: 0.836 Fnat: 0.931 DockQ: 0.878 +First medium model - rank: 1 i-RMSD: 0.836 Fnat: 0.931 DockQ: 0.878 +Best model - rank: 7 i-RMSD: 0.829 Fnat: 0.845 DockQ: 0.854 ============================================== -== runs/run-ens-CDR-NMR-CSP/08_caprieval/capri_ss.tsv +== 
run1-ens//08_caprieval/capri_ss.tsv ============================================== -Total number of acceptable or better models: 26 out of 83 -Total number of medium or better models: 10 out of 83 -Total number of high quality models: 3 out of 83 - -First acceptable model - rank: 1 i-RMSD: 1.504 Fnat: 0.776 DockQ: 0.708 -First medium model - rank: 1 i-RMSD: 1.504 Fnat: 0.776 DockQ: 0.708 -Best model - rank: 4 i-RMSD: 0.902 Fnat: 0.914 DockQ: 0.871 +Total number of acceptable or better models: 16 out of 83 +Total number of medium or better models: 9 out of 83 +Total number of high quality models: 3 out of 83 + +First acceptable model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +First medium model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +Best model - rank: 12 i-RMSD: 0.851 Fnat: 0.845 DockQ: 0.851 +============================================== +== run1-ens//11_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 10 out of 44 +Total number of medium or better models: 4 out of 44 +Total number of high quality models: 2 out of 44 + +First acceptable model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +First medium model - rank: 1 i-RMSD: 0.908 Fnat: 0.897 DockQ: 0.855 +Best model - rank: 2 i-RMSD: 0.879 Fnat: 0.948 DockQ: 0.881
@@ -2144,7 +2164,7 @@ Our antibody-antigen complex consists of three interfaces:
- +
![]()
@@ -2220,7 +2240,7 @@ Does any model have the NMR-identified epitope at the interface with the antibod
- +
![]()
@@ -2246,7 +2266,7 @@ alignto sele
- +
![]()
@@ -2268,7 +2288,7 @@ Try to reproduce the previous steps and examine the quality of the various gener
- +
![]()
@@ -2281,7 +2301,7 @@ Try to reproduce the previous steps and examine the quality of the various gener
- +
![]()
@@ -2349,7 +2369,7 @@ You will land on the workflow-builder page, where you can interactively build yo This page is subdivided into three areas described below.- On the left is presented the list of modules. @@ -2367,42 +2387,6 @@ Unfold a property by clicking on it, and discover the set of related parameters. Finally, once you configured your workflow, click on `submit` to launch the corresponding haddock3 run. - -+
--- Display available modules expand_more -
- -* **Topology modules** - * `topoaa`: *Generates the all-atom topologies for the CNS engine.* - -* **Sampling modules** - * `rigidbody`: *Performs rigid body energy minimization with CNS (`it0` in haddock2.x).* - * `lightdock`: *Third-party glow-worm swam optimization docking software.* - -* **Model refinement modules** - * `flexref`: *Performs semi-flexible refinement using a simulated annealing protocol through molecular dynamics simulations in torsion angle space (`it1` in haddock2.x).* - * `emref`: *Performs refinement by energy minimisation (`itw` EM only in haddock2.4).* - * `mdref`: *Performs refinement by a short molecular dynamics simulation in explicit solvent (`itw` in haddock2.X).* - -* **Scoring modules** - * `emscoring`: *Performs scoring of a complex performing a short EM (builds the topology and all missing atoms).* - * `mdscoring`: *Performs scoring of a complex performing a short MD in explicit solvent + EM (builds the topology and all missing atoms).* - -* **Analysis modules** - * `alascan`: *Performs a systematic (or user-define) alanine scanning mutagenesis of interface residues.* - * `caprieval`: *Calculates CAPRI metrics (i-RMSD, l-RMSD, Fnat, DockQ) with respect to the top scoring model or reference structure if provided.* - * `clustfcc`: *Clusters models based on the fraction of common contacts (FCC)* - * `clustrmsd`: *Clusters models based on pairwise RMSD matrix calculated with the `rmsdmatrix` module.* - * `contactmap`: *Generate contact matrices of both intra- and intermolecular contacts and a chordchart of intermolecular contacts.* - * `rmsdmatrix`: *Calculates the pairwise RMSD matrix between all the models generated in the previous step.* - * `ilrmsdmatrix`: *Calculates the pairwise interface-ligand-RMSD (il-RMSD) matrix between all the models generated in the previous step.* - * `seletop`: *Selects the top N models from the previous step.* - * `seletopclusts`: *Selects top N clusters from the previous step.* - -
-
**Note** that you can also upload a zip archive of a workflow containing a configuration file named `workflow.cfg` and all corresponding files (e.g.: pdb structures, restraints files, topological parameters, etc.). Workflow archives presented in this tutorial are available in `workflows/webapp-workflows/`. @@ -2429,8 +2413,6 @@ Create the zip archive
-
- **Note** that the archives of workflows are available in `workflows/webapp-workflows/`, and archives of pre-computed runs are stored in `runs/webapp_runs/`. @@ -2442,7 +2424,7 @@ On the right side of the table, actions can be performed. The current implementation allows to rename a run or to delete it.- To access the content of a run, click on its name to be directed to the haddock3 webapp results page. @@ -2551,5 +2533,5 @@ And check also our [education](/education){:target="_blank"} web page where you [link-freesasa]: https://freesasa.github.io "FreeSASA" [link-pdbtools]:http://www.bonvinlab.org/pdb-tools/ "PDB-Tools" [link-pymol]: https://www.pymol.org/ "PyMOL" -[nat-pro]: https://www.nature.com/nprot/journal/v5/n5/abs/nprot.2010.32.html "Nature protocol" +[nat-pro]: https://www.nature.com/articles/s41596-024-01011-0.epdf?sharing_token=UHDrW9bNh3BqijxD2u9Xd9RgN0jAjWel9jnR3ZoTv0O8Cyf_B_3QikVaNIBRHxp9xyFsQ7dSV3t-kBtpCaFZWPfnuUnAtvRG_vkef9o4oWuhrOLGbBXJVlaaA9ALOULn6NjxbiqC2VkmpD2ZR_r-o0sgRZoHVz10JqIYOeus_nM%3D "Nature protocol" [tbl-examples]: https://github.com/haddocking/haddock-tools/tree/master/haddock_tbl_validation "tbl examples" diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md-Bratislava b/education/HADDOCK3/HADDOCK3-antibody-antigen/index.md-Bratislava similarity index 100% rename from education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md-Bratislava rename to education/HADDOCK3/HADDOCK3-antibody-antigen/index.md-Bratislava diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/mutant-ref-overlay-alascan.png b/education/HADDOCK3/HADDOCK3-antibody-antigen/mutant-ref-overlay-alascan.png new file mode 100644 index 000000000..cb24cec20 Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-antibody-antigen/mutant-ref-overlay-alascan.png differ diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/mutant-stacking.png b/education/HADDOCK3/HADDOCK3-antibody-antigen/mutant-stacking.png new file mode 100644 index 
000000000..23d22b9f1 Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-antibody-antigen/mutant-stacking.png differ diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/air_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/air_clt.html new file mode 100644 index 000000000..1a0724d59 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/air_clt.html @@ -0,0 +1,23 @@ + ++
+ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/bsa_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/bsa_clt.html new file mode 100644 index 000000000..6569b303d --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/bsa_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/capri_clt.tsv b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/capri_clt.tsv new file mode 100644 index 000000000..5100ccedf --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/capri_clt.tsv @@ -0,0 +1,17 @@ +######################################## +# `caprieval` cluster-based analysis +# +# > sortby_key=score +# > sort_ascending=True +# > clt_threshold=4 +# +# NOTE: if under_eval=yes, it means that there were less models in a cluster than +# clt_threshold, thus these values were under evaluated. +# You might need to tweak the value of clt_threshold or change some parameters +# in `clustfcc` depending on your analysis. 
+# +######################################## +cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std ilrmsd ilrmsd_std rmsd rmsd_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank +1 3 4 - -126.634 15.010 1.044 0.180 0.888 0.058 2.381 0.403 0.830 0.045 1.764 0.382 1.160 0.159 144.623 25.775 1894.755 76.054 4.928 5.550 -490.916 78.318 -394.134 70.848 -47.841 5.927 1 +2 2 4 - -98.425 2.624 14.572 0.524 0.095 0.009 23.293 0.233 0.074 0.002 22.593 0.371 14.300 0.194 159.227 8.415 1781.358 114.002 2.706 2.898 -340.312 32.395 -230.077 26.771 -48.992 5.015 2 +3 1 4 - -91.137 1.918 10.249 0.530 0.056 0.007 19.692 0.505 0.078 0.005 18.190 0.649 10.554 0.495 173.598 42.201 1441.505 77.296 4.873 4.329 -389.212 18.467 -251.141 40.747 -35.527 5.170 3 diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/capri_ss.tsv b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/capri_ss.tsv new file mode 100644 index 000000000..eaa1ba5fb --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/capri_ss.tsv @@ -0,0 +1,13 @@ +model md5 caprieval_rank score irmsd fnat lrmsd ilrmsd dockq rmsd cluster_id cluster_ranking model-cluster_ranking air angles bonds bsa cdih coup dani desolv dihe elec improper rdcs rg sym total vdw vean xpcs +../../09_seletopclusts/cluster_1_model_1.pdb - 1 -140.319 0.908 0.897 2.205 1.451 0.855 1.016 3 1 1 133.760 0.000 0.000 2010.880 0.000 0.000 0.000 7.010 0.000 -605.174 0.000 0.000 0.000 0.000 -511.084 -39.671 0.000 0.000 +../../09_seletopclusts/cluster_1_model_2.pdb - 2 -137.507 0.879 0.948 1.951 1.354 0.881 0.989 3 1 2 189.059 0.000 0.000 1913.390 0.000 0.000 0.000 3.243 0.000 -521.143 0.000 0.000 0.000 0.000 -387.512 -55.428 0.000 0.000 +../../09_seletopclusts/cluster_1_model_3.pdb - 3 -126.481 1.052 0.914 3.038 1.958 0.824 1.293 3 1 3 127.044 0.000 0.000 1816.780 0.000 0.000 0.000 -2.884 0.000 -426.677 0.000 0.000 0.000 0.000 
-350.599 -50.966 0.000 0.000 +../../09_seletopclusts/cluster_1_model_4.pdb - 4 -102.227 1.334 0.793 2.331 2.292 0.760 1.341 3 1 4 128.628 0.000 0.000 1837.970 0.000 0.000 0.000 12.344 0.000 -410.669 0.000 0.000 0.000 0.000 -327.341 -45.299 0.000 0.000 +../../09_seletopclusts/cluster_2_model_1.pdb - 5 -102.077 14.789 0.103 23.359 22.787 0.077 14.405 2 2 1 163.844 0.000 0.000 1888.310 0.000 0.000 0.000 2.575 0.000 -348.025 0.000 0.000 0.000 0.000 -235.613 -51.431 0.000 0.000 +../../09_seletopclusts/cluster_2_model_2.pdb - 6 -99.007 13.669 0.086 23.035 21.986 0.073 13.965 2 2 2 145.719 0.000 0.000 1592.390 0.000 0.000 0.000 -1.450 0.000 -346.018 0.000 0.000 0.000 0.000 -243.225 -42.926 0.000 0.000 +../../09_seletopclusts/cluster_2_model_3.pdb - 7 -97.874 14.908 0.103 23.642 22.973 0.076 14.413 2 2 3 168.143 0.000 0.000 1795.210 0.000 0.000 0.000 6.735 0.000 -378.411 0.000 0.000 0.000 0.000 -256.009 -45.741 0.000 0.000 +../../09_seletopclusts/cluster_2_model_4.pdb - 8 -94.743 14.921 0.086 23.138 22.625 0.072 14.418 2 2 4 159.203 0.000 0.000 1849.520 0.000 0.000 0.000 2.964 0.000 -288.793 0.000 0.000 0.000 0.000 -185.459 -55.869 0.000 0.000 +../../09_seletopclusts/cluster_3_model_1.pdb - 9 -93.165 10.537 0.052 19.771 18.571 0.076 10.816 1 3 1 179.052 0.000 0.000 1570.970 0.000 0.000 0.000 10.600 0.000 -410.008 0.000 0.000 0.000 0.000 -270.624 -39.669 0.000 0.000 +../../09_seletopclusts/cluster_3_model_2.pdb - 10 -91.828 10.161 0.052 19.964 18.039 0.076 10.559 1 3 2 230.397 0.000 0.000 1406.960 0.000 0.000 0.000 -0.695 0.000 -375.030 0.000 0.000 0.000 0.000 -183.799 -39.166 0.000 0.000 +../../09_seletopclusts/cluster_3_model_3.pdb - 11 -91.568 10.860 0.052 20.179 18.939 0.074 11.083 1 3 3 111.407 0.000 0.000 1420.870 0.000 0.000 0.000 7.154 0.000 -367.082 0.000 0.000 0.000 0.000 -292.122 -36.446 0.000 0.000 +../../09_seletopclusts/cluster_3_model_4.pdb - 12 -87.985 9.438 0.069 18.853 17.211 0.088 9.759 1 3 4 173.535 0.000 0.000 1367.220 0.000 0.000 0.000 2.434 0.000 
-404.727 0.000 0.000 0.000 0.000 -258.020 -26.828 0.000 0.000 diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/desolv_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/desolv_clt.html new file mode 100644 index 000000000..425f11968 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/desolv_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_air.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_air.html new file mode 100644 index 000000000..ba9486c77 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_air.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_clt.html new file mode 100644 index 000000000..d2a3f5ccf --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_desolv.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_desolv.html new file mode 100644 index 000000000..38139646e --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_desolv.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_elec.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_elec.html new file mode 100644 index 000000000..12eeafa80 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_elec.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_score.html new file mode 100644 index 000000000..d2af3a785 --- /dev/null 
+++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_score.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_vdw.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_vdw.html new file mode 100644 index 000000000..e5f878d97 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/dockq_vdw.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/elec_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/elec_clt.html new file mode 100644 index 000000000..cad52f2d2 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/elec_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_air.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_air.html new file mode 100644 index 000000000..3ac0436b4 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_air.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_clt.html new file mode 100644 index 000000000..90b81ef85 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_desolv.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_desolv.html new file mode 100644 index 000000000..388bb9645 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_desolv.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_elec.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_elec.html new file 
mode 100644 index 000000000..b4c5bdbbb --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_elec.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_score.html new file mode 100644 index 000000000..f01319981 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_score.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_vdw.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_vdw.html new file mode 100644 index 000000000..89c8231fd --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/fnat_vdw.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_air.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_air.html new file mode 100644 index 000000000..9053ad7d1 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_air.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_clt.html new file mode 100644 index 000000000..71130cda3 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_desolv.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_desolv.html new file mode 100644 index 000000000..9ab8ae7d3 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_desolv.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_elec.html 
b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_elec.html new file mode 100644 index 000000000..aed7ef7fb --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_elec.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_score.html new file mode 100644 index 000000000..dfdf41bd2 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_score.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_vdw.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_vdw.html new file mode 100644 index 000000000..ee904bfd6 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/ilrmsd_vdw.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_air.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_air.html new file mode 100644 index 000000000..cd6bc22c5 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_air.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_clt.html new file mode 100644 index 000000000..4b64aff61 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_desolv.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_desolv.html new file mode 100644 index 000000000..bf4b142b5 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_desolv.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file 
diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_elec.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_elec.html new file mode 100644 index 000000000..01af78d98 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_elec.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_score.html new file mode 100644 index 000000000..4cc1e4003 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_score.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_vdw.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_vdw.html new file mode 100644 index 000000000..a1125b3bf --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/irmsd_vdw.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_air.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_air.html new file mode 100644 index 000000000..b8820b37c --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_air.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_clt.html new file mode 100644 index 000000000..0978211e1 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_desolv.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_desolv.html new file mode 100644 index 000000000..2f0fdb1a8 --- /dev/null +++ 
b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_desolv.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_elec.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_elec.html new file mode 100644 index 000000000..391437441 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_elec.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_score.html new file mode 100644 index 000000000..fa00c6c76 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_score.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_vdw.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_vdw.html new file mode 100644 index 000000000..0d51599f7 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/lrmsd_vdw.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/report.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/report.html new file mode 100644 index 000000000..6e0e9081f --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/report.html @@ -0,0 +1,94 @@ +++ + +Analysis report of step 10_caprieval Analysis report of step 10_caprieval
+ + + +
++ + ++++ + +
++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/air_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/air_clt.html deleted file mode 100644 index 01febf803..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/air_clt.html +++ /dev/null @@ -1,2 +0,0 @@ -++ + +-\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/desolv_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/desolv_clt.html deleted file mode 100644 index 37b533c75..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/desolv_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_air.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_air.html deleted file mode 100644 index c81d7140f..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_air.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_desolv.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_desolv.html deleted file mode 100644 index 69c866038..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_desolv.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_elec.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_elec.html deleted file mode 100644 index 3a197f125..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_elec.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git 
a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_score.html deleted file mode 100644 index 0b00ef699..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_score.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_vdw.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_vdw.html deleted file mode 100644 index abbab02ed..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/dockq_vdw.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/elec_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/elec_clt.html deleted file mode 100644 index 61de154c1..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/elec_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/irmsd_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/irmsd_score.html deleted file mode 100644 index a995b9aa3..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/irmsd_score.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/score_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/score_clt.html deleted file mode 100644 index 9a97e7c1d..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/score_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git 
a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/vdw_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/vdw_clt.html deleted file mode 100644 index 3cf8d3002..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario1-surface/vdw_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/air_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/air_clt.html deleted file mode 100644 index c4b752f87..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/air_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/desolv_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/desolv_clt.html deleted file mode 100644 index ff9656e0d..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/desolv_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_air.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_air.html deleted file mode 100644 index 82b8b8377..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_air.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_desolv.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_desolv.html deleted file mode 100644 index 49d5f940f..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_desolv.html +++ /dev/null @@ -1,2 
+0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_elec.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_elec.html deleted file mode 100644 index 572140869..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_elec.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_score.html deleted file mode 100644 index 47ac5e5b2..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_score.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_vdw.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_vdw.html deleted file mode 100644 index d06e3c5e3..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/dockq_vdw.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/elec_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/elec_clt.html deleted file mode 100644 index d668df86c..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/elec_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/irmsd_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/irmsd_score.html deleted file mode 100644 index 46745a01f..000000000 --- 
a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/irmsd_score.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/score_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/score_clt.html deleted file mode 100644 index 45ebc06c3..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/score_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/vdw_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/vdw_clt.html deleted file mode 100644 index a680d8e07..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2a-NMR-epitope-pass/vdw_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/air_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/air_clt.html deleted file mode 100644 index e928477fa..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/air_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/desolv_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/desolv_clt.html deleted file mode 100644 index 963fd3a19..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/desolv_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_air.html 
b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_air.html deleted file mode 100644 index 5a3b57083..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_air.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_desolv.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_desolv.html deleted file mode 100644 index 0962df9c7..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_desolv.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_elec.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_elec.html deleted file mode 100644 index 91eb48bac..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_elec.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_score.html deleted file mode 100644 index d13698ea8..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_score.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_vdw.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_vdw.html deleted file mode 100644 index 60746e7ed..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/dockq_vdw.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git 
a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/elec_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/elec_clt.html deleted file mode 100644 index 304bbb388..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/elec_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/irmsd_score.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/irmsd_score.html deleted file mode 100644 index 6c0360530..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/irmsd_score.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/score_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/score_clt.html deleted file mode 100644 index 8d8a3c990..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/score_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/vdw_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/vdw_clt.html deleted file mode 100644 index 56bc68f77..000000000 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/scenario2b-NMR-epitope-act/vdw_clt.html +++ /dev/null @@ -1,2 +0,0 @@ --\ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/score_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/score_clt.html new file mode 100644 index 000000000..e26ab133a --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/score_clt.html @@ -0,0 +1,23 @@ + ++ + ++ \ No newline at end of file diff --git 
a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/summary.tgz b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/summary.tgz new file mode 100644 index 000000000..d7ddd14e9 Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/summary.tgz differ diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/vdw_clt.html b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/vdw_clt.html new file mode 100644 index 000000000..834fc9caf --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/plots/vdw_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/results-best-model.png b/education/HADDOCK3/HADDOCK3-antibody-antigen/results-best-model.png index 74d10f63d..0f09b99a8 100644 Binary files a/education/HADDOCK3/HADDOCK3-antibody-antigen/results-best-model.png and b/education/HADDOCK3/HADDOCK3-antibody-antigen/results-best-model.png differ diff --git a/education/HADDOCK3/HADDOCK3-protein-DNA-basic/CRO-OR1-interface.png b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/CRO-OR1-interface.png new file mode 100644 index 000000000..f801f526e Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/CRO-OR1-interface.png differ diff --git a/education/HADDOCK3/HADDOCK3-protein-DNA-basic/CRO-OR1.png b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/CRO-OR1.png new file mode 100644 index 000000000..2937d6a9d Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/CRO-OR1.png differ diff --git a/education/HADDOCK3/HADDOCK3-protein-DNA-basic/CRO-interface.png b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/CRO-interface.png new file mode 100644 index 000000000..6cf708bae Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/CRO-interface.png differ diff --git a/education/HADDOCK3/HADDOCK3-protein-DNA-basic/alig_ref_cl1mod1.png b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/alig_ref_cl1mod1.png new file mode 100644 
index 000000000..4de2b6132 Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/alig_ref_cl1mod1.png differ diff --git a/education/HADDOCK3/HADDOCK3-protein-DNA-basic/alig_ref_cl1mod4.png b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/alig_ref_cl1mod4.png new file mode 100644 index 000000000..350dd1d80 Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/alig_ref_cl1mod4.png differ diff --git a/education/HADDOCK3/HADDOCK3-protein-DNA-basic/index.md b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/index.md new file mode 100644 index 000000000..5084b5a18 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/index.md @@ -0,0 +1,622 @@ +--- +layout: page +title: "Basic protein-DNA docking using local version of Haddock3" +excerpt: "A basic tutorial on protein-DNA docking in Haddock3." +tags: [HADDOCK, Haddock3, PyMOL, Protein-DNA, symmetry, 3-body docking] +image: + feature: pages/banner_education-thin.jpg +--- +This tutorial consists of the following sections: + +* table of contents +{:toc} + +++ + +
+ +## Introduction + +This tutorial demonstrates how to set up a Haddock3 workflow dedicated to predicting the 3D structure of protein-DNA (double-stranded DNA) complexes using pre-defined restraints, derived from the literature, and symmetry restraints. Here, we introduce the basic concepts of HADDOCK, suitable for tackling various typical protein-DNA docking challenges: +* basic preparation of the input PDB files; +* creation of the suitable Haddock3 workflow; +* basic analysis of the docking results. + +Please note that we do not cover the processing of literature data into docking restraints; for more information, please refer to the [advanced tutorial](https://www.bonvinlab.org/education/HADDOCK24/HADDOCK24-protein-DNA-advanced/). + +Computation within this tutorial should take 1.5 hours on 8 CPUs. The tutorial data, as well as precomputed results, are available [here](https://surfdrive.surf.nl/files/index.php/s/NzibuNryl3RgVPn). + +### Tutorial test case + +In this tutorial, we will work with the phage 434 Cro/OR1 complex (PDB: [3CRO](https://www.rcsb.org/structure/3CRO)), formed by bacteriophage 434 Cro repressor proteins and the OR1 operator. + +Cro is part of the bacteriophage 434 genetic switch, playing a key role in controlling the switch between the lysogenic and lytic cycles of the bacteriophage. It is a *repressor* protein that works in opposition to the phage's repressor cI protein to control the genetic switch. Both repressors compete to gain control over an operator region containing three operators that determine the state of the lytic/lysogenic genetic switch. If Cro prevails, the late genes of the phage will be expressed, resulting in lysis. Conversely, if the cI repressor prevails, the transcription of Cro genes is blocked, and cI repressor synthesis is maintained, resulting in a state of lysogeny. + +### Solved structure of the Cro-OR1 complex + +The structure of the phage 434 Cro/OR1 complex was solved by X-ray crystallography at 2.5Å.
+We will use this experimentally solved structure (PDBID: 3CRO) as a reference within the tutorial. +Cro is a symmetrical dimer, each subunit contains a helix-turn-helix (HTH), with helices α2 and α3 being separated by a short turn. +This is a DNA binding motif that is known to bind major grooves. +Helix α3 is the recognition helix that fits into the major groove of the operator DNA and is oriented with its axes parallel to the major groove. +The side chains of each helix are thus positioned to interact with the edges of base pairs on the floor of the groove. Non-specific interactions also help to anchor Cro to the DNA. +These include H-bonds between main chain NH groups and phosphate oxygens of the DNA in the region of the operator. +Cro distorts the normal B-form DNA conformation: the OR1 DNA is bent (curved) by Cro, and the middle region of the operator is overwound, as reflected in the reduced distance between phosphate backbones in the minor groove. + ++ + +Throughout the tutorial, coloured text will be used to refer to questions, instructions, PyMOL and terminal prompts: + +This is a question prompt: try to answer it! +This is an instruction prompt: follow it! +This is a PyMOL prompt: write this in the PyMOL command line prompt! +This is a Linux prompt: insert the commands in the terminal! + +It is always possible that you have questions or run into problems for which you cannot find the answer. Here are some additional links that can help you to find the answers and solutions: + +* Haddock3 Documentation: [https://www.bonvinlab.org/haddock3/](https://www.bonvinlab.org/haddock3/) +* Bioexcel User Forum: [https://ask.bioexcel.eu/c/haddock/6](https://ask.bioexcel.eu/c/haddock/6) +* Haddock3 Github (issues & discussions): [https://github.com/haddocking/haddock3/](https://github.com/haddocking/haddock3/) +* HADDOCK Help Center: [https://wenmr.science.uu.nl/haddock2.4/help](https://wenmr.science.uu.nl/haddock2.4/help) + ++
+ +## Software and data setup + +For a complete setup of the local version of Haddock3, refer to the [online documentation](https://www.bonvinlab.org/haddock3/). +Please, familiarise yourself with the sections ['**A brief introduction to HADDOCK3**'](https://www.bonvinlab.org/haddock3/intro.html) and ['**Installation**'](https://www.bonvinlab.org/haddock3/INSTALL.html). + +In this tutorial we will use the PyMOL molecular visualisation system. If not already installed, download and install PyMOL from [here](https://pymol.org/). You can use your favourite visualisation software instead, but be aware that instructions in this tutorial are provided only for PyMOL. + +Please, download and decompress the tutorial data archive. Move the archive to your working directory of choice and extract it. You can download and unzip this archive directly from the Linux command line: + + +wget https://surfdrive.surf.nl/files/index.php/s/NzibuNryl3RgVPn/download -O haddock3-protein-dna-basic.zip
+unzip haddock3-protein-dna-basic.zip + + +Decompressing the file will create the `haddock3-protein-dna-basic` directory with the following subdirectories and items: +* `protein-dna-basic.cfg`, a Haddock3 configuration file; +* `pdbs`: + * `1ZUG_ensemble.pdb`, an NMR ensemble (10 structures) of the 434 Cro repressor structures; + * `1ZUG_dimer1.pdb`, a single structure coming from the NMR ensemble (`1ZUG_ensemble.pdb`) with the terminal disordered residues removed; + * `1ZUG_dimer2.pdb`, a single structure coming from the NMR ensemble (`1ZUG_ensemble.pdb`) with the terminal disordered residues removed. Differs from `1ZUG_dimer1.pdb` only by chain ID; + * `OR1_unbound.pdb`, a structure of the OR1 operator in B-DNA conformation; + * `3CRO_complex.pdb`, an X-ray structure of the sought-for complex, to be used to evaluate the docking poses. +* `restraints`: + * `ambig_prot_dna.tbl`, a file containing the ambiguous restraints for this docking scenario. +* `saved-run`, a folder with Haddock3 output, produced by the `protein-dna-basic.cfg` workflow. We will navigate relevant parts of this folder throughout the tutorial. + +
+ +## Understanding the Ambiguous Interaction Restraints + +The Ambiguous Interaction Restraints (AIRs) are crucial to successful docking, as they guide the modelling process by biasing partners' conformations and co-orientation towards a certain, hopefully correct conformation of the complex. +AIRs consist of the residues located in the interface of a complex. + +### Visualisation of the interface + +Let's visualise residues in the interface of the 3CRO using PyMOL. +Open `pdbs/3CRO_complex.pdb` in PyMOL and type: + +color lightorange, all + + +select interface, (chain A and resi 28+29+30+31+32+34+35, chain C and resi 28+29+30+31+32+34+35, chain B and resi 4+5+6+7+13+14+15+16+17+18+22+23+24+25+31+32+33+34+35+36) + + +color red, interface + + ++++ Now the residues of the interface are displayed in red.expand_more +
++ ++
+
+Let's highlight the same residues on the unbound NMR structure of the protein. +Open PyMOL and type: + +fetch 1ZUG + + +color lightorange, all + + +select region, (resi 28+29+30+31+32+34+35) + + +color red, region + + +set all_states, on + + + +How much does the conformation of the interacting region change in the provided ensemble? Is this the most flexible region of the protein? + ++++ Answer expand_more +
++ The conformation of the interacting region does not change in the ensemble, this region is not the most flexible. The most flexible region is the tail of the protein, and it appears to not interact with the DNA. +
+
+Let's switch to the surface representation of a single model: + +set all_states, off + + +show surface + + ++++ Surface representation of a single model with interface residues highlighted in red.expand_more +
++ ++
+
+ +Can you tell why the residue 33 is excluded from the interface amino acids? + ++++ Answer expand_more +
++ The residue 33 is not solvent accessible, thus it does not belong to the interface. You can visualise it by colouring residue 33 in blue, and switching between the cartoon and surface representations (sele resi 33; color blue, sele; show cartoon; show surface)
+
+ +Repeat the same analysis with the DNA molecule OR1_unbound.pdb using the residues (13+14+15+16+17+18+22+23+24+25) or (4+5+6+7+31+32+33+34+35+36) as an interface. + + +_**Note**_ that in the real docking case the bound structure of a complex is unavailable. In this case, the interface residues could be located using experimental data, knowledge from the literature, using computational predictions, etc. + +### What’s inside of the restraints file? + +The ambiguous interaction restraints are defined in the `ambig_prot_dna.tbl` file. +This file was created using both experimental knowledge and information from literature. +A detailed explanation of how to generate these restraints can be found in the advanced version of the tutorial, accessible [here](https://www.bonvinlab.org/education/HADDOCK24/HADDOCK24-protein-DNA-advanced/#available-data). + +Let’s have a look at its first lines: +```bash +assign ( resid 35 and segid C) +( + ( resid 32 and segid B and (name H3 or name O4 or name C4 or name C5 or name C6 or name C7)) + or + ( resid 33 and segid B and (name H3 or name O4 or name C4 or name C5 or name C6 or name C7)) +) 2.000 2.000 0.000 +``` +The first line means that the residue 35 from the chain C (protein) should interact either with the residue 32, or with the residue 33 of the chain B (DNA). Additionally, the substring `(name H3 or name O4 or name C4 or name C5 or name C6 or name C7)` specifies the atoms with which the interaction should occur. + +To simplify, if at least one pair of residues (residue 35 from chain C; residue 32 from chain B) or (residue 35 from chain C; residue 33 from chain B) are located in the vicinity of one another - then this particular restraint is satisfied. + + +Check out the list of atoms defined in 'ambig_prot_DNA.tbl'. Which part of the DNA is targeted by defining a given set of atoms? What is the effect of such restraints on the docking process?
+ + +You may notice that not all residues of the protein’s interface are used for the AIRs. This is done to save computational time. + +HADDOCK is not limited to ambiguous restraints, other types, like unambiguous and symmetry restraints can play an important role as well. As mentioned before, Cro is known to function as a symmetrical dimer. This means we should **enforce a pairwise symmetry (C2)** between the two protein monomers. This part will be explained in the [Haddock3 workflow](#haddock3-workflow) section of the tutorial. + +
+ +## Preparation of PDB files for the docking (optional) + +Haddock3 requires an input structure for each docking partner. The quality of these input structures is highly influential on the quality of the docking models. Conformational deficiencies such as steric clashes, chain breaks and missing atoms may cause problems during the docking, so it is important to verify each input file. +Another important factor is the difference between unbound and bound conformations. The more different these conformations are, the more difficult it is to generate correct docking models. + +In this section we will go over the preparation of the protein structures. The preparation of the DNA structure is out of the scope of this tutorial, but is detailed in the [advanced tutorial](https://www.bonvinlab.org/education/HADDOCK24/HADDOCK24-protein-DNA-advanced/#preparing-pdb-coordinate-files-for-the-or1-operator). + +Ready-to-dock structures are available in the `pdbs` directory, namely `1ZUG_dimer1.pdb`, `1ZUG_dimer2.pdb` and `OR1_unbound.pdb`. + +### Protein structures + +An unbound structure of the protein is available on [PDB](https://www.rcsb.org/structure/1ZUG). We already examined this structure using PyMOL. +Our observation revealed that this protein has a disordered tail, which does not interact with the DNA. +Since the core conformation remains unchanged, we can simply take the first conformation from the ensemble, remove the disordered tail from it and use it as an input structure for the docking. + +This can be done using `pdb-tools`, a collection of simple scripts handy to manipulate pdb files. `pdb-tools` is installed automatically with Haddock3. Alternatively, it is also available as a [web-server](https://wenmr.science.uu.nl/pdbtools/). + +To obtain a single trimmed structure, we will make use of the command-line version of `pdb-tools`. +Please, _**remember to activate a virtual environment for Haddock3**_ before using `pdb-tools`.
+The following command will override the existing file with the name `1ZUG_dimer1.pdb` - consider executing it outside of `pdbs/`: + +pdb_fetch 1ZUG | pdb_selmodel -1 | pdb_delhetatm | pdb_selres -1:66 | pdb_tidy -strict > 1ZUG_dimer1.pdb + + +This sequence of commands: 1/ Downloads given structure in PDB format from the RCSB website; 2/ Extracts the first model from the file; 3/ Removes all HETATM records in the PDB file; 4/ Selects residues by their index (in a range); 5/ Adds TER statement at the end of the chain. + +The complex of interest contains 2 copies of the protein. As each molecule given to HADDOCK in a docking scenario must have a **unique chain ID and segment ID**, we have to change the chain ID from A to C and save this as a new structure. +This can be achieved using another command from `pdb_tools`, namely, `pdb_rplchain`, which stands for "replace chain": + +pdb_rplchain -A:C 1ZUG_dimer1.pdb > 1ZUG_dimer2.pdb + + +_**Note**_ that it is possible to perform the docking with an ensemble of trimmed conformations. Such ensemble can be obtained using the following command: `pdb_fetch 1ZUG | pdb_delhetatm | pdb_selres -1:66 | pdb_tidy -strict > 1zug_ens.pdb`. Here the command `pdb_selmodel -1` was removed and therefore all the available models in the ensemble will be processed. + +
+ +## Docking with Haddock3 + +In this section, we will discuss the specificities related to protein-DNA docking in the frame of Haddock3. We will then create an appropriate Haddock3 workflow and, finally, perform an analysis of the docking results and evaluate their quality with respect to the experimentally solved structure as a reference. + +### Specifics of the protein-DNA docking + +Docking a double-stranded DNA requires adjusting several default parameters to better mimic the conditions under which DNA interactions occur. The following parameters should be modified: +* Add an automatic restraint to maintain the input conformation of the DNA during refinement: `dnarest_on = true`; +* Perform explicit solvent molecular dynamics refinement instead of energy minimization refinement by using the `[mdref]` module instead of `[emref]`; +* Set the dielectric constant to 78 for both sampling and flexible refinement (but not for MD refinement): `epsilon = 78`; +* Fix the relative dielectric constant in the Coulomb potential (rather than using a distance-dependent mode) for both sampling and flexible refinement: `dielec = cdie`; +* Set the weight of the desolvation energy term to 0: `w_desolv = 0`; +* Lower the scaling factor for flexible refinement to 4 (from 8) to allow less movement during the refinement: `tadfactor = 4`; +* Lower the initial temperature for the final round of flexible refinement to 300 (from 1000) to allow less movement during the final refinement stage: `temp_cool3_init = 300`. + +_**Note**_ that Haddock3 distinguishes DNA nucleotides from RNA nucleotides based on the residue naming in the PDB file. DNA nucleotides are named with two letters starting with 'D' (e.g., 'DA' for deoxyadenosine in DNA), while RNA nucleotides use single-letter names (e.g., 'A' for adenosine in RNA). 
+ +### Haddock3 workflow + +Now that we have all the necessary files ready for docking, along with several insights into the specifics of protein-DNA docking, it’s time to create the docking workflow. In this scenario, we will adhere to the following straightforward workflow: rigid-body docking, semi-flexible refinement in torsional angle space, molecular dynamics (MD) refinement in explicit solvent, and a final RMSD clustering step. + +Our workflow consists of the following modules: +* **topoaa**: _Generates the topologies for the CNS engine and builds missing atoms;_ +* **rigidbody**: _Performs sampling by rigid-body energy minimization (equivalent to `it0` in Haddock2.X);_ +* **caprieval**: _Calculates CAPRI metrics (i-RMSD, l-RMSD, Fnat, DockQ) with respect to the best-scored model or a provided reference structure;_ +* **seletop**: _Selects X best-scored models from the previous module;_ +* **flexref**: _Performs semi-flexible refinement of the interface (equivalent to `it1` in Haddock2.X);_ +* **caprieval** +* **mdref**: _Performs final refinement via explicit solvent MD (equivalent to `itw` in Haddock2.X);_ +* **caprieval** +* **rmsdmatrix**: _Calculates the root mean squared deviation (RMSD) matrix between all models from the previous module;_ +* **clustrmsd**: _Takes the RMSD matrix calculated in the `[rmsdmatrix]` module and performs a hierarchical clustering procedure on it;_ +* **seletopclusts**: _Selects X best-scored models of Y clusters._ +* **caprieval**: _Final assessment of the docking results._ + +As mentioned before, we should enforce C2 symmetry between the proteins throughout the entire docking process.
This can be easily achieved by adding the following parameters to the `[rigidbody]` , `[flexref]` , and `[mdref]` modules: + +```toml +# Turn on symmetry restraints +sym_on = true +# Define first symmetry partner +c2sym_seg1_1 = 'A' +# Define second symmetry partner +c2sym_seg2_1 = 'C' +# Specify the range of residues that should be taken from the first partner +c2sym_sta1_1 = 4 +c2sym_end1_1 = 64 +# Specify the range of residues that should be taken from the second partner +c2sym_sta2_1 = 4 +c2sym_end2_1 = 64 +``` + +**Note** that in this definition we omitted the first 3 residues of each protein. + +Take a look at the TOML configuration file `protein-dna-basic.cfg`. + + Take your time to read the comments and relate parameters of this file to the information given above: + +{% highlight toml %} +# ==================================================================== +# Protein-DNA basic docking example with: +# 1. Pre-generated ambiguous restraints between protein dimer and DNA partners +# 2. Pairwise (C2) symmetry between the two protein monomers +# 3. 
Specific to double-stranded DNA-protein docking parameters +# ==================================================================== + +# directory in which the docking will be performed +run_dir = "run" + +# compute mode +mode = "local" +ncores = 8 + +# input PDBs of the docking partners +molecules = [ + "pdbs/1ZUG_dimer1.pdb", + "pdbs/OR1_unbound.pdb", + "pdbs/1ZUG_dimer2.pdb" + ] + +# compress all generated models +clean = true + +# ==================================================================== +# Workflow is defined as a pipeline of modules with specified parameters per module +# ==================================================================== + +[topoaa] + +[rigidbody] +# allow up to 5% of the models to fail without interrupting the run +tolerance = 5 +# create 1000 modles (default value) +sampling = 1000 +# Cro to OR1 ambiguous restraints +ambig_fname = "restraints/ambig_prot_DNA.tbl" +# C2 symmetry +sym_on = true +c2sym_seg1_1 = 'A' +c2sym_seg2_1 = 'C' +c2sym_sta1_1 = 4 +c2sym_sta2_1 = 4 +c2sym_end1_1 = 64 +c2sym_end2_1 = 64 +# constant for the electrostatic energy term +epsilon = 78 +# fix constant in Coulomb potential +dielec = 'cdie' +# weight of the desolvation energy term +w_desolv = 0 + +[caprieval] +reference_fname = "pdbs/3CRO_complex.pdb" + +[seletop] +# select top 200 models (default value) based on HADDOCK score +select = 200 + +[flexref] +tolerance = 5 +# to maintain conformation of the DNA with automatic restraints +dnarest_on = true +# Cro to OR1 ambiguous restraints +ambig_fname = "restraints/ambig_prot_DNA.tbl" +# C2 symmetry +sym_on = true +c2sym_seg1_1 = 'A' +c2sym_seg2_1 = 'C' +c2sym_sta1_1 = 4 +c2sym_sta2_1 = 4 +c2sym_end1_1 = 64 +c2sym_end2_1 = 64 +# constant for the electrostatic energy term +epsilon = 78 +# fix constant in Coulomb potential +dielec = 'cdie' +# weight of the desolvation energy term +w_desolv = 0 +# allow less movement during the refinement +tadfactor = 4 +# reduce the initial temperature for the final round of 
flexible refinement to 300 (from 1000 with default parameters) +temp_cool3_init = 300 + +[caprieval] +reference_fname = "pdbs/3CRO_complex.pdb" + +[mdref] +tolerance = 5 +# to maintain conformation of the DNA with automatic restraints +dnarest_on = true +# Cro to OR1 ambiguous restraints +ambig_fname = "restraints/ambig_prot_DNA.tbl" +# C2 symmetry +sym_on = true +c2sym_seg1_1 = 'A' +c2sym_seg2_1 = 'C' +c2sym_sta1_1 = 4 +c2sym_sta2_1 = 4 +c2sym_end1_1 = 64 +c2sym_end2_1 = 64 +# constant for the electrostatic energy term (default value) +epsilon = 1 +w_desolv = 0 +# reduce the number of MD steps +watersteps = 750 + +[caprieval] +reference_fname = "pdbs/3CRO_complex.pdb" + +[rmsdmatrix] +# by default, all residues of each docking partner are used to calculate the RMSD matrix + +[clustrmsd] +# generate an interactive plot of the clustering results +plot_matrix = true +# reduce the clustering cutoff distance from 7.5 +clust_cutoff = 5.5 + +[seletopclusts] + +[caprieval] +reference_fname = "pdbs/3CRO_complex.pdb" + +# ==================================================================== + +{% endhighlight %} + +_**Note**_ that in this example we use relative paths to define input files and output folder. However it is preferable to use the full paths instead. + +This workflow begins by creating topologies for the docking partners (`[topoaa]`). Rigid body sampling (`[rigidbody]`) is performed with ambiguous and symmetry restraints, generating 1000 models, from which the top 200 are selected (`[seletop]`). These models then undergo flexible refinement (`[flexref]`) followed by MD refinement in explicit solvent (`[mdref]`), still maintaining the same ambiguous and symmetry restraints. Finally, the RMSD matrix for docking models is computed (`[rmsdmatrix]`), followed by its clusterisation (`[clustrmsd]`). The RMSD values are calculated using the backbone atoms of all residues of each docking partner. The top 10 models from each cluster are selected (`[seletopclusts]`). 
The `caprieval` module is added after each step using the crystal structure as a reference to simplify the analysis and track the rank of models throughout the docking process. + +### Running Haddock3 locally + +In the first section of the configuration file you can see the definition of the global parameters: + +{% highlight toml %} +# compute mode +mode = "local" +ncores = 8 +{% endhighlight %} + +The parameter `mode` defines how this workflow will be executed. In this case, it will run locally, on your machine, using up to 8 CPUs. Feel free to change this value, if more cores are available. You can find out about other modes [here](https://www.bonvinlab.org/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/#local-execution). + +To start the docking you need to **activate your haddock3 environment**, then navigate to the folder `haddock3-protein-dna-basic` with the configuration file `protein-dna-basic.cfg`, and type one of the following: + +haddock3 protein-dna-basic.cfg > log-protein-dna-basic.out & + +or + +haddock3 protein-dna-basic.cfg + + +The first version of this command will run the docking in the background and will save output in the file `log-protein-dna-basic.out`, i.e. you will be able to close the terminal window without terminating the docking. The second version will run directly in the terminal window and will print output on the screen, so you will have to leave the terminal window open while Haddock3 is running. + +This workflow took 1.5 hours to complete using 8 CPUs (on an Apple M3 laptop). You can run it and wait for the results, or you can access pre-computed results of this protocol immediately by navigating to the `precomputed` directory. + +
+ +## Analysis of the docking results + +Let's get acquainted with the contents of the resulting directory - take a look inside. +You will find that the various steps of our workflow (modules) are numbered sequentially, starting at 0: + +{% highlight shell %} +> ls precomputed/ + 00_topoaa/ + 01_rigidbody/ + 02_caprieval/ + 03_seletop/ + 04_flexref/ + 05_caprieval/ + 06_mdref/ + 07_caprieval/ + 08_rmsdmatrix/ + 09_clustrmsd/ + 10_seletopclusts/ + 11_caprieval/ + analysis/ + data/ + log + traceback/ +{% endhighlight %} + +Additionally, you will find the following directories and files: +* `log` file: This file allows you to verify the execution of each module. More importantly, if the docking run fails, you can identify the cause by carefully reading the error messages. Also, at the very bottom of the file, you can see the total execution time of the docking run; +* `analysis` directory: Contains information relevant to the results of each caprieval step, including file `report.html` with the table containing a limited number of the top-ranked models/clusters and various plots to visualise statistics of the results; +* `data` directory: Stores input PDB files (not the actual docking models) and restraints files used in each module; +* `traceback` directory: Contains the `traceback.tsv` file, which tracks the name and rank of each docking model throughout the entire workflow. Handy, if you want to see how the model's rank evolved step-by-step. + +Each sampling, refinement, or selection module's directory contains compressed PDB files of the docking models. For example, `10_seletopclusts` contains 10 top-ranked docking models from each of the 4 clusters. Clusters are numbered based on the average rank of the models within them, so cluster_1 contains the top-ranked models of the entire run. Information about the origin of each model can be found in `10_seletopclusts/seletopclusts.txt`, as well as in `traceback/traceback.tsv`. 
+ +One of the ways to analyse the docking results is to examine file(s) `analysis/XX_caprieval_analysis/report.html`. Depending on the positioning of the given caprieval module, `report.html` will display information about the models at the different stages of the workflow. For example, `11_caprieval_analysis/report.html` contains statistics related to the clusters, while `02_caprieval_analysis/report.html` contains statistics related to the models generated during the rigid body sampling stage. +Caprieval, executed after clustering - in this case `11_caprieval` - enables a comprehensive evaluation of the docking results from a broad perspective. + + Open 'analysis/11_caprieval_analysis/report.html' in a web browser by typing "open report.html" in the command line. Once the report is displayed, locate the table with cluster statistics at the top of the page. You can sort the columns in ascending or descending order by clicking the arrow icon (⇆) on the right side of each column header. + + +Here is a screenshot of the top-left corner of the table in `11_caprieval_analysis/report.html`: + ++ + +You can also access this file [here](/education/HADDOCK3/HADDOCK3-protein-DNA-basic/report.html). + + + +Look at the "HADDOCK score" row of the first 3 clusters: Are they significantly different if you consider the average scores, standard deviations and cluster size? + ++
+++ Answer expand_more +
++ According to the two-sample t-test, there is a significant difference between all 3 clusters. +
+
+In this docking case, we had access to the experimentally solved structure of the complex, which we provided to all `[caprieval]` modules using the `reference_fname` parameter. As a result, the interface RMSD (i-RMSD), ligand RMSD (l-RMSD), Fraction of Common Contacts (FCC), and DockQ statistics reflect the quality of the docked model with respect to the reference structure. +Remember that high DockQ and FCC values, along with low RMSD values, indicate better model quality. + + +Look at the DockQ of the clusters: Does the top-ranked cluster have the highest average DockQ? + ++++ Answer expand_more +
++No, cluster_2 has the highest average DockQ equal to 0.26+/-0.02. +
+
+_**Note**_ that if no reference structure is provided to the caprieval module, all statistics are calculated relative to the top-ranked (based on HADDOCK score) docking model. However, keep in mind that the top-ranked model may not necessarily represent the true solution. + +### Visualisation of the HADDOCK scores and their components + +Below the cluster statistics table, you'll find a series of plots displaying the HADDOCK score and its components against various metrics (i-RMSD, l-RMSD, FCC, DockQ), with clusters represented using color coding. The last rows show plots of cluster statistics, i.e. distributions of values per cluster, ordered by their HADDOCK score. + +These plots are interactive. A menu will appear at the top right, just above the last plot in the first row, when you hover your mouse over it. This menu allows you to zoom in and out of the plots and toggle the visibility of clusters. + ++ + + Inspect the plots. Which of the score components correlates best with the quality of the models? + + +Depending on the docking models, there could be a set of unclustered models. It will be explicitly shown in `report.html` as 'other'. You can see the origins of these models in `traceback/traceback.tsv`. + +### Visualisation of the docking models + +It's time to visualise some of the docking models. Let's take a look at `cluster_1_model_1.pdb.gz`, the best-ranked model; `cluster_1_model_4.pdb.gz`, the model with the lowest i-RMSD (as shown in the plot "HADDOCK score vs i-RMSD"); and the reference structure `3CRO_complex.pdb`. Please open all these files in PyMOL (it can display compressed PDB files). You can find the first two files in `10_seletopclusts`, and the reference structure in `pdbs`. Then, in the PyMOL command line, type: + + +show cartoon + + +color paleyellow, 3CRO_complex + + +align cluster_1_model_1 and chain B, 3CRO_complex and chain B + + +align cluster_1_model_4 and chain B, 3CRO_complex and chain B + + ++
+++ See the overlay of the top ranked model onto the reference structure.expand_more +
+Reference structure is displayed in yellow; cluster_1_model_1 in green; cluster_1_model_4 in blue.
++++ ++
+ ++
+
+ +_**Note**_ that models are compressed because of the line `clean = true` in the global parameters of the workflow. To decompress all models in a directory, one can run `haddock3-unpack` + + +How close are these models to the reference? Did HADDOCK do a good job at ranking the docking models? + + +++ Answer expand_more +
++The models are acceptably close to the reference. Although the best model may not be the top-ranked one, HADDOCK produces a reasonable ranking. +
+
+It may be helpful to examine several top-ranked models from each cluster. This can give you insight into the diversity of the models within a cluster, as well as the diversity of models across different clusters. + + Compare the top-ranked models within the same cluster, e.g. cluster_2. Are the top models from this cluster close to the reference structure? To each other? + + +
+ +## Congratulations! + + +You've reached the end of this basic protein-DNA docking tutorial. We hope it has been informative and helps you get started with your own docking projects. Check out the advanced version of this tutorial ([currently available only for Haddock2.4 server](https://www.bonvinlab.org/education/HADDOCK24/HADDOCK24-protein-DNA-advanced/)) for deeper insights into protein-DNA docking! + +Happy docking! diff --git a/education/HADDOCK3/HADDOCK3-protein-DNA-basic/report.html b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/report.html new file mode 100644 index 000000000..5ebeffadb --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/report.html @@ -0,0 +1,108 @@ +Analysis report of step 11_caprieval Analysis report of step 11_caprieval
+
+ + + +
++ + ++++ + +
++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-protein-DNA-basic/stat_analys_table.png b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/stat_analys_table.png new file mode 100644 index 000000000..d79720a78 Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/stat_analys_table.png differ diff --git a/education/HADDOCK3/HADDOCK3-protein-DNA-basic/stat_plots.png b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/stat_plots.png new file mode 100644 index 000000000..3fe7107ed Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-protein-DNA-basic/stat_plots.png differ diff --git a/education/HADDOCK3/HADDOCK3-protein-glycan/index.md b/education/HADDOCK3/HADDOCK3-protein-glycan/index.md index 303084d96..82d206a62 100644 --- a/education/HADDOCK3/HADDOCK3-protein-glycan/index.md +++ b/education/HADDOCK3/HADDOCK3-protein-glycan/index.md @@ -31,6 +31,8 @@ In this tutorial we will be working with the catalytic domain of the *Humicola G *4-beta-glucopyranose*, as glycan (PDB code of the complex [1UU6](https://www.ebi.ac.uk/pdbe/entry/pdb/1uu6){:target="_blank"}). +The tutorial is based on [A. Ranaudo et al., *J. Chem. Inf. Model.* 64 (19), 7816-7825, 2024](https://pubs.acs.org/doi/10.1021/acs.jcim.4c01372){:target="_blank"}. +++ + +@@ -208,7 +210,7 @@ align 1UU6_l_u, 1UU6 ## Defining restraints for docking -### Visualing the information about the bindind site +### Visualising the information about the binding site Here we mimic a scenario where we have information about the glycan binding site on the protein, but no knowledge about which monosaccharide units are relevant for the binding. In this case (see Fig. 1), all the four beta-D-glucopyranose units are at the interface, although this might not be true in general, especially when longer glycans are considered. 
@@ -477,7 +479,7 @@ In CAPRI the quality of a model is defined as (for protein-protein complexes): As these metrics are for protein-protein complexes and glycans are typically smaller, it is best to use stricter metrics to assess the quality of the models. In the case of information-driven protein-glycan docking, the Fnat term is less relevant, as most contacts will typically be satisfied. -For protein-glycan modelling we recently proposed a different, stricter metric based on the interface ligand RMSD (ilRMSD) [ADD REFERENCE TO BIORXIV PREPRINT]: +For protein-glycan modelling we recently proposed a different, stricter metric based on the interface ligand RMSD (ilRMSD), see [A. Ranaudo et al., *J. Chem. Inf. Model.* 64 (19), 7816-7825, 2024](https://pubs.acs.org/doi/10.1021/acs.jcim.4c01372){:target="_blank"}: * **near acceptable model**: ilRMSD < 4Å * **acceptable model**: ilRMSD < 3Å diff --git a/education/HADDOCK3/HADDOCK3-protein-protein-basic/HADDOCK2-stages.png b/education/HADDOCK3/HADDOCK3-protein-protein-basic/HADDOCK2-stages.png new file mode 100644 index 000000000..52cee1c5c Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-protein-protein-basic/HADDOCK2-stages.png differ diff --git a/education/HADDOCK3/HADDOCK3-protein-protein-basic/HADDOCK3-workflow-scheme.png b/education/HADDOCK3/HADDOCK3-protein-protein-basic/HADDOCK3-workflow-scheme.png new file mode 100644 index 000000000..bae241f6d Binary files /dev/null and b/education/HADDOCK3/HADDOCK3-protein-protein-basic/HADDOCK3-workflow-scheme.png differ diff --git a/education/HADDOCK3/HADDOCK3-protein-protein-basic/index.md b/education/HADDOCK3/HADDOCK3-protein-protein-basic/index.md new file mode 100644 index 000000000..717a02ba6 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-protein-protein-basic/index.md @@ -0,0 +1,1555 @@ +--- +layout: page +title: "Protein-Protein modelling tutorial using a local version of HADDOCK3" +excerpt: "A tutorial describing the use of HADDOCK3 to model a 
Protein-Protein complex" +tags: [HADDOCK, HADDOCK3, installation, preparation, proteins, docking, analysis, workflows] +image: + feature: pages/banner_education-thin.jpg +--- +This tutorial consists of the following sections: + +* table of contents +{:toc} + + ![]()
+
+ +## Introduction + +This tutorial demonstrates the use of the new modular HADDOCK3 version for predicting the structure of a protein-protein complex from NMR chemical shift perturbation (CSP) data. +Namely, we will dock two E. coli proteins involved in glucose transport: the glucose-specific enzyme IIA (E2A) and the histidine-containing phosphocarrier protein (HPr). +The structures in the free form have been determined using X-ray crystallography (E2A) (PDB ID [1F3G](https://www.ebi.ac.uk/pdbe/entry/pdb/1f3g){:target="_blank"}) +and NMR spectroscopy (HPr) (PDB ID [1HDN](https://www.ebi.ac.uk/pdbe/entry/pdb/1hdn){:target="_blank"}). +The structure of the native complex has also been determined with NMR (PDB ID [1GGR](https://www.ebi.ac.uk/pdbe/entry/pdb/1ggr){:target="_blank"}). +These NMR experiments have also provided us with an array of data on the interaction itself +(chemical shift perturbations, intermolecular NOEs, residual dipolar couplings, and simulated diffusion anisotropy data), which will be useful for the docking. +For this tutorial, we will only make use of interface residues identified from NMR chemical shift perturbation data as described +in [Wang *et al*, EMBO J (2000)](https://onlinelibrary.wiley.com/doi/10.1093/emboj/19.21.5635/abstract){:target="_blank"}. + +Throughout the tutorial, colored text will be used to refer to questions or instructions, and/or PyMOL commands. + +This is a question prompt: try answering it! +This is an instruction prompt: follow it! +This is a PyMOL prompt: write this in the PyMOL command line prompt! +This is a Linux prompt: insert the commands in the terminal! + +
+
+ +## Setup/Requirements + +In order to follow this tutorial you will need to work on a Linux or MacOSX +system. We will also make use of [**PyMOL**][link-pymol] (freely available for +most operating systems) in order to visualize the input and output data. We will +provide you links to download the various required software and data. + +Further we are providing pre-processed PDB files for docking and analysis (but the +preprocessing of those files will also be explained in this tutorial). The files have been processed +to facilitate their use in HADDOCK and for allowing comparison with the known reference +structure of the complex. For this _download and unzip the following_ +[zip archive](){:target="_blank"} +_and note the location of the extracted PDB files in your system_. In it you should find the following directories: + +* `haddock3`: Contains HADDOCK3 configuration and job files for the various scenarios in this tutorial +* `pdbs`: Contains the pre-processed PDB files +* `plots`: Contains pre-generated html plots for the various scenarios in this tutorial +* `restraints`: Contains the interface information and the corresponding restraint files for HADDOCK +* `runs`: Contains pre-calculated (partial) run results for the various scenarios in this tutorial +* `scripts`: Contains a variety of scripts used in this tutorial + + +
+
+ +## HADDOCK general concepts + +HADDOCK (see [https://www.bonvinlab.org/software/haddock2.4](https://www.bonvinlab.org/software/haddock2.4){:target="_blank"}) +is a collection of python scripts derived from ARIA ([https://aria.pasteur.fr](https://aria.pasteur.fr){:target="_blank"}) +that harness the power of CNS (Crystallography and NMR System – [https://cns-online.org](https://cns-online.org){:target="_blank"}) +for structure calculation of molecular complexes. What distinguishes HADDOCK from other docking software is its ability, +inherited from CNS, to incorporate experimental data as restraints and use these to guide the docking process alongside +traditional energetics and shape complementarity. Moreover, the intimate coupling with CNS endows HADDOCK with the +ability to actually produce models of sufficient quality to be archived in the Protein Data Bank. + +A central aspect to HADDOCK is the definition of Ambiguous Interaction Restraints or AIRs. These allow the +translation of raw data such as NMR chemical shift perturbation or mutagenesis experiments into distance +restraints that are incorporated in the energy function used in the calculations. AIRs are defined through +a list of residues that fall under two categories: active and passive. Generally, active residues are those +of central importance for the interaction, such as residues whose knockouts abolish the interaction or those +where the chemical shift perturbation is higher. Throughout the simulation, these active residues are +restrained to be part of the interface, if possible, otherwise incurring a scoring penalty. Passive residues +are those that contribute to the interaction, but are deemed of less importance. If such a residue does +not belong in the interface there is no scoring penalty. Hence, a careful selection of which residues are +active and which are passive is critical for the success of the docking. + + +
+
+ +## A brief introduction to HADDOCK3 + + +HADDOCK3 is the next generation integrative modelling software in the +long-lasting HADDOCK project. It represents a complete rethinking and rewriting +of the HADDOCK2.X series, implementing a new way to interact with HADDOCK and +offering new features to users who can now define custom workflows. + +In the previous HADDOCK2.x versions, users had access to a highly +parameterisable yet rigid simulation pipeline composed of three steps: +`rigid-body docking (it0)`, `semi-flexible refinement (it1)`, and `final refinement (itw)`. + ++ + +In HADDOCK3, users have the freedom to configure docking workflows into +functional pipelines by combining the different HADDOCK3 modules, thus +adapting the workflows to their projects. HADDOCK3 has therefore developed to +truthfully work like a puzzle of many pieces (simulation modules) that users can +combine freely. To this end, the “old” HADDOCK machinery has been modularized, +and several new modules added, including third-party software additions. As a +result, the modularization achieved in HADDOCK3 allows users to duplicate steps +within one workflow (e.g., to repeat twice the `it1` stage of the HADDOCK2.x +rigid workflow). + +Note that, for simplification purposes, at this time, not all functionalities of +HADDOCK2.x have been ported to HADDOCK3, which does not (yet) support NMR RDC, +PCS and diffusion anisotropy restraints, cryo-EM restraints and coarse-graining. +Any type of information that can be converted into ambiguous interaction +restraints can, however, be used in HADDOCK3, which also supports the +*ab initio* docking modes of HADDOCK. + ++
+ + +To keep HADDOCK3 modules organized, we catalogued them into several +categories. But, there are no constraints on piping modules of different +categories. + +The main module categories are "topology", "sampling", "refinement", +"scoring", and "analysis". There is no limit to how many modules can belong to a +category. Modules are added as developed, and new categories will be created +if/when needed. You can access the HADDOCK3 documentation page for the list of +all categories and modules. Below is a summary of the available modules: + +* **Topology modules** + * `topoaa`: *generates the all-atom topologies for the CNS engine.* +* **Sampling modules** + * `rigidbody`: *Rigid body energy minimization with CNS (`it0` in haddock2.x).* + * `lightdock`: *Third-party glow-worm swarm optimization docking software.* + * `gdock`: *Gdock integration sampling module.* +* **Model refinement modules** + * `flexref`: *Semi-flexible refinement using a simulated annealing protocol through molecular dynamics simulations in torsion angle space (`it1` in haddock2.x).* + * `emref`: *Refinement by energy minimisation (`itw` EM only in haddock2.4).* + * `mdref`: *Refinement by a short molecular dynamics simulation in explicit solvent (`itw` in haddock2.X).* + * `openmm`: *Molecular Dynamics refinement module.* +* **Scoring modules** + * `emscoring`: *scoring of a complex performing a short EM (builds the topology and all missing atoms).* + * `mdscoring`: *scoring of a complex performing a short MD in explicit solvent + EM (builds the topology and all missing atoms).* + * `prodigyligand`: *performs the scoring of input complexes using PRODIGY-ligand. It predicts deltaG of the complex and can return predictions as either deltaG or pKd values.* + * `prodigyprotein`: *performs the scoring of input complexes using PRODIGY (protein). 
It predicts deltaG of the complex and can return predictions as either deltaG or pKd values.* + * `sasascore`: *solvent accessibility analysis based on some user-defined residues that should be buried or accessible.* +* **Analysis modules** + * `caprieval`: *Calculates CAPRI metrics (i-RMSD, l-RMSD, Fnat, DockQ) with respect to the top scoring model or reference structure if provided.* + * `clustfcc`: *Clusters models based on the fraction of common contacts (FCC)* + * `clustrmsd`: *Clusters models based on pairwise RMSD matrix calculated with the `rmsdmatrix` module.* + * `rmsdmatrix`: *Calculates the pairwise RMSD matrix between all the models generated in the previous step.* + * `ilrmsdmatrix`: *calculates the interface-ligand RMSD (ilRMSD) matrix between all the models generated in the previous step.* + * `seletop`: *Selects the top N models from the previous step.* + * `seletopclusts`: *Selects top N clusters from the previous step.* + * `alascan`: *For each model, the module will mutate the interface residues and calculate the energy differences between the wild type and the mutant, thus providing a measure of the impact of such mutation.* + * `contactmap`: *aims at generating heatmaps and chordcharts of the contacts observed in the input complexes.* +* **Extra modules** + * `exit`: *Stop the workflow when this module is reached.* + +The HADDOCK3 workflows are defined in simple configuration text files, similar to the TOML format but with extra features. +Contrary to HADDOCK2.X, which follows a rigid (yet highly parameterisable) +procedure, in HADDOCK3, you can create your own simulation workflows by +combining a multitude of independent modules that perform specialized tasks. + + ++
+
+ +## Software requirements + + +### Installing HADDOCK3 + +In this tutorial we will make use of the HADDOCK3 version. In case HADDOCK3 +is not pre-installed in your system you will have to install it. + +To obtain HADDOCK3 navigate to [its repository][haddock-repo], fill the +registration form, and then follow the [installation instructions](https://www.bonvinlab.org/haddock3/INSTALL.html){:target="_blank"}. + + +### Auxiliary software + +**[PDB-tools][link-pdbtools]**: A useful collection of Python scripts for the +manipulation (renumbering, changing chain and segIDs...) of PDB files is freely +available from our GitHub repository. `pdb-tools` is automatically installed +with HADDOCK3. If you have activated the HADDOCK3 Python environment you have +access to the pdb-tools package. + +**[PyMol][link-pymol]**: We will make use of PyMol for visualization. If not +already installed on your system, download and install PyMol. + + +
+
+ +## Preparing PDB files for docking + +In this section we will prepare the PDB files of the two proteins for docking. +Crystal structures are available from the [PDBe database](https://www.pdbe.org){:target="_blank"}. +Throughout this step, we will use `pdb-tools` from the command line. + +_**Note**_ that `pdb-tools` is also available as a [web service](https://wenmr.science.uu.nl/pdbtools/){:target="_blank"}. + + +_**Note**_: Before starting to work on the tutorial, make sure to activate haddock3 (follow the workshop-specific instructions above), or, e.g. if installed using `conda` + + +conda activate haddock3 + + + +
+ +### Inspecting and preparing E2A for docking + +We will now inspect the E2A structure. For this start PyMOL and in the command line window of PyMOL (indicated by PyMOL>) type: + + +fetch 1F3G
+show cartoon
+hide lines
+show sticks, resn HIS
+ + +You should see a backbone representation of the protein with only the histidine side-chains visible. +Try to locate the histidines in this structure. + +Is there any phosphate group present in this structure? + +Note that you can zoom on the histidines by typing in PyMOL: + +zoom resn HIS + +Revert to a full view with: + +zoom vis + +As a preparation step before docking, it is advised to remove any irrelevant water and other small molecules (e.g. small molecules from the crystallisation buffer), however do leave relevant co-factors if present. For E2A, the PDB file only contains water molecules. You can remove those in PyMOL by typing: + +remove resn HOH + +Now let us visualize the residues affected by binding as identified by NMR. From [Wang *et al*, EMBO J (2000)](https://onlinelibrary.wiley.com/doi/10.1093/emboj/19.21.5635/abstract){:target="_blank"} the following residues of E2A were identified as having significant chemical shift perturbations: + +38,40,45,46,69,71,78,80,94,96,141 + +We will now switch to a surface representation of the molecule and highlight the NMR-defined interface. In PyMOL type the following commands: + + +color white, all
+show surface
+select e2a_active, (1F3G and resi 38,40,45,46,69,71,78,80,94,96,141)
+color red, e2a_active
+ + ++ + +Inspect the surface. + +Do the identified residues form a well defined patch on the surface? +Do they form a contiguous surface? + +The answer to the last question should be no: We can observe residue in the center of the patch that do not seem significantly affected while still being in the middle of the defined interface. This is the reason why in HADDOCK we also define "*passive*" residues that correspond to surface neighbors of active residues. These can be selected manually, or more conveniently you can let the HADDOCK server do it for you (see [Setting up the docking run](#setting-up-the-docking-run) below). + +As final step save the molecule as a new PDB file which we will call: *e2a_1F3G.pdb*+
+For this in the PyMOL menu on top select: + +File -> Export molecule... +Click on the save button +Select as output format PDB (*.pdb *.pdb.gz) +Name your file *e2a_1F3G.pdb* and note its location + +After saving the molecule delete it from the Pymol window or close Pymol. You can remove the molecule by typing this into the command line window of PyMOL: + + +delete 1F3G + + +In a terminal, change the chain of e2a from A to B. + + +pdb_chain -B e2a_1F3G.pdb > e2a_1F3G_B.pdb + + +This will be useful in the docking phase, as HADDOCK3 needs a different chain associated with each protein involved in the docking. + +
+ +### Adding a phosphate group + +Since the biological function of this complex is to transfer a phosphate group from one protein to another, via histidine side-chains, it is relevant to make sure that a phosphate group is present for docking. As we have seen above none is currently present in the PDB files. HADDOCK does support a list of modified amino acids which you can find at the following link: [https://wenmr.science.uu.nl/haddock2.4/library](https://wenmr.science.uu.nl/haddock2.4/library){:target="_blank"}. + +Check the list of supported modified amino acids. +What is the proper residue name for a phospho-histidine in HADDOCK? + +In order to use a modified amino-acid in HADDOCK, the only thing you will need to do is to edit the PDB file and change the residue name of the amino-acid you want to modify. Do not bother deleting irrelevant atoms or adding missing ones, HADDOCK will take care of that. For E2A, the histidine that is phosphorylated has residue number 90. In order to change it to a phosphorylated histidine do the following: + +Edit the PDB file (*e2a_1F3G_B.pdb*) in your favorite editor +Change the name of histidine 90 to NEP +Save the file (as simple text file) under a new name, e.g. *e2aP_1F3G.pdb* + +**Note:** The same procedure can be used to introduce a mutation in an input protein structure. + + +
+ +### Inspecting and preparing HPR for docking + +We will now inspect the HPR structure. For this start PyMOL and in the command line window of PyMOL type: + + +fetch 1HDN
+show cartoon
+hide lines
+ + +Since this is an NMR structure it does not contain any water molecules and we don't need to remove them. + +Let's visualize the residues affected by binding as identified by NMR. From [Wang *et al*, EMBO J (2000)](https://onlinelibrary.wiley.com/doi/10.1093/emboj/19.21.5635/abstract){:target="_blank"} the following residues were identified as having significant chemical shift perturbations: + +15,16,17,20,48,49,51,52,54,56 + +We will now switch to a surface representation of the molecule and highlight the NMR-defined interface. In PyMOL type the following commands: + + +color white, all
+show surface
+select hpr_active, (1HDN and resi 15,16,17,20,48,49,51,52,54,56)
+color red, hpr_active
+ + +Again, inspect the surface. + +Do the identified residues form a well defined patch on the surface? +Do they form a contiguous surface? + +Now since this is an NMR structure, it actually consists of an ensemble of models. HADDOCK can handle such an ensemble, using each conformer in turn as starting point for the docking. We however recommend to limit the number of conformers used for docking, since the number of conformer combinations of the input molecules might explode (e.g. 10 conformers each will give 100 starting combinations and if we generate 1000 rigid body models (see [HADDOCK general concepts](#haddock-general-concepts) above) each combination will only be sampled 10 times). + +Now let's visualise this NMR ensemble. In PyMOL type: + + +hide all
+show ribbon
+set all_states, on
+ + +You should now be seeing the 30 conformers present in this NMR structure. To illustrate the potential benefit of using an ensemble of conformations as starting point for docking let's look at the side-chains of the active residues: + + +show lines, hpr_active
+ + ++ + +You should be able to see the amount of conformational space sampled by those surface side-chains. You can clearly see that some residues do sample a large variety of conformations, one of which might lead to much better docking results. + +**Note:** Pre-sampling of possible conformational changes can thus be beneficial for the docking, but again do limit the number of conformers used for the docking (or increase the number of sampled models, which is possible for users with expert- or guru-level access. The default access level is however only easy - for a higher level access do request it after registration). + +As final step, save the molecule as a new PDB file which we will call: *hpr-ensemble.pdb* +For this in the PyMOL menu select: + +File -> Export molecule... +Select as State 0 (all states) +Click on Save... +Select as output format PDB (*.pdb *.pdb.gz) +Name your file *hpr-ensemble.pdb* and note its location + + ++
+
+ +## Defining restraints for docking + +Before setting up the docking we need first to generate distance restraint files +in a format suitable for HADDOCK. HADDOCK uses [CNS][link-cns]{:target="_blank"} as computational +engine. A description of the format for the various restraint types supported by +HADDOCK can be found in our [Nature Protocol][nat-pro]{:target="_blank"} paper, Box 4. + +Distance restraints are defined as: + ++assign (selection1) (selection2) distance, lower-bound correction, upper-bound correction ++ +The lower limit for the distance is calculated as: distance minus lower-bound +correction and the upper limit as: distance plus upper-bound correction. The +syntax for the selections can combine information about chainID - `segid` +keyword -, residue number - `resid` keyword -, atom name - `name` keyword. +Other keywords can be used in various combinations of OR and AND statements. +Please refer for that to the [online CNS manual](http://cns-online.org/v1.3/){:target="_blank"}. + +
+ +### Defining active and passive residues for E2A + +As stated before, the following residues were identified as having significant chemical shift perturbations from [Wang *et al*, EMBO J (2000)](https://onlinelibrary.wiley.com/doi/10.1093/emboj/19.21.5635/abstract){:target="_blank"}: + +38,40,45,46,69,71,78,80,94,96,141 + +Hence, we are using these residues as `active` residues for the docking run. However, we have to define `passive` residues before the run. +These passive residues allow us to deal with potentially incomplete binding sites by defining surface neighbors as `passive` residues. +These are added to the definition of the interface but will not lead to any energetic penalty if they are not part of the +binding site in the final models, while the residues defined as `active` (typically the identified or predicted binding +site residues) will. When using the HADDOCK server, `passive` residues will be automatically defined. Here since we are +using a local version, we need to define those manually and create a file in which the active and passive residues will be listed. + +This can easily be done using a haddock3 command line tool in the following way: + + +echo "38 40 45 46 69 71 78 80 94 96 141" > e2a.act-pass +haddock3-restraints passive_from_active e2a_1F3G.pdb 38,40,45,46,69,71,78,80,94,96,141 >> e2a.act-pass + + +The NMR-identified residues and their surface neighbors generated with the above command can be used to define ambiguous interaction restraints, either using the NMR identified residues as active in HADDOCK, or combining those with the surface neighbors and using this combination as passive only. Here we decided to treat the NMR-identified residues as active residues. +Note the file consists of two lines, with the first one defining the `active` residues and +the second line the `passive` ones. We will later use these files to generate the ambiguous distance restraints for HADDOCK. 
+ +In general it is better to be too generous rather than too strict in the +definition of passive residues. + +An important aspect is to filter both the active (the residues identified from +your mapping experiment) and passive residues by their solvent accessibility. +Our web service uses a default relative accessibility of 15% as cutoff. This is +not a hard limit. You might consider including even more buried residues if some +important chemical group seems solvent accessible from a visual inspection. + +
+ +### Defining active and passive residues for HPR + +As stated before, the following residues were identified as having significant chemical shift perturbations from [Wang *et al*, EMBO J (2000)](https://onlinelibrary.wiley.com/doi/10.1093/emboj/19.21.5635/abstract){:target="_blank"}: + +15,16,17,20,48,49,51,52,54,56 + +Using the same haddock3 command line tool: + + +echo "15 16 17 20 48 49 51 52 54 56" > hpr.act-pass +haddock3-restraints passive_from_active hpr-ensemble.pdb 15,16,17,20,48,49,51,52,54,56 >> hpr.act-pass + + +
+ +### Defining the position restraints locally + +Once you have defined your active and passive residues for both molecules, you +can proceed with the generation of the ambiguous interaction restraints (AIR) file for HADDOCK. +For this you can either make use of our online [GenTBL][gentbl] web service, entering the +list of active and passive residues for each molecule, and saving the resulting +restraint list to a text file, or use the relevant `haddock-tools` script. + +To use our `haddock-tools` `active-passive-to-ambig.py` script (also found in the archive of the tutorial) you need to create for each molecule a file containing two lines: + +* The first line corresponds to the list of active residues (numbers separated by spaces) +* The second line corresponds to the list of passive residues. + +* For E2A (the file called `e2a.act-pass`): ++38 40 45 46 69 71 78 80 94 96 141 +35 37 39 42 43 44 47 48 64 66 68 70 72 74 81 82 83 84 86 88 97 98 99 100 105 109 110 131 132 133 142 143 144 145 ++ +* and for HPR (the file called `hpr.act-pass`): ++15 16 17 20 48 49 51 52 54 56 +9 10 11 12 21 24 25 37 38 40 41 43 45 46 47 53 55 57 58 59 60 84 85 ++ +Using those two files, we can generate the CNS-formatted AIR restraint files +with the following command: + + +haddock3-restraints active_passive_to_ambig e2a.act-pass hpr.act-pass \-\-segid-one A \-\-segid-two B > e2a-hpr_air.tbl + + +This generates a file called `e2a-hpr_air.tbl` that contains the AIR +restraints. The default distance range for those is between 0 and 2Å, which +might seem short but makes sense because of the 1/r^6 summation in the AIR +energy function that makes the effective distance be significantly shorter than +the shortest distance entering the sum. + +The effective distance is calculated as the SUM over all pairwise atom-atom +distance combinations between an active residue and all the active+passive on +the other molecule: SUM[1/r^6]^(-1/6). 
+ +If you manually modify this file, it is possible to quickly check if the format is valid. +To do so, you can find in our [haddock-tools][haddock-tools] repository a folder named +`haddock_tbl_validation` that contains a script called `validate_tbl.py` (also provided here in the `scripts` directory). +To use it, type: + + +python ./scripts/validate_tbl.py \-\-silent e2a-hpr_air.tbl + + +No output means that your TBL file is valid. + +
+
+ +## Setting up the docking with HADDOCK3 + +Now that we have all required files at hand (PDB and restraint files) it is time to set up our docking protocol. +For this we need to create a HADDOCK3 configuration file that will define the docking workflow. In contrast to HADDOCK2.X, +we have much more flexibility in doing this. We will illustrate this flexibility by introducing a clustering step +after the initial rigid-body docking stage, select up to 20 models per cluster and refine all of those. + +HADDOCK3 also provides an analysis module (`caprieval`) that allows +to compare models to either the best scoring model (if no reference is given) or a reference structure, which in our case +we have at hand. This will directly allow us to assess the performance of the protocol for the following two scenarios: + +1. Scenario 1: 1000 rigidbody docking models, selection of top200 and flexible refinement + EM +2. Scenario 2: 1000 rigidbody docking models, FCC clustering and selection of max 20 models per cluster followed by flexible refinement and EM + +The basic workflow for both scenarios will consist of the following modules, with some differences in the restraints used and some parameter settings (see below): + +1. **`topoaa`**: *Generates the topologies for the CNS engine and builds missing atoms* +2. **`rigidbody`**: *Rigid body energy minimisation (`it0` in haddock2.x)* +3. **`clustfcc`**: *Clustering of models based on the fraction of common contacts (FCC)* +4. **`seletopclusts`**: *Selection of the top 20 models of all clusters* +5. **`flexref`**: *Semi-flexible refinement of the interface (`it1` in haddock2.4)* +6. **`emref`**: *Final refinement by energy minimisation (`itw` EM only in haddock2.4)* +7. **`clustfcc`**: *Clustering of models based on the fraction of common contacts (FCC)* +8. 
**`caprieval`**: *Calculates CAPRI metrics (i-RMSD, l-RMSD, Fnat, DockQ) with respect to the top scoring model or reference structure if provided* + +The input PDB files are the same for both scenarios. The differences are in how the models are selected after the rigid body stage. + + +
+ +### HADDOCK3 execution modes + +HADDOCK3 currently supports three different execution modes that are defined in the first section of the configuration file of a run. + +#### 1. local mode + +In this mode HADDOCK3 will run on the current system, using the defined number of cores (`ncores`) in the config file +to a maximum of the total number of available cores on the system minus one. An example of the relevant parameters to be defined in the first section of the config file is: + +{% highlight toml %} +# compute mode +mode = "local" +# 1 nodes x 96 ncores +ncores = 96 +{% endhighlight %} + +In this mode HADDOCK3 can be started from the command line with as argument the configuration file of the defined workflow. + + +haddock3 \+ + +Alternatively redirect the output to a log file and send haddock3 to the background. + +_**Note**_: This is the execution mode you should use on the NMRBox resources. For the tutorial we limit the number of cores to 10. + + + +haddock3 \\> haddock3.log & + + +_**Note**_: This is also the execution mode that should be used for example when submitting the HADDOCK3 job to a node of a cluster, requesting X number of cores. + +++ ++ View an example script for submitting via the slurm batch system expand_more +
+ + {% highlight shell %} + #!/bin/bash + #SBATCH --nodes=1 + #SBATCH --tasks-per-node=96 + #SBATCH -J haddock3 + #SBATCH --partition=medium + + # load haddock3 module + module load haddock3 + # or activate the haddock3 conda environment + ##source $HOME/miniconda3/etc/profile.d/conda.sh + ##conda activate haddock3 + + # go to the run directory + cd $HOME/HADDOCK3-protein-protein-basic + + # execute + haddock3 docking-protein-protein-full.cfg + {% endhighlight %} +
+
+ +#### 2. batch mode + +In this mode HADDOCK3 will typically be started on your local server (e.g. the login node) and will dispatch jobs to the batch system of your cluster. +Two batch systems are currently supported: `slurm` and `torque` (defined by the `batch_type` parameter). In the configuration file you will +have to define the `queue` name and the maximum number of concurrent jobs sent to the queue (`queue_limit`). Since HADDOCK3 single model +calculations are quite fast, it is recommended to calculate multiple models within one job submitted to the batch system. +The number of models per job is defined by the `concat` parameter in the configuration file. +You want to avoid sending thousands of very short jobs to the batch system if you want to remain friends with your system administrators... + +An example of the relevant parameters to be defined in the first section of the config file is: + +{% highlight toml %} +# compute mode +mode = "batch" +# batch system +batch_type = "slurm" +# queue name +queue = "short" +# number of concurrent jobs to submit to the batch system +queue_limit = 100 +# number of models to produce per submitted job +concat = 10 +{% endhighlight %} + +In this mode HADDOCK3 can be started from the command line as for the local mode. + +#### 3. MPI mode + +HADDOCK3 supports a parallel MPI implementation (functional but still very experimental at this stage). For this to work, the `mpi4py` library +must have been installed at installation time. Refer to the [MPI-related instructions](https://www.bonvinlab.org/haddock3/tutorials/mpi.html). +The execution mode should be set to `mpi` and the total number of cores should match the requested resources when submitting to the batch system. 
+ +An example of the relevant parameters to be defined in the first section of the config file is: + +{% highlight toml %} +# compute mode +mode = "mpi" +# 1 nodes x 50 tasks = ncores = 50 +ncores = 50 +{% endhighlight %} + +In this execution mode the HADDOCK3 job should be submitted to the batch system requesting the corresponding number of nodes and cores per node. + +++ ++ View an example script for submitting an MPI HADDOCK3 job to the slurm batch system expand_more +
+ {% highlight shell %} + #!/bin/bash + #SBATCH --nodes=5 + #SBATCH --tasks-per-node=50 + #SBATCH -J haddock3mpi + + # load haddock3 module + module load haddock3 + # or make sure haddock3 is activated + ##source $HOME/miniconda3/etc/profile.d/conda.sh + ##conda activate haddock3 + + # go to the run directory + # edit if needed to specify the correct location + cd $HOME/HADDOCK3-protein-protein-basic + + # execute + haddock3 docking-protein-protein-full.cfg + {% endhighlight %} +
+
+ +### Scenario 1: 1000 rigidbody docking models, selection of top 200 and flexible refinement + EM + +Now that we have all data ready, and know about execution modes of HADDOCK3 it is time to setup the docking for the first scenario. The restraint file to use for this is `e2a-hpr_air.tbl`. We proceed to produce 1000 rigidbody docking models, from which 200 will be selected and refined through flexible refinment and energy minimization. +For the analysis following the docking results, we are using the solved complex [1GGR](https://www.rcsb.org/structure/1GGR), named e2a-hpr_1GGR.pdb. +The configuration file for this scenario is: + +{% highlight toml %} +# ==================================================================== +# Protein-protein docking example with NMR-derived ambiguous interaction restraints + +# directory in which the scoring will be done +run_dir = "scenario1-full" + +# execution mode +mode = "local" +# maximum of 50 cores (limited by the number of available cores) +ncores = 50 + +# molecules to be docked +molecules = [ + "data/e2aP_1F3G.pdb", + "data/hpr_ensemble.pdb" + ] + +# ==================================================================== +# Parameters for each stage are defined below, prefer full paths +# ==================================================================== +[topoaa] +autohis = false +[topoaa.mol1] +nhisd = 0 +nhise = 1 +hise_1 = 75 +[topoaa.mol2] +nhisd = 1 +hisd_1 = 76 +nhise = 1 +hise_1 = 15 + +[rigidbody] +tolerance = 5 +ambig_fname = "data/e2a-hpr_air.tbl" + +[caprieval] +reference_fname = "data/e2a-hpr_1GGR.pdb" + +[seletop] + +[caprieval] +reference_fname = "data/e2a-hpr_1GGR.pdb" + +[flexref] +tolerance = 5 +ambig_fname = "data/e2a-hpr_air.tbl" + +[caprieval] +reference_fname = "data/e2a-hpr_1GGR.pdb" + +[emref] +tolerance = 5 +ambig_fname = "data/e2a-hpr_air.tbl" + +[caprieval] +reference_fname = "data/e2a-hpr_1GGR.pdb" + +[clustfcc] + +[seletopclusts] + +[caprieval] +reference_fname = "data/e2a-hpr_1GGR.pdb" + +# 
==================================================================== +{% endhighlight %} + +This configuration file is provided in the `haddock3` directory of the downloaded data set for this tutorial as `docking-protein-protein-full.cfg`. + +If you have everything ready, you can launch haddock3 either from the command line, or, better, +submitting it to the batch system requesting in this local run mode a full node (see local execution mode above). + +
+ +### Scenario 2: 1000 rigidbody docking models, FCC clustering and selection of max 20 models per cluster followed by flexible refinement and EM + +In scenario 2, we proceed to produce 1000 rigidbody docking models, from which we proceed to do a first clustering analysis. From the top clusters a flexible refinment then energy minization is done. +For the analysis following the docking results, we are using the solved complex [1GGR](https://www.rcsb.org/structure/1GGR), named e2a-hpr_1GGR.pdb. +The configuration file for this scenario is: + +{% highlight toml %} +# ==================================================================== +# Protein-protein docking example with NMR-derived ambiguous interaction restraints +# ==================================================================== + +# directory in which the scoring will be done +run_dir = "scenario2-cltsel-full" + +# execution mode +mode = "local" +# maximum of 50 cores (limited by the number of available cores) +ncores = 50 + +# molecules to be docked +molecules = [ + "data/e2aP_1F3G.pdb", + "data/hpr_ensemble.pdb" + ] + +# ==================================================================== +# Parameters for each stage are defined below, prefer full paths +# ==================================================================== + +[topoaa] +autohis = false +[topoaa.mol1] +nhisd = 0 +nhise = 1 +hise_1 = 75 +[topoaa.mol2] +nhisd = 1 +hisd_1 = 76 +nhise = 1 +hise_1 = 15 + +[rigidbody] +tolerance = 5 +ambig_fname = "data/e2a-hpr_air.tbl" + +[caprieval] +reference_fname = "data/e2a-hpr_1GGR.pdb" + +[clustfcc] + +[seletopclusts] +# select the best 20 models of each cluster +top_models = 20 + +[caprieval] +reference_fname = "data/e2a-hpr_1GGR.pdb" + +[flexref] +tolerance = 5 +ambig_fname = "data/e2a-hpr_air.tbl" + +[caprieval] +reference_fname = "data/e2a-hpr_1GGR.pdb" + +[emref] +tolerance = 5 +ambig_fname = "data/e2a-hpr_air.tbl" + +[caprieval] +reference_fname = "data/e2a-hpr_1GGR.pdb" + +[clustfcc] + 
+[seletopclusts] + +[caprieval] +reference_fname = "data/e2a-hpr_1GGR.pdb" + +# ==================================================================== +{% endhighlight %} + +This configuration file is provided in the `haddock3` directory of the downloaded data set for this tutorial as `docking-protein-protein-cltsel-full.cfg`. + +If you have everything ready, you can launch haddock3 either from the command line, or, better, submitting it to the batch system requesting in this local run mode a full node (see local execution mode above). + +
+
+ +## Analysis of docking results + +### Structure of the run directory + +Once your run has completed inspect the content of the resulting directory. You will find the various steps (modules) of the defined workflow numbered sequentially, e.g.: + +{% highlight shell %} +> ls scenario2/ + 00_topoaa/ + 01_rigidbody/ + 02_caprieval/ + 03_clustfcc/ + 04_seletopclusts/ + 05_caprieval/ + 06_flexref/ + 07_caprieval/ + 08_emref/ + 09_caprieval/ + 10_clustfcc/ + 11_seletopclusts/ + 12_caprieval/ + analysis/ + data/ + log +{% endhighlight %} + +There is in addition the log file (text file) and two additional directories: + +- the `data` directory containing the input data (PDB and restraint files) for the various modules +- the `analysis` directory containing various plots to visualise the results for each `caprieval` step + +You can find information about the duration of the run at the bottom of the log file. Each sampling/refinement/selection module will contain PDB files. + +For example, the `X_seletopclusts` directory contains the selected models from each cluster. The clusters in that directory are numbered based +on their rank, i.e. `cluster_1` refers to the top-ranked cluster. Information about the origin of these files can be found in that directory in the `seletopclusts.txt` file. + +The simplest way to extract ranking information and the corresponding HADDOCK scores is to look at the `X_caprieval` directories (which is why it is a good idea to have it as the final module, and possibly as intermediate steps). This directory will always contain a `capri_ss.tsv` file, which contains the model names, rankings and statistics (score, iRMSD, Fnat, lRMSD, ilRMSD and dockq score). 
E.g.: + ++model md5 caprieval_rank score irmsd fnat lrmsd ilrmsd dockq cluster_id cluster_ranking model-cluster_ranking air angles bonds bsa cdih coup dani desolv dihe elec improper rdcs rg sym total vdw vean xpcs +../07_emref/emref_33.pdb - 1 -147.229 0.894 0.889 1.452 1.542 0.866 - - - 6.877 0.000 0.000 1533.550 0.000 0.000 0.000 -10.230 0.000 -522.517 0.000 0.000 0.000 0.000 -548.824 -33.184 0.000 0.000 +../07_emref/emref_3.pdb - 2 -145.818 0.949 0.917 2.103 1.801 0.858 - - - 7.810 0.000 0.000 1569.000 0.000 0.000 0.000 -9.026 0.000 -533.832 0.000 0.000 0.000 0.000 -556.827 -30.806 0.000 0.000 +../07_emref/emref_52.pdb - 3 -141.925 1.016 0.889 1.378 1.678 0.850 - - - 12.488 0.000 0.000 1591.170 0.000 0.000 0.000 -9.507 0.000 -482.747 0.000 0.000 0.000 0.000 -507.376 -37.117 0.000 0.000 +../07_emref/emref_4.pdb - 4 -141.400 1.067 0.778 2.299 2.094 0.791 - - - 4.617 0.000 0.000 1515.630 0.000 0.000 0.000 -10.495 0.000 -526.925 0.000 0.000 0.000 0.000 -548.288 -25.981 0.000 0.000 +../07_emref/emref_81.pdb - 5 -137.507 1.569 0.639 4.430 3.047 0.634 - - - 30.617 0.000 0.000 1562.350 0.000 0.000 0.000 -16.298 0.000 -442.005 0.000 0.000 0.000 0.000 -447.257 -35.870 0.000 0.000 +.... ++ +If clustering is performed prior to calling the `caprieval` module, the `capri_ss.tsv` will also contain information about to which cluster the model belongs to and its ranking within the cluster as shown above. 
+ +The relevant statistics are: + +* **score**: *the HADDOCK score (arbitrary units)* +* **irmsd**: *the interface RMSD, calculated over the interfaces of the molecules* +* **fnat**: *the fraction of native contacts* +* **lrmsd**: *the ligand RMSD, calculated on the ligand after fitting on the receptor (1st component)* +* **ilrmsd**: *the interface-ligand RMSD, calculated over the interface of the ligand after fitting on the interface of the receptor (more relevant for small ligands for example)* +* **dockq**: *the DockQ score, which is a combination of irmsd, lrmsd and fnat and provides a continuous scale between 1 (equal to reference) and 0* + +The iRMSD, lRMSD and Fnat metrics are the ones used in the blind protein-protein prediction experiment [CAPRI](https://capri.ebi.ac.uk/) (Critical Assessment of PRedicted Interactions). + +In CAPRI the quality of a model is defined as (for protein-protein complexes): + +* **acceptable model**: i-RMSD < 4Å or l-RMSD<10Å and Fnat > 0.1 +* **medium quality model**: i-RMSD < 2Å or l-RMSD<5Å and Fnat > 0.3 +* **high quality model**: i-RMSD < 1Å or l-RMSD<1Å and Fnat > 0.5 + + +Based on these CAPRI criteria, what is the quality of the best model listed above (emref_33.pdb)? + + +In case the `caprieval` module is called after a clustering step an additional file will be present in the directory: `capri_clt.tsv`. +This file contains the cluster ranking and score statistics, averaged over the minimum number of models defined for clustering +(4 by default), with their corresponding standard deviations. 
E.g.: + ++cluster_rank cluster_id n under_eval score score_std irmsd irmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank +1 1 4 - -124.146 3.141 1.022 0.113 0.785 0.074 1.812 0.477 0.808 0.045 19.088 8.507 1493.348 101.980 -14.374 2.868 -390.668 44.141 -405.127 39.003 -33.547 7.112 1 +2 2 4 - -109.733 4.447 8.384 0.538 0.153 0.063 15.962 0.969 0.135 0.029 64.065 29.996 1461.225 113.842 -13.164 2.827 -394.903 13.092 -354.834 34.074 -23.996 4.254 2 +3 6 4 - -105.989 3.889 4.022 0.232 0.243 0.050 6.572 0.337 0.331 0.025 38.555 17.146 1385.205 39.561 -6.273 3.174 -425.420 56.558 -405.353 38.939 -18.487 5.586 3 +... ++ +In this file you find the cluster rank, the cluster ID (which is related to the size of the cluster, 1 being always the largest cluster), the number of models (n) in the cluster and the corresponding statistics (averages + standard deviations). The corresponding cluster PDB files will be found in the processing `X_seletopclusts` directory. + +
+ +### Analysis scenario 1: + +Let us now analyze the docking results for this scenario. Use for that either your own run or a pre-calculated run provided in the `runs` directory (note that to save space only partial data have been kept in this pre-calculated runs, but all relevant information for this tutorial is available). + +First of all let us check the final cluster statistics. + +Inspect the _capri_clt.tsv_ file + ++++View the pre-calculated 11_caprieval/capri_clt.tsv file expand_more +
++cluster_rank cluster_id n under_eval score score_std irmsdirmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std ilrmsd ilrmsd_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank +1 1 132 - -136.315 2.459 0.922 0.050 0.847 0.0501.497 0.158 0.848 0.022 1.577 0.100 17.510 10.499 1592.155 26.85-11.290 2.460 -477.868 20.524 -491.561 13.390 -31.203 1.856 1 +2 2 41 - -118.410 9.418 7.843 0.237 0.194 0.00014.976 0.870 0.158 0.008 14.161 0.256 33.123 27.142 1525.405 19.48-11.788 2.649 -396.013 33.391 -393.621 55.889 -30.732 8.145 2 +3 3 8 - -87.144 5.206 3.741 0.418 0.333 0.039 7.4090.410 0.348 0.025 7.643 0.422 41.435 13.967 1290.872 72.223 -15.930 4.468 -245.765 38.007 -230.535 31.740 -26.204 3.205 3 +4 4 4 - -55.138 9.488 2.340 0.218 0.292 0.031 5.7850.727 0.424 0.019 5.334 0.773 42.306 19.922 960.189 142.370 -13.059 3.913-158.379 14.190 -130.707 26.432 -14.634 3.528 4 ++
+ +How many clusters are generated? + +Look at the score of the first few clusters: Are they significantly different if you consider their average scores and standard deviations? + +Since for this tutorial we have at hand the crystal structure of the complex, we provided it as reference to the `caprieval` modules. +This means that the iRMSD, lRMSD, Fnat and DockQ statistics report on the quality of the docked model compared to the reference crystal structure. + +How many clusters of acceptable or better quality have been generated according to CAPRI criteria? + +What is the rank of the best cluster generated? + +What is the rank of the first acceptable or better cluster generated? + + +We are providing in the `scripts` directory a simple script that extracts some cluster statistics for acceptable or better clusters from the `caprieval` steps. +To use it, simply call the script with as argument the run directory you want to analyze, e.g.: + + + ./scripts/extract-capri-stats-clt.sh ./scenario1 + + +++ ++ View the output of the script expand_more +
++============================================== +== scenario1-full/02_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 0 out of 1 +Total number of medium or better clusters: 0 out of 1 +Total number of high quality clusters: 0 out of 1 + +First acceptable cluster - rank: i-RMSD: Fnat: DockQ: +First medium cluster - rank: i-RMSD: Fnat: DockQ: +Best cluster - rank: - i-RMSD: 6.407 Fnat: 0.202 DockQ: 0.264 +============================================== +== scenario1-full/04_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 0 out of 1 +Total number of medium or better clusters: 0 out of 1 +Total number of high quality clusters: 0 out of 1 + +First acceptable cluster - rank: i-RMSD: Fnat: DockQ: +First medium cluster - rank: i-RMSD: Fnat: DockQ: +Best cluster - rank: - i-RMSD: 6.407 Fnat: 0.202 DockQ: 0.264 +============================================== +== scenario1-full/06_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 1 out of 1 +Total number of medium or better clusters: 0 out of 1 +Total number of high quality clusters: 0 out of 1 + +First acceptable cluster - rank: - i-RMSD: 2.976 Fnat: 0.611 DockQ: 0.601 +First medium cluster - rank: i-RMSD: Fnat: DockQ: +Best cluster - rank: - i-RMSD: 2.976 Fnat: 0.611 DockQ: 0.601 +============================================== +== scenario1-full/08_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 1 out of 1 +Total number of medium or better clusters: 1 out of 1 +Total number of high quality clusters: 0 out of 1 + +First acceptable cluster - rank: - i-RMSD: 1.673 Fnat: 0.736 DockQ: 0.727 +First medium cluster - rank: - i-RMSD: 1.673 Fnat: 0.736 DockQ: 0.727 +Best cluster - rank: - i-RMSD: 1.673 Fnat: 0.736 DockQ: 0.727 
+============================================== +== scenario1-full/11_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 3 out of 4 +Total number of medium or better clusters: 1 out of 4 +Total number of high quality clusters: 1 out of 4 + +First acceptable cluster - rank: 1 i-RMSD: 0.922 Fnat: 0.847 DockQ: 0.848 +First medium cluster - rank: 1 i-RMSD: 0.922 Fnat: 0.847 DockQ: 0.848 +Best cluster - rank: 1 i-RMSD: 0.922 Fnat: 0.847 DockQ: 0.848 ++
+ +Similarly some simple statistics can be extracted from the single model `caprieval` `capri_ss.tsv` files with the `extract-capri-stats.sh` script: + + + + ./scripts/extract-capri-stats.sh ./runs/scenario1-surface + + +++ ++View the output of the script: expand_more +
++============================================== +== scenario1-full/02_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 365 out of 1000 +Total number of medium or better models: 199 out of 1000 +Total number of high quality models: 0 out of 1000 + +First acceptable model - rank: 3 i-RMSD: 1.153 Fnat: 0.556 DockQ: 0.711 +First medium model - rank: 3 i-RMSD: 1.153 Fnat: 0.556 DockQ: 0.711 +Best model - rank: 46 i-RMSD: 1.145 Fnat: 0.556 DockQ: 0.713 +============================================== +== scenario1-full/04_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 144 out of 200 +Total number of medium or better models: 137 out of 200 +Total number of high quality models: 0 out of 200 + +First acceptable model - rank: 3 i-RMSD: 1.153 Fnat: 0.556 DockQ: 0.711 +First medium model - rank: 3 i-RMSD: 1.153 Fnat: 0.556 DockQ: 0.711 +Best model - rank: 46 i-RMSD: 1.145 Fnat: 0.556 DockQ: 0.713 +============================================== +== scenario1-full/06_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 147 out of 200 +Total number of medium or better models: 118 out of 200 +Total number of high quality models: 20 out of 200 + +First acceptable model - rank: 2 i-RMSD: 1.221 Fnat: 0.694 DockQ: 0.727 +First medium model - rank: 2 i-RMSD: 1.221 Fnat: 0.694 DockQ: 0.727 +Best model - rank: 30 i-RMSD: 0.883 Fnat: 0.750 DockQ: 0.823 +============================================== +== scenario1-full/08_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 147 out of 200 +Total number of medium or better models: 118 out of 200 +Total number of high quality models: 34 out of 200 + +First acceptable model - rank: 1 i-RMSD: 1.219 Fnat: 0.833 DockQ: 0.787 +First medium model - rank: 1 i-RMSD: 1.219 Fnat: 0.833 
DockQ: 0.787 +Best model - rank: 39 i-RMSD: 0.807 Fnat: 0.833 DockQ: 0.862 +============================================== +== scenario1-full/11_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 141 out of 185 +Total number of medium or better models: 116 out of 185 +Total number of high quality models: 34 out of 185 + +First acceptable model - rank: 1 i-RMSD: 0.907 Fnat: 0.917 DockQ: 0.871 +First medium model - rank: 1 i-RMSD: 0.907 Fnat: 0.917 DockQ: 0.871 +Best model - rank: 36 i-RMSD: 0.807 Fnat: 0.833 DockQ: 0.862 ++
+ +_**Note**_ that this kind of analysis only makes sense when we know the reference complex and for benchmarking / performance analysis purposes. + +Look at the single structure statistics provided by the script + +How does the quality of the model change after flexible refinement? Consider here the various metrics. + +++ ++ Answer expand_more +
++ In terms of iRMSD values we only observe very small differences in the best models, but the change in ranking is impressive! + The fraction of native contacts and the DockQ scores are however improving much more after flexible refinement. + All this will of course depend on how different are the bound and unbound conformations and the amount of data + used to drive the docking process. In general, from our experience, the more and better data at hand, + the larger the conformational changes that can be induced. +
+
+ +Is the best model always ranked first? + +++ ++ Answer expand_more +
++ This is clearly not the case. The scoring function is not perfect, but does a reasonable job in ranking models of acceptable or better quality on top in this case. +
+
+ +#### Analysis scenario 1: visualising the scores and their components + +We have precalculated a number of interactive plots to visualize the scores and their components versus ranks and model quality. + + +Examine the plots (remember here that higher DockQ values and lower i-RMSD values correspond to better models) + + +Models statistics: + +* [iRMSD versus HADDOCK score](plots/scenario1/irmsd_score.html){:target="_blank"} +* [DockQ versus HADDOCK score](plots/scenario1/dockq_score.html){:target="_blank"} + +Cluster statistics (distributions of values per cluster ordered according to their HADDOCK rank): + +* [HADDOCK scores](plots/scenario1/score_clt.html){:target="_blank"} +* [iRMSD](plots/scenario1/irmsd_clt.html){:target="_blank"} +* [DockQ](plots/scenario1/dockq_clt.html){:target="_blank"} + +
+ +### Analysis scenario 2: + +Let us now analyse the docking results for this scenario. Use for that either your own run or a pre-calculated run provided in the `runs` directory. +Go into the _analysis/_caprieval_analysis_ directory of the respective run directory and + +Inspect the final cluster statistics in the _capri_clt.tsv_ file + +++ +How many clusters of acceptable or better quality have been generated according to CAPRI criteria? + +What is the rank of the best cluster generated? + +What is the rank of the first acceptable or better cluster generated? + + +In this run we also had a `caprieval` after the clustering of the rigid body models (step 5 of our workflow). + +Inspect the corresponding _capri_clt.tsv_ file + ++View the pre-calculated _caprieval/capri_clt.tsv file expand_more +
++cluster_rank cluster_id n under_eval score score_std irmsdirmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std ilrmsd ilrmsd_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank +1 1 37 - -124.658 10.252 3.857 0.922 0.319 0.0976.660 1.321 0.365 0.085 6.780 1.681 31.615 22.188 1616.990 119.623 -6.778 6.101 -437.724 75.263 -439.605 69.689 -33.496 10.191 +2 3 26 - -119.435 4.949 0.982 0.026 0.805 0.0522.058 0.408 0.816 0.026 1.781 0.120 29.748 11.990 1522.275 58.81-14.346 5.089 -383.147 87.371 -384.832 89.623 -31.434 9.369 2 +3 8 15 - -117.501 8.381 10.507 0.012 0.049 0.02318.245 0.253 0.082 0.008 17.406 0.097 16.941 13.148 1695.840 65.32-11.683 2.440 -305.048 31.227 -334.610 34.942 -46.502 3.538 3 +4 10 12 - -115.472 5.836 0.980 0.038 0.812 0.0532.062 0.443 0.819 0.020 1.762 0.114 19.993 10.271 1488.888 64.59-14.848 1.764 -351.208 32.914 -363.598 30.092 -32.382 8.809 4 +5 2 27 - -106.389 2.683 9.379 0.146 0.125 0.01416.285 0.693 0.122 0.004 16.768 0.405 20.260 12.423 1359.715 23.92-10.242 1.447 -272.409 33.608 -295.839 37.474 -43.691 4.786 5 +6 4 25 - -106.037 2.709 7.852 0.619 0.132 0.07715.047 1.787 0.139 0.042 14.277 0.588 43.187 10.734 1403.977 70.15-13.256 1.092 -361.058 57.590 -342.759 55.416 -24.888 11.965 6 +7 9 13 - -105.524 8.380 10.273 0.355 0.076 0.01217.160 0.297 0.098 0.005 16.986 0.601 52.965 34.487 1493.557 88.840.241 1.661 -433.424 58.594 -404.836 36.296 -24.376 10.058 7 +8 13 11 - -104.016 12.736 6.651 1.287 0.215 0.04112.319 2.028 0.201 0.046 11.777 2.166 67.269 34.762 1452.928 53.36-7.209 2.522 -367.069 36.068 -329.921 62.280 -30.121 8.255 8 +9 12 11 - -100.932 9.238 10.829 0.016 0.132 0.01218.562 0.153 0.108 0.004 17.755 0.101 32.367 14.729 1645.305 104.797 -18.030 2.335 -232.574 32.271 -239.830 42.371 -39.624 7.4289 +... +++++View the pre-calculated 5_caprieval/capri_clt.tsv file expand_more +
++cluster_rank cluster_id n under_eval score score_std irmsdirmsd_std fnat fnat_std lrmsd lrmsd_std dockq dockq_std ilrmsd ilrmsd_std air air_std bsa bsa_std desolv desolv_std elec elec_std total total_std vdw vdw_std caprieval_rank +1 4 20 - -32.647 0.718 8.443 0.050 0.056 0.000 16.670.440 0.098 0.003 15.142 0.029 103.171 30.153 1037.440 40.574 -16.600 0.384 -6.642 0.367 90.292 34.825 -6.237 4.862 1 +2 1 20 - -32.078 0.309 1.193 0.052 0.563 0.012 2.3440.382 0.701 0.015 2.241 0.176 144.927 37.448 1185.507 24.515 -14.154 0.495 -7.458 0.197 131.527 41.553 -5.942 4.909 2 +3 11 15 - -31.524 0.512 2.591 0.043 0.306 0.000 5.8830.150 0.411 0.006 5.951 0.125 238.270 90.904 838.533 10.610 -17.621 0.383-7.971 0.168 237.269 95.233 6.969 4.900 3 +4 23 6 - -31.175 0.237 4.180 0.009 0.285 0.012 7.7030.015 0.316 0.004 8.171 0.036 217.839 78.900 1071.035 16.129 -15.642 0.348 -6.892 0.257 199.998 83.806 -10.949 5.140 4 +5 32 4 - -30.152 1.356 7.126 0.074 0.069 0.024 16.690.938 0.106 0.014 12.952 0.455 286.907 150.515 1041.192 37.687 -13.618 0.880 -8.950 0.629 273.851 150.190 -4.106 4.566 5 +6 33 4 - -29.431 2.824 2.660 0.894 0.326 0.121 7.1353.087 0.407 0.141 6.418 2.586 124.179 48.395 917.899 78.204 -13.272 2.489-8.230 0.566 116.856 51.814 0.907 4.084 6 +7 2 20 - -27.915 0.952 4.133 0.017 0.139 0.020 7.0510.018 0.282 0.007 7.455 0.023 264.450 31.588 1014.276 17.755 -11.711 0.867 -8.673 0.226 252.511 36.667 -3.266 5.371 7 +8 17 11 - -27.474 1.291 6.676 0.703 0.063 0.012 11.461.246 0.157 0.014 12.049 1.207 303.023 57.328 963.135 62.068 -12.556 1.856-8.338 0.587 296.748 55.220 2.063 6.790 8 +9 13 14 - -27.374 0.754 10.733 0.011 0.083 0.000 18.250.037 0.094 0.000 17.522 0.031 134.468 43.797 1039.598 13.308 -13.613 0.558 -4.687 0.149 127.422 45.173 -2.360 2.548 9 +... ++
+ +How many clusters are generated? + +Is this the same number as after refinement (see above)? + +If not, what could be the reason? + +Consider again the rank of the first acceptable cluster based on iRMSD values. How does this compare with the refined clusters (see above)? + +++ ++ Answer expand_more +
++ After rigid body docking the first acceptable cluster is at rank 2 (iRMSD 1.193), while after refinement an acceptable cluster is already found at rank 1 (iRMSD 3.857) and the cluster at rank 2 has improved to an iRMSD of 0.982. +
+
+ +Use the `extract-capri-stats-clt.sh` script to extract some simple cluster statistics for this run. + + + ./scripts/extract-capri-stats-clt.sh runs/scenario2/ + + + +++ ++ View the output of the script expand_more +
++============================================== +== scenario2-cltsel-full/02_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 0 out of 1 +Total number of medium or better clusters: 0 out of 1 +Total number of high quality clusters: 0 out of 1 + +First acceptable cluster - rank: i-RMSD: Fnat: DockQ: +First medium cluster - rank: i-RMSD: Fnat: DockQ: +Best cluster - rank: - i-RMSD: 6.407 Fnat: 0.202 DockQ: 0.264 +============================================== +== scenario2-cltsel-full/05_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 6 out of 33 +Total number of medium or better clusters: 1 out of 33 +Total number of high quality clusters: 0 out of 33 + +First acceptable cluster - rank: 2 i-RMSD: 1.193 Fnat: 0.563 DockQ: 0.701 +First medium cluster - rank: 2 i-RMSD: 1.193 Fnat: 0.563 DockQ: 0.701 +Best cluster - rank: 2 i-RMSD: 1.193 Fnat: 0.563 DockQ: 0.701 +============================================== +== scenario2-cltsel-full/07_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 0 out of 1 +Total number of medium or better clusters: 0 out of 1 +Total number of high quality clusters: 0 out of 1 + +First acceptable cluster - rank: i-RMSD: Fnat: DockQ: +First medium cluster - rank: i-RMSD: Fnat: DockQ: +Best cluster - rank: - i-RMSD: 8.237 Fnat: 0.104 DockQ: 0.121 +============================================== +== scenario2-cltsel-full/09_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 0 out of 1 +Total number of medium or better clusters: 0 out of 1 +Total number of high quality clusters: 0 out of 1 + +First acceptable cluster - rank: i-RMSD: Fnat: DockQ: +First medium cluster - rank: i-RMSD: Fnat: DockQ: +Best cluster - rank: - i-RMSD: 4.840 Fnat: 0.361 DockQ: 0.400 
+============================================== +== scenario2-cltsel-full/12_caprieval/capri_clt.tsv +============================================== +Total number of acceptable or better clusters: 4 out of 25 +Total number of medium or better clusters: 2 out of 25 +Total number of high quality clusters: 2 out of 25 + +First acceptable cluster - rank: 1 i-RMSD: 3.857 Fnat: 0.319 DockQ: 0.365 +First medium cluster - rank: 2 i-RMSD: 0.982 Fnat: 0.805 DockQ: 0.816 +Best cluster - rank: 4 i-RMSD: 0.980 Fnat: 0.812 DockQ: 0.819 ++
+ +Similarly some simple statistics can be extracted from the single model `caprieval` `capri_ss.tsv` files with the `extract-capri-stats.sh` script: + + +./scripts/extract-capri-stats.sh ./runs/scenario2 + + +++ ++View the output of the script expand_more +
++============================================== +== scenario2-cltsel-full/02_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 365 out of 1000 +Total number of medium or better models: 199 out of 1000 +Total number of high quality models: 0 out of 1000 + +First acceptable model - rank: 3 i-RMSD: 1.153 Fnat: 0.556 DockQ: 0.711 +First medium model - rank: 3 i-RMSD: 1.153 Fnat: 0.556 DockQ: 0.711 +Best model - rank: 46 i-RMSD: 1.145 Fnat: 0.556 DockQ: 0.713 +============================================== +== scenario2-cltsel-full/05_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 62 out of 375 +Total number of medium or better models: 22 out of 375 +Total number of high quality models: 0 out of 375 + +First acceptable model - rank: 3 i-RMSD: 1.153 Fnat: 0.556 DockQ: 0.711 +First medium model - rank: 3 i-RMSD: 1.153 Fnat: 0.556 DockQ: 0.711 +Best model - rank: 46 i-RMSD: 1.145 Fnat: 0.556 DockQ: 0.713 +============================================== +== scenario2-cltsel-full/07_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 74 out of 375 +Total number of medium or better models: 27 out of 375 +Total number of high quality models: 1 out of 375 + +First acceptable model - rank: 6 i-RMSD: 1.081 Fnat: 0.750 DockQ: 0.771 +First medium model - rank: 6 i-RMSD: 1.081 Fnat: 0.750 DockQ: 0.771 +Best model - rank: 36 i-RMSD: 0.930 Fnat: 0.778 DockQ: 0.822 +============================================== +== scenario2-cltsel-full/09_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 74 out of 375 +Total number of medium or better models: 27 out of 375 +Total number of high quality models: 7 out of 375 + +First acceptable model - rank: 1 i-RMSD: 3.718 Fnat: 0.333 DockQ: 0.382 +First medium model - rank: 3 i-RMSD: 
0.991 Fnat: 0.806 DockQ: 0.821 +Best model - rank: 60 i-RMSD: 0.896 Fnat: 0.778 DockQ: 0.828 +============================================== +== scenario2-cltsel-full/12_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 65 out of 317 +Total number of medium or better models: 27 out of 317 +Total number of high quality models: 7 out of 317 + +First acceptable model - rank: 1 i-RMSD: 3.718 Fnat: 0.333 DockQ: 0.382 +First medium model - rank: 3 i-RMSD: 0.991 Fnat: 0.806 DockQ: 0.821 +Best model - rank: 54 i-RMSD: 0.896 Fnat: 0.778 DockQ: 0.828 ++
+ +_**Note**_ that this kind of analysis only makes sense when we know the reference complex and for benchmarking / performance analysis purposes. + +Look at the single structure statistics provided by the script. + +How does the quality of the models change after flexible refinement? Consider here the various metrics. + +++ ++ Answer expand_more +
++ In this case we observe a small improvement in terms of iRMSD values as well as in the fraction of native contacts and the DockQ scores. Also the single model rankings have improved, but the top ranked model is not the best one. +
+
+ +Is the best model always ranked first? + +++ ++ Answer expand_more +
++ This is clearly not the case. The scoring function is not perfect, but does a reasonable job in ranking models of acceptable or better quality on top in this case. +
+
+ +#### Analysis scenario 2: visualising the scores and their components + +We have precalculated a number of interactive plots to visualize the scores and their components versus ranks and model quality. + + +Examine the plots (remember here that higher DockQ values and lower i-RMSD values correspond to better models) + + +Models statistics: + +* [iRMSD versus HADDOCK score](plots/scenario2/irmsd_score.html){:target="_blank"} +* [DockQ versus HADDOCK score](plots/scenario2/dockq_score.html){:target="_blank"} + +Cluster statistics (distributions of values per cluster ordered according to their HADDOCK rank): + +* [HADDOCK scores](plots/scenario2/score_clt.html){:target="_blank"} +* [iRMSD](plots/scenario2/irmsd_clt.html){:target="_blank"} +* [DockQ](plots/scenario2/dockq_clt.html){:target="_blank"} + +
+ +### Comparing the performance of the two scenarios + +Clearly both scenarios give good results, with an acceptable model ranked at the top in both cases: + +{% highlight shell %} +============================================== +== scenario1-full/11_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 141 out of 185 +Total number of medium or better models: 116 out of 185 +Total number of high quality models: 34 out of 185 + +First acceptable model - rank: 1 i-RMSD: 0.907 Fnat: 0.917 DockQ: 0.871 +First medium model - rank: 1 i-RMSD: 0.907 Fnat: 0.917 DockQ: 0.871 +Best model - rank: 36 i-RMSD: 0.807 Fnat: 0.833 DockQ: 0.862 + +============================================== +== scenario2-cltsel-full/12_caprieval/capri_ss.tsv +============================================== +Total number of acceptable or better models: 65 out of 317 +Total number of medium or better models: 27 out of 317 +Total number of high quality models: 7 out of 317 + +First acceptable model - rank: 1 i-RMSD: 3.718 Fnat: 0.333 DockQ: 0.382 +First medium model - rank: 3 i-RMSD: 0.991 Fnat: 0.806 DockQ: 0.821 +Best model - rank: 54 i-RMSD: 0.896 Fnat: 0.778 DockQ: 0.828 + +{% endhighlight %} + +While the two scenarios show similar results, we can observe that scenario 2 produces a higher count of clusters, i.e. a higher conformational diversity, than scenario 1. +This difference is most probably a consequence of the clustering step carried out after the rigid-body docking. In fact, this additional step allowed us to select the best models of each cluster, retaining the diversity produced in the rigid body step, while selecting the overall best ranked models in scenario 1 resulted in lower diversity. + +
+
+ +## Biological insights + +The E2A-HPR complex is involved in phosphate-transfer, in which a phosphate group attached to histidine 90 of E2A (which we named NEP) is transferred to a histidine of HPR. As such, the docking models should make sense according to this information, meaning that two histidines should be in close proximity at the interface. Using PyMOL, check the various cluster representatives (we are assuming here you have performed all PyMOL commands of the previous section): + + +select histidines, resn HIS+NEP
+show spheres, histidines
+util.cnc
+ + +First of all, has the phosphate group been properly generated? + +**Note:** You can zoom on the phosphorylated histidine using the following PyMOL command: + + +zoom resn NEP
+ + ++ + +Zoom back to all visible molecules with + + +zoom vis+
+ + +Now inspect each cluster in turn and check if histidine 90 of E2A is in close proximity to another histidine of HPR. +To facilitate this analysis, view each cluster in turn (use the right panel to activate/deactivate the various clusters by clicking on their name). + +Based on this analysis, which cluster best satisfies the biological information? + +Is this cluster also the best ranked one? + +
+ +## Comparison with the reference structure + +As explained in the introduction, the structure of the native complex has been determined by NMR (PDB ID [1GGR](https://www.ebi.ac.uk/pdbe/entry/pdb/1ggr){:target="_blank"}) using a combination of intermolecular NOEs and dipolar coupling restraints. We will now compare the docking models with this structure. + +If you still have all cluster representatives open in PyMOL you can proceed with the subsequent analysis, otherwise load each cluster representative again as described above. Then, fetch the reference complex by typing in PyMOL: + + +fetch 1GGR
+show cartoon
+color yellow, 1GGR and chain A
+color orange, 1GGR and chain B
+ + +The residue numbering of chain B in this structure is however different from the HPR numbering in the structure we used: it starts at 301 while in our models chain B starts at 1. We can change the residue numbering easily in PyMOL with the following command: + + +alter (chain B and 1GGR), resv -=300
+ + +Then superimpose all cluster representatives on the reference structure, using the entire chain A (E2A): + + +select 1GGR and chain A
+alignto sele
+ + + +Do any of the cluster representatives resemble the reference NMR structure? + + +In case you found a reasonable prediction, what is its cluster rank? + + +
+
+ +## Congratulations! 🎉 + +You have completed this tutorial. If you have any questions or suggestions, feel free to contact us via email or by asking a question through our [support center](https://ask.bioexcel.eu){:target="_blank"}. + +Also check out our [education](/education) web page where you will find more tutorials! + +
+
+ +## A look into the future Virtual Research Environment for HADDOCK3 + +In the context of a project with the [Netherlands e-Science Center](https://www.esciencecenter.nl){:target="_blank"} we are working on +building a Virtual Research Environment (VRE) for HADDOCK3 that will allow you to build and edit custom workflows, +execute those on a variety of infrastructures (grid, cloud, local, HPC) and provide an interactive analysis +platform for analyzing your HADDOCK3 results. This is _work in progress_ but you can already take a glimpse of the +first component, the workflow builder, [here](https://github.com/i-VRESSE/workflow-builder){:target="_blank"}. + +All the HADDOCK3 VRE software development is open and can be followed from our [GitHub i-VRESSE](https://github.com/i-VRESSE){:target="_blank"} repository. + +So stay tuned! + + +[air-help]: https://www.bonvinlab.org/software/haddock2.4/airs/ "AIRs help" +[gentbl]: https://wenmr.science.uu.nl/gentbl/ "GenTBL" +[haddock24protein]: /education/HADDOCK24/HADDOCK24-protein-protein-basic/ +[haddock-repo]: https://github.com/haddocking/haddock3 "HADDOCK3 GitHub" +[haddock-tools]: https://github.com/haddocking/haddock-tools "HADDOCK tools GitHub" +[installation]: https://www.bonvinlab.org/haddock3/INSTALL.html "Installation" +[link-cns]: https://cns-online.org "CNS online" +[link-forum]: https://ask.bioexcel.eu/c/haddock "HADDOCK Forum" +[link-pdbtools]:http://www.bonvinlab.org/pdb-tools/ "PDB-Tools" +[link-pymol]: https://www.pymol.org/ "PyMOL" +[nat-pro]: https://www.nature.com/nprot/journal/v5/n5/abs/nprot.2010.32.html "Nature protocol" +[tbl-examples]: https://github.com/haddocking/haddock-tools/tree/master/haddock_tbl_validation "tbl examples" diff --git a/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/dockq_clt.html b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/dockq_clt.html new file mode 100644 index 000000000..bd35873ea --- /dev/null +++ 
b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/dockq_clt.html @@ -0,0 +1,23 @@ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/dockq_score.html b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/dockq_score.html new file mode 100644 index 000000000..813da8f61 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/dockq_score.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/irmsd_clt.html b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/irmsd_clt.html new file mode 100644 index 000000000..99b2c1f60 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/irmsd_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/irmsd_score.html b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/irmsd_score.html new file mode 100644 index 000000000..c8e5f32b9 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/irmsd_score.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/score_clt.html b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/score_clt.html new file mode 100644 index 000000000..0bfa12a54 --- /dev/null +++ b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/GOGO_scenario2/score_clt.html @@ -0,0 +1,23 @@ + +++ + ++ + ++ \ No newline at end of file diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2024/plots/score_clt.html b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/scenario1/dockq_clt.html similarity index 62% rename from 
education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2024/plots/score_clt.html rename to education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/scenario1/dockq_clt.html index b6887ff74..8f89b692d 100644 --- a/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2024/plots/score_clt.html +++ b/education/HADDOCK3/HADDOCK3-protein-protein-basic/plots/scenario1/dockq_clt.html @@ -5,7 +5,7 @@++ + ++ +++ + +