Spaces:
Sleeping
Sleeping
File size: 5,432 Bytes
85bd48b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Functions for processing confidence metrics."""
from typing import Dict, Optional, Tuple
import numpy as np
import scipy.special
def compute_plddt(logits: np.ndarray) -> np.ndarray:
"""Computes per-residue pLDDT from logits.
Args:
logits: [num_res, num_bins] output from the PredictedLDDTHead.
Returns:
plddt: [num_res] per-residue pLDDT.
"""
num_bins = logits.shape[-1]
bin_width = 1.0 / num_bins
bin_centers = np.arange(start=0.5 * bin_width, stop=1.0, step=bin_width)
probs = scipy.special.softmax(logits, axis=-1)
predicted_lddt_ca = np.sum(probs * bin_centers[None, :], axis=-1)
return predicted_lddt_ca * 100
def _calculate_bin_centers(breaks: np.ndarray):
"""Gets the bin centers from the bin edges.
Args:
breaks: [num_bins - 1] the error bin edges.
Returns:
bin_centers: [num_bins] the error bin centers.
"""
step = (breaks[1] - breaks[0])
# Add half-step to get the center
bin_centers = breaks + step / 2
# Add a catch-all bin at the end.
bin_centers = np.concatenate([bin_centers, [bin_centers[-1] + step]],
axis=0)
return bin_centers
def _calculate_expected_aligned_error(
alignment_confidence_breaks: np.ndarray,
aligned_distance_error_probs: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Calculates expected aligned distance errors for every pair of residues.
Args:
alignment_confidence_breaks: [num_bins - 1] the error bin edges.
aligned_distance_error_probs: [num_res, num_res, num_bins] the predicted
probs for each error bin, for each pair of residues.
Returns:
predicted_aligned_error: [num_res, num_res] the expected aligned distance
error for each pair of residues.
max_predicted_aligned_error: The maximum predicted error possible.
"""
bin_centers = _calculate_bin_centers(alignment_confidence_breaks)
# Tuple of expected aligned distance error and max possible error.
return (np.sum(aligned_distance_error_probs * bin_centers, axis=-1),
np.asarray(bin_centers[-1]))
def compute_predicted_aligned_error(
logits: np.ndarray,
breaks: np.ndarray) -> Dict[str, np.ndarray]:
"""Computes aligned confidence metrics from logits.
Args:
logits: [num_res, num_res, num_bins] the logits output from
PredictedAlignedErrorHead.
breaks: [num_bins - 1] the error bin edges.
Returns:
aligned_confidence_probs: [num_res, num_res, num_bins] the predicted
aligned error probabilities over bins for each residue pair.
predicted_aligned_error: [num_res, num_res] the expected aligned distance
error for each pair of residues.
max_predicted_aligned_error: The maximum predicted error possible.
"""
aligned_confidence_probs = scipy.special.softmax(
logits,
axis=-1)
predicted_aligned_error, max_predicted_aligned_error = (
_calculate_expected_aligned_error(
alignment_confidence_breaks=breaks,
aligned_distance_error_probs=aligned_confidence_probs))
return {
'aligned_confidence_probs': aligned_confidence_probs,
'predicted_aligned_error': predicted_aligned_error,
'max_predicted_aligned_error': max_predicted_aligned_error,
}
def predicted_tm_score(
logits: np.ndarray,
breaks: np.ndarray,
residue_weights: Optional[np.ndarray] = None) -> np.ndarray:
"""Computes predicted TM alignment score.
Args:
logits: [num_res, num_res, num_bins] the logits output from
PredictedAlignedErrorHead.
breaks: [num_bins] the error bins.
residue_weights: [num_res] the per residue weights to use for the
expectation.
Returns:
ptm_score: the predicted TM alignment score.
"""
# residue_weights has to be in [0, 1], but can be floating-point, i.e. the
# exp. resolved head's probability.
if residue_weights is None:
residue_weights = np.ones(logits.shape[0])
bin_centers = _calculate_bin_centers(breaks)
num_res = np.sum(residue_weights)
# Clip num_res to avoid negative/undefined d0.
clipped_num_res = max(num_res, 19)
# Compute d_0(num_res) as defined by TM-score, eqn. (5) in
# http://zhanglab.ccmb.med.umich.edu/papers/2004_3.pdf
# Yang & Skolnick "Scoring function for automated
# assessment of protein structure template quality" 2004
d0 = 1.24 * (clipped_num_res - 15) ** (1./3) - 1.8
# Convert logits to probs
probs = scipy.special.softmax(logits, axis=-1)
# TM-Score term for every bin
tm_per_bin = 1. / (1 + np.square(bin_centers) / np.square(d0))
# E_distances tm(distance)
predicted_tm_term = np.sum(probs * tm_per_bin, axis=-1)
normed_residue_mask = residue_weights / (1e-8 + residue_weights.sum())
per_alignment = np.sum(predicted_tm_term * normed_residue_mask, axis=-1)
return np.asarray(per_alignment[(per_alignment * residue_weights).argmax()])
|