From a1bceddc8ecf30a6a84b1ee283e1dbe79e0431da Mon Sep 17 00:00:00 2001 From: Brennan Abanades Date: Mon, 16 Aug 2021 19:57:30 +0100 Subject: [PATCH] Made util more readable and a little faster --- ABlooper/utils.py | 22 ++++++++++++++++++++-- README.md | 7 ++++--- setup.py | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/ABlooper/utils.py b/ABlooper/utils.py index fd3aa49..3a3862d 100644 --- a/ABlooper/utils.py +++ b/ABlooper/utils.py @@ -41,14 +41,20 @@ def one_hot(num_list, classes=20): def filt(x, chain, loop_range): - if x[:4] == "ATOM" and x.split()[4] == chain: - if loop_range[0] <= int(re.sub("[^0-9]", "", x.split()[5])) <= loop_range[1]: + """ Function to select residues in a certain chain within a given range. + + If the pdb line contains an atom belonging to the desired chain within the range it returns True. + """ + if x[:4] == "ATOM" and x[21] == chain: + if loop_range[0] <= int(x[22:26]) <= loop_range[1]: return True return False def positional_encoding(sequence, n=5): + """ Gives the network information on how close each residue is to the anchors + """ encs = [] L = len(sequence) for i in range(n): @@ -59,6 +65,10 @@ def positional_encoding(sequence, n=5): def res_to_atom(amino, n_atoms=4): + """ Adds a one-hot encoded vector to each node describing what atom type it is. + + It also reshapes the input tensor. + """ residue_feat = rearrange(amino, "i d -> i () d") atom_type = rearrange(torch.eye(n_atoms, device=amino.device), "a d -> () a d") @@ -69,6 +79,8 @@ def res_to_atom(amino, n_atoms=4): def which_loop(loop_seq, cdr): + """ Adds a one-hot encoded vector to each node describing which CDR it belongs to. + """ CDRs = ["H1", "H2", "H3", "L1", "L2", "L3", "Anchor"] loop = np.zeros((len(loop_seq), len(CDRs))) loop[:, -1] = 1 @@ -78,10 +90,14 @@ def which_loop(loop_seq, cdr): def rmsd(loop1, loop2): + """ Simple rmsd calculation for numpy arrays. 
+ """ return np.sqrt(np.mean(((loop1 - loop2) ** 2).sum(-1))) def to_pdb_line(atom_id, atom_type, amino_type, chain_ID, residue_id, coords): + """Puts all the required info into a .pdb format + """ x, y, z = coords insertion = "$" if type(residue_id) is str: @@ -97,6 +113,8 @@ def to_pdb_line(atom_id, atom_type, amino_type, chain_ID, residue_id, coords): def prepare_input_loop(CDR_coords, CDR_seq, CDR): + """ Generates input features to be fed into the network + """ CDR_input_coords = copy.deepcopy(CDR_coords) CDR_input_coords[1:-1] = np.linspace(CDR_coords[1], CDR_coords[-2], len(CDR_coords) - 2) CDR_input_coords = rearrange(torch.tensor(CDR_input_coords), "i a d -> () (i a) d").float() diff --git a/README.md b/README.md index ab23f0c..a577acd 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,10 @@ # ABlooper -Antibodies are a key component of the immune system and have been extensively used as biotherapeutics. Accurate knowledge of their structure is central to understanding their function. The key area for antigen binding and the main area of structural variation in antibodies are concentrated in their six complementarity determining regions (CDRs), with the most variable being the CDR-H3 loop. The sequence and structure variability of CDR-H3 make it particularly challenging to model. Recently, deep learning methods have offered a step change in our ability to predict protein structures. In this work we present ABlooper, an end-to-end equivariant deep-learning based CDR loop structure prediction tool. ABlooper predicts the structure of CDR loops with high accuracy and provides a confidence estimate for each of its predictions. On the models of the Rosetta Antibody Benchmark, ABlooper makes predictions with an average H3 RMSD of 2.45Å, which drops to 2.02Å when considering only its 76\% most confident predictions. + +Antibodies are a key component of the immune system and have been extensively used as biotherapeutics. 
Accurate knowledge of their structure is central to understanding their antigen binding function. The key area for antigen binding and the main area of structural variation in antibodies is concentrated in the six complementarity determining regions (CDRs), with the most important for binding and most variable being the CDR-H3 loop. The sequence and structural variability of CDR-H3 make it particularly challenging to model. Recently, deep learning methods have offered a step change in our ability to predict protein structures. In this work we present ABlooper, an end-to-end equivariant deep-learning based CDR loop structure prediction tool. ABlooper rapidly predicts the structure of CDR loops with high accuracy and provides a confidence estimate for each of its predictions. On the models of the Rosetta Antibody Benchmark, ABlooper makes predictions with an average CDR-H3 RMSD of 2.45Å, which drops to 2.02Å when considering only its 76% most confident predictions. ## Install -To install via PyPi +To install via PyPi: ```bash $ pip install ABlooper @@ -41,7 +42,7 @@ pred.write_predictions_in_pdb_format(output_path) ``` -I would recommend using the command line if you just want a quick antibody model. If speed is a priority, it is probably best to just use the trained pytorch module, as reading and writting a pdb file is slower that running the model. The python class will work best if you want to incorporate CDR prediction into a pipeline or access other details such as confidence score or RMSD to original model. Both of which can be obtained as follows: +I would recommend using the command line if you just want a quick antibody model. If speed is a priority, it is probably best to just use the trained pytorch model. The python class will work best if you want to incorporate CDR prediction into a pipeline or access other details such as confidence score or RMSD to original model. 
Both of which can be obtained as follows: ```python diff --git a/setup.py b/setup.py index 1f43e5c..3b02e45 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ long_description = fh.read() setup( name='ABlooper', - version='1.0.2', + version='1.0.3', description='Set of functions to predict CDR structure', license='BSD 3-clause license', maintainer='Brennan Abanades',