diff --git a/docs/other/20201013_opencadd_new_features_slides.ipynb b/docs/other/20201013_opencadd_new_features_slides.ipynb index db431a83..70598822 100644 --- a/docs/other/20201013_opencadd_new_features_slides.ipynb +++ b/docs/other/20201013_opencadd_new_features_slides.ipynb @@ -108,13 +108,17 @@ "Some of the class method options:\n", "\n", "| | kinases | ligands | structures | bioactivities | interactions | pockets |\n", - "|:--------------------------| - | - | - | - | - | - |\n", - "| __by_kinase_klifs_id__ | x | x | x | x | x | | \n", - "| __by_kinase_name__ | x | x | x | | | |\n", - "| __by_ligand_klifs_id__ | | x | x | x | x | |\n", - "| __by_ligand_expo_id__ | | x | x | | | |\n", - "| __by_structure_klifs_id__ | | | x | | x | x |\n", - "| __by_structure_pdb_id__ | | | x | | | |" + "|:--------------------------| - | - | - | - | - | - |\n", + "| __by_kinase_klifs_id__ | x* | x* | x* | | x | | \n", + "| __by_kinase_name__ | x* | x | x | | | |\n", + "| __by_ligand_klifs_id__ | | x* | x | x\\* \\** | x | |\n", + "| __by_ligand_expo_id__ | | x | x | x\\* \\** | | |\n", + "| __by_structure_klifs_id__ | | | x* | | x* | x* |\n", + "| __by_structure_pdb_id__ | | | x* | | | |\n", + "\n", + " \\* Direct use of KLIFS Swagger API.\n", + " \n", + " \\** KLIFS Swagger API allows only ONE input value.\n" ] }, { diff --git a/docs/other/klifs_kinase_names.ipynb b/docs/other/klifs_kinase_names.ipynb deleted file mode 100644 index 594994db..00000000 --- a/docs/other/klifs_kinase_names.ipynb +++ /dev/null @@ -1,2036 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# KLIFS kinase names\n", - "\n", - "Explore different kinase name columns." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:opencadd.databases.klifs.api:If you want to see an non-truncated version of the DataFrames in this module, use `pd.set_option('display.max_columns', 50)` in your notebook.\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "from opencadd.databases.klifs import setup_remote, setup_local" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "pd.set_option('display.max_columns', 50)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:opencadd.databases.klifs.api:Set up remote session...\n", - "INFO:opencadd.databases.klifs.api:Remote session is ready!\n" - ] - } - ], - "source": [ - "remote = setup_remote()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Kinase details 1 (short version)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.idkinase.hgnc_namekinase.full_namespecies.klifs
01AKT1NoneHuman
12AKT2NoneHuman
23AKT3NoneHuman
34CITNoneHuman
45DMPKNoneHuman
\n", - "
" - ], - "text/plain": [ - " kinase.id kinase.hgnc_name kinase.full_name species.klifs\n", - "0 1 AKT1 None Human\n", - "1 2 AKT2 None Human\n", - "2 3 AKT3 None Human\n", - "3 4 CIT None Human\n", - "4 5 DMPK None Human" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kinases1 = remote.kinases.all_kinases()\n", - "kinases1.sort_values(\"kinase.id\", inplace=True)\n", - "kinases1.reset_index(drop=True, inplace=True)\n", - "kinases1.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Kinase details 2 (long details)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of IDs: 1127\n" - ] - } - ], - "source": [ - "kinase_ids = kinases1[\"kinase.id\"].to_list()\n", - "print(f\"Number of IDs: {len(kinase_ids)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of kinases: 1127\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.idkinase.klifs_namekinase.hgnc_namekinase.familykinase.groupkinase.classspecies.klifskinase.full_namekinase.uniprotkinase.iupharkinase.pocket
01AKT1AKT1AktAGCHumanNoneP317491479KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME...
12AKT2AKT2AktAGCHumanNoneP317511480KLLGKGTFGKVILYAMKILHTVTESRVLQNTRPFLTALKYACFVME...
23AKT3AKT3AktAGCHumanNoneQ9Y2432286KLLGKGTFGKVILYAMKILHTLTESRVLKNTRPFLTSLKYSCFVME...
34CRIKCITDMPKAGCCRIKHumanNoneO145781509SLVGCGHFAEVQVYAMKVMFFEEERNILSRSTPWIPQLQYAYLVME...
45DMPK1DMPKDMPKAGCGEKHumanNoneQ090131505KVIGRGAFSEVAVYAMKIMCFREERDVLVNGDRWITQLHFAYLVME...
\n", - "
" - ], - "text/plain": [ - " kinase.id kinase.klifs_name kinase.hgnc_name kinase.family kinase.group \\\n", - "0 1 AKT1 AKT1 Akt AGC \n", - "1 2 AKT2 AKT2 Akt AGC \n", - "2 3 AKT3 AKT3 Akt AGC \n", - "3 4 CRIK CIT DMPK AGC \n", - "4 5 DMPK1 DMPK DMPK AGC \n", - "\n", - " kinase.class species.klifs kinase.full_name kinase.uniprot kinase.iuphar \\\n", - "0 Human None P31749 1479 \n", - "1 Human None P31751 1480 \n", - "2 Human None Q9Y243 2286 \n", - "3 CRIK Human None O14578 1509 \n", - "4 GEK Human None Q09013 1505 \n", - "\n", - " kinase.pocket \n", - "0 KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME... \n", - "1 KLLGKGTFGKVILYAMKILHTVTESRVLQNTRPFLTALKYACFVME... \n", - "2 KLLGKGTFGKVILYAMKILHTLTESRVLKNTRPFLTSLKYSCFVME... \n", - "3 SLVGCGHFAEVQVYAMKVMFFEEERNILSRSTPWIPQLQYAYLVME... \n", - "4 KVIGRGAFSEVAVYAMKIMCFREERDVLVNGDRWITQLHFAYLVME... " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kinases2 = remote.kinases.by_kinase_ids(kinase_ids)\n", - "print(f\"Number of kinases: {kinases2.shape[0]}\")\n", - "kinases2.sort_values(\"kinase.id\", inplace=True)\n", - "kinases2.reset_index(drop=True, inplace=True)\n", - "kinases2.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### How many kinases have unambiguous name?" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(817, 11)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kinases2[kinases2.apply(lambda x: x[\"kinase.klifs_name\"] != x[\"kinase.hgnc_name\"], axis=1)].shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Which columns are matched for kinase name?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "fc50536f196b44d9a34b40c5eda54d51", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.idkinase.klifs_namekinase.hgnc_namekinase.familykinase.groupkinase.classspecies.klifskinase.full_namekinase.uniprotkinase.iupharkinase.pocket
04CRIKCITDMPKAGCCRIKHumanNoneO145781509SLVGCGHFAEVQVYAMKVMFFEEERNILSRSTPWIPQLQYAYLVME...
1637CRIKCitDMPKAGCMouseNoneP490250SLVGCGHFAEVQVYAMKIMFFEEERNILSRSTPWIPQLQYAYLVME...
\n", - "
" - ], - "text/plain": [ - " kinase.id kinase.klifs_name kinase.hgnc_name kinase.family kinase.group \\\n", - "0 4 CRIK CIT DMPK AGC \n", - "1 637 CRIK Cit DMPK AGC \n", - "\n", - " kinase.class species.klifs kinase.full_name kinase.uniprot kinase.iuphar \\\n", - "0 CRIK Human None O14578 1509 \n", - "1 Mouse None P49025 0 \n", - "\n", - " kinase.pocket \n", - "0 SLVGCGHFAEVQVYAMKVMFFEEERNILSRSTPWIPQLQYAYLVME... \n", - "1 SLVGCGHFAEVQVYAMKIMFFEEERNILSRSTPWIPQLQYAYLVME... " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote.kinases.by_kinase_names(kinase_names='CRIK')" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0c212fdd7ad9455293e7bfee644d5c0e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.idkinase.klifs_namekinase.hgnc_namekinase.familykinase.groupkinase.classspecies.klifskinase.full_namekinase.uniprotkinase.iupharkinase.pocket
04CRIKCITDMPKAGCCRIKHumanNoneO145781509SLVGCGHFAEVQVYAMKVMFFEEERNILSRSTPWIPQLQYAYLVME...
1637CRIKCitDMPKAGCMouseNoneP490250SLVGCGHFAEVQVYAMKIMFFEEERNILSRSTPWIPQLQYAYLVME...
\n", - "
" - ], - "text/plain": [ - " kinase.id kinase.klifs_name kinase.hgnc_name kinase.family kinase.group \\\n", - "0 4 CRIK CIT DMPK AGC \n", - "1 637 CRIK Cit DMPK AGC \n", - "\n", - " kinase.class species.klifs kinase.full_name kinase.uniprot kinase.iuphar \\\n", - "0 CRIK Human None O14578 1509 \n", - "1 Mouse None P49025 0 \n", - "\n", - " kinase.pocket \n", - "0 SLVGCGHFAEVQVYAMKVMFFEEERNILSRSTPWIPQLQYAYLVME... \n", - "1 SLVGCGHFAEVQVYAMKIMFFEEERNILSRSTPWIPQLQYAYLVME... " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote.kinases.by_kinase_names(kinase_names='CIT')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "__Note__: Apparently, the kinase name is matched for `kinase.name` and `kinase.hgnc`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Differing `kinases.name` and `kinases.hgnc`?" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of differing names/HGNC: 817\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.klifs_namekinase.hgnc_name
3CRIKCIT
4DMPK1DMPK
5MRCKaCDC42BPA
6MRCKbCDC42BPB
7DMPK2CDC42BPG
\n", - "
" - ], - "text/plain": [ - " kinase.klifs_name kinase.hgnc_name\n", - "3 CRIK CIT\n", - "4 DMPK1 DMPK\n", - "5 MRCKa CDC42BPA\n", - "6 MRCKb CDC42BPB\n", - "7 DMPK2 CDC42BPG" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "diff2 = kinases2[kinases2.apply(lambda x: x[\"kinase.klifs_name\"] != x[\"kinase.hgnc_name\"], axis=1)]\n", - "print(f\"Number of differing names/HGNC: {diff2.shape[0]}\")\n", - "diff2[[\"kinase.klifs_name\", \"kinase.hgnc_name\"]].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Merge details for kinases 1 and 2" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.idkinase.hgnc_name_xkinase.full_name_xkinase.klifs_namekinase.hgnc_name_ykinase.full_name_y
01AKT1NoneAKT1AKT1None
12AKT2NoneAKT2AKT2None
23AKT3NoneAKT3AKT3None
34CITNoneCRIKCITNone
45DMPKNoneDMPK1DMPKNone
.....................
11221123Pip5k1aNonePip5k1aPip5k1aNone
11231124Map4k2NoneMap4k2Map4k2None
11241125Pan3NonePan3Pan3None
11251126Plk5NonePlk5Plk5None
11261127Efna2NoneEfna2Efna2None
\n", - "

1127 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " kinase.id kinase.hgnc_name_x kinase.full_name_x kinase.klifs_name \\\n", - "0 1 AKT1 None AKT1 \n", - "1 2 AKT2 None AKT2 \n", - "2 3 AKT3 None AKT3 \n", - "3 4 CIT None CRIK \n", - "4 5 DMPK None DMPK1 \n", - "... ... ... ... ... \n", - "1122 1123 Pip5k1a None Pip5k1a \n", - "1123 1124 Map4k2 None Map4k2 \n", - "1124 1125 Pan3 None Pan3 \n", - "1125 1126 Plk5 None Plk5 \n", - "1126 1127 Efna2 None Efna2 \n", - "\n", - " kinase.hgnc_name_y kinase.full_name_y \n", - "0 AKT1 None \n", - "1 AKT2 None \n", - "2 AKT3 None \n", - "3 CIT None \n", - "4 DMPK None \n", - "... ... ... \n", - "1122 Pip5k1a None \n", - "1123 Map4k2 None \n", - "1124 Pan3 None \n", - "1125 Plk5 None \n", - "1126 Efna2 None \n", - "\n", - "[1127 rows x 6 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kinases = kinases1.merge(kinases2, on=\"kinase.id\", how=\"left\")\n", - "kinases = kinases.iloc[:, [0, 1, 2, 4, 5, 10]]\n", - "kinases" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.idkinase.hgnc_name_xkinase.full_name_xkinase.klifs_namekinase.hgnc_name_ykinase.full_name_y
248249MAPK14Nonep38aMAPK14None
\n", - "
" - ], - "text/plain": [ - " kinase.id kinase.hgnc_name_x kinase.full_name_x kinase.klifs_name \\\n", - "248 249 MAPK14 None p38a \n", - "\n", - " kinase.hgnc_name_y kinase.full_name_y \n", - "248 MAPK14 None " - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kinases[kinases[\"kinase.hgnc_name_x\"] == \"MAPK14\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.idkinase.hgnc_name_xkinase.full_name_xkinase.klifs_namekinase.hgnc_name_ykinase.full_name_y
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [kinase.id, kinase.hgnc_name_x, kinase.full_name_x, kinase.klifs_name, kinase.hgnc_name_y, kinase.full_name_y]\n", - "Index: []" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kinases[kinases[\"kinase.klifs_name\"].isin([\"\", \" \", 0, \"0\", None])]" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.idkinase.hgnc_name_xkinase.full_name_xkinase.klifs_namekinase.hgnc_name_ykinase.full_name_y
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [kinase.id, kinase.hgnc_name_x, kinase.full_name_x, kinase.klifs_name, kinase.hgnc_name_y, kinase.full_name_y]\n", - "Index: []" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kinases[kinases[\"kinase.hgnc_name_x\"].isin([\"\", \" \", 0, \"0\", None])]" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.idkinase.hgnc_name_xkinase.full_name_xkinase.klifs_namekinase.hgnc_name_ykinase.full_name_y
528529A6NoneA6None
529530A6rNoneA6rNone
\n", - "
" - ], - "text/plain": [ - " kinase.id kinase.hgnc_name_x kinase.full_name_x kinase.klifs_name \\\n", - "528 529 A6 None A6 \n", - "529 530 A6r None A6r \n", - "\n", - " kinase.hgnc_name_y kinase.full_name_y \n", - "528 None \n", - "529 None " - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kinases[kinases[\"kinase.hgnc_name_y\"].isin([\"\", \" \", 0, \"0\", None])]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Differing `kinase.name_full`?" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of differing full names: 0\n" - ] - } - ], - "source": [ - "diff1 = kinases[kinases.apply(lambda x: x[\"kinase.full_name_x\"] != x[\"kinase.full_name_y\"], axis=1)]\n", - "print(f\"Number of differing full names: {diff1.shape[0]}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Differing `kinase.hgnc`?" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of differing HGNC names: 2\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.idkinase.hgnc_name_xkinase.full_name_xkinase.klifs_namekinase.hgnc_name_ykinase.full_name_y
528529A6NoneA6None
529530A6rNoneA6rNone
\n", - "
" - ], - "text/plain": [ - " kinase.id kinase.hgnc_name_x kinase.full_name_x kinase.klifs_name \\\n", - "528 529 A6 None A6 \n", - "529 530 A6r None A6r \n", - "\n", - " kinase.hgnc_name_y kinase.full_name_y \n", - "528 None \n", - "529 None " - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "diff3 = kinases[kinases.apply(lambda x: x[\"kinase.hgnc_name_x\"] != x[\"kinase.hgnc_name_y\"], axis=1)]\n", - "print(f\"Number of differing HGNC names: {diff3.shape[0]}\")\n", - "diff3.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "__Note__: In case of kinases A6 and A6r, the HGNC column from `all_kinases` got non-HGNC entries?" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Local kinase details" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(11592, 15) (11592, 27)\n" - ] - } - ], - "source": [ - "from opencadd.databases.klifs.local import _LocalDatabaseGenerator\n", - "local = _LocalDatabaseGenerator()\n", - "klifs_export_path = \"data/KLIFS_export.20201020.csv.zip\"\n", - "klifs_export = local._from_klifs_export_file(klifs_export_path)\n", - "klifs_overview_path = \"data/overview.20201020.csv.zip\"\n", - "klifs_overview = local._from_klifs_overview_file(klifs_overview_path)\n", - "print(klifs_export.shape, klifs_overview.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.nameskinase.familykinase.groupstructure.pdbstructure.chainstructure.alternate_modelspecies.klifsligand.nameligand.pdbligand_allosteric.nameligand_allosteric.pdbstructure.dfgstructure.ac_helixkinase.hgnc_namekinase.klifs_name
0[MAPK14, p38a]MAPKCMGC1a9uA-Human4-[5-(4-FLUORO-PHENYL)-2-(4-METHANESULFINYL-PH...SB2--inout-likeMAPK14p38a
1[HCK]SrcTK1ad5A-HumanPHOSPHOAMINOPHOSPHONIC ACID-ADENYLATE ESTERANP--inoutHCKHCK
2[HCK]SrcTK1ad5B-HumanPHOSPHOAMINOPHOSPHONIC ACID-ADENYLATE ESTERANP--inout-likeHCKHCK
3[FGFR1]FGFRTK1agwAAHuman3-[4-(1-FORMYLPIPERAZIN-4-YL)-BENZYLIDENYL]-2-...SU2--inout-likeFGFR1FGFR1
4[FGFR1]FGFRTK1agwABHuman3-[4-(1-FORMYLPIPERAZIN-4-YL)-BENZYLIDENYL]-2-...SU2--inout-likeFGFR1FGFR1
\n", - "
" - ], - "text/plain": [ - " kinase.names kinase.family kinase.group structure.pdb structure.chain \\\n", - "0 [MAPK14, p38a] MAPK CMGC 1a9u A \n", - "1 [HCK] Src TK 1ad5 A \n", - "2 [HCK] Src TK 1ad5 B \n", - "3 [FGFR1] FGFR TK 1agw A \n", - "4 [FGFR1] FGFR TK 1agw A \n", - "\n", - " structure.alternate_model species.klifs \\\n", - "0 - Human \n", - "1 - Human \n", - "2 - Human \n", - "3 A Human \n", - "4 B Human \n", - "\n", - " ligand.name ligand.pdb \\\n", - "0 4-[5-(4-FLUORO-PHENYL)-2-(4-METHANESULFINYL-PH... SB2 \n", - "1 PHOSPHOAMINOPHOSPHONIC ACID-ADENYLATE ESTER ANP \n", - "2 PHOSPHOAMINOPHOSPHONIC ACID-ADENYLATE ESTER ANP \n", - "3 3-[4-(1-FORMYLPIPERAZIN-4-YL)-BENZYLIDENYL]-2-... SU2 \n", - "4 3-[4-(1-FORMYLPIPERAZIN-4-YL)-BENZYLIDENYL]-2-... SU2 \n", - "\n", - " ligand_allosteric.name ligand_allosteric.pdb structure.dfg \\\n", - "0 - - in \n", - "1 - - in \n", - "2 - - in \n", - "3 - - in \n", - "4 - - in \n", - "\n", - " structure.ac_helix kinase.hgnc_name kinase.klifs_name \n", - "0 out-like MAPK14 p38a \n", - "1 out HCK HCK \n", - "2 out-like HCK HCK \n", - "3 out-like FGFR1 FGFR1 \n", - "4 out-like FGFR1 FGFR1 " - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "klifs_export.sort_values([\"structure.pdb\", \"structure.chain\", \"structure.alternate_model\"], inplace=True, ignore_index=True)\n", - "klifs_export.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(4867, 15)" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "klifs_export[klifs_export[\"kinase.names\"].apply(len) == 2].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
species.klifskinase.klifs_namestructure.pdbstructure.alternate_modelstructure.chainligand.pdbligand_allosteric.pdbstructure.rmsd1structure.rmsd2structure.qualityscorestructure.pocketstructure.resolutionstructure.missing_residuesstructure.missing_atomsinteraction.fingerprintstructure.fp_istructure.fp_iistructure.bp_i_astructure.bp_i_bstructure.bp_ii_instructure.bp_ii_a_instructure.bp_ii_b_instructure.bp_ii_outstructure.bp_ii_bstructure.bp_iiistructure.bp_ivstructure.bp_v
0Humanp38a1a9u-ASB2-0.8282.1868.0SPVGSGAYGSVCAVAVKKLRTYRELRLLKHMKENVIGLLDVYLVTH...2.5000000000000000000000000000000000000000000000000...0.01.01.01.00.00.00.00.00.00.00.00.0
1HumanHCK1ad5-AANP-0.8162.1419.6KKLGAGQFGEVWMVAVKTMAFLAEANVMKTLQDKLVKLHAVYIITE...2.6040000000000000010000000000000000000000000000000...0.00.00.00.00.00.00.00.00.00.00.00.0
2HumanHCK1ad5-BANP-0.8172.1419.6KKLGAGQFGEVWMVAVKTMAFLAEANVMKTLQDKLVKLHAVYIITE...2.6040000000000000010000001000000000000000000000000...0.00.00.00.00.00.00.00.00.00.00.00.0
3HumanFGFR11agwAASU2-0.8312.0017.6KPLG_____QVVLVAVKMLDLISEMEMMKMIGKNIINLLGAYVIVE...2.4540000000000000010000000000000000000000000000000...0.00.00.00.00.00.00.00.00.00.00.00.0
4HumanFGFR11agwBASU2-0.8312.0017.6KPLG_____QVVLVAVKMLDLISEMEMMKMIGKNIINLLGAYVIVE...2.4540000000000000010000000000000000000000000000000...0.00.00.00.00.00.00.00.00.00.00.00.0
\n", - "
" - ], - "text/plain": [ - " species.klifs kinase.klifs_name structure.pdb structure.alternate_model \\\n", - "0 Human p38a 1a9u - \n", - "1 Human HCK 1ad5 - \n", - "2 Human HCK 1ad5 - \n", - "3 Human FGFR1 1agw A \n", - "4 Human FGFR1 1agw B \n", - "\n", - " structure.chain ligand.pdb ligand_allosteric.pdb structure.rmsd1 \\\n", - "0 A SB2 - 0.828 \n", - "1 A ANP - 0.816 \n", - "2 B ANP - 0.817 \n", - "3 A SU2 - 0.831 \n", - "4 A SU2 - 0.831 \n", - "\n", - " structure.rmsd2 structure.qualityscore \\\n", - "0 2.186 8.0 \n", - "1 2.141 9.6 \n", - "2 2.141 9.6 \n", - "3 2.001 7.6 \n", - "4 2.001 7.6 \n", - "\n", - " structure.pocket structure.resolution \\\n", - "0 SPVGSGAYGSVCAVAVKKLRTYRELRLLKHMKENVIGLLDVYLVTH... 2.5 \n", - "1 KKLGAGQFGEVWMVAVKTMAFLAEANVMKTLQDKLVKLHAVYIITE... 2.6 \n", - "2 KKLGAGQFGEVWMVAVKTMAFLAEANVMKTLQDKLVKLHAVYIITE... 2.6 \n", - "3 KPLG_____QVVLVAVKMLDLISEMEMMKMIGKNIINLLGAYVIVE... 2.4 \n", - "4 KPLG_____QVVLVAVKMLDLISEMEMMKMIGKNIINLLGAYVIVE... 2.4 \n", - "\n", - " structure.missing_residues structure.missing_atoms \\\n", - "0 0 0 \n", - "1 0 4 \n", - "2 0 4 \n", - "3 5 4 \n", - "4 5 4 \n", - "\n", - " interaction.fingerprint structure.fp_i \\\n", - "0 0000000000000000000000000000000000000000000000... 0.0 \n", - "1 0000000000000010000000000000000000000000000000... 0.0 \n", - "2 0000000000000010000001000000000000000000000000... 0.0 \n", - "3 0000000000000010000000000000000000000000000000... 0.0 \n", - "4 0000000000000010000000000000000000000000000000... 
0.0 \n", - "\n", - " structure.fp_ii structure.bp_i_a structure.bp_i_b structure.bp_ii_in \\\n", - "0 1.0 1.0 1.0 0.0 \n", - "1 0.0 0.0 0.0 0.0 \n", - "2 0.0 0.0 0.0 0.0 \n", - "3 0.0 0.0 0.0 0.0 \n", - "4 0.0 0.0 0.0 0.0 \n", - "\n", - " structure.bp_ii_a_in structure.bp_ii_b_in structure.bp_ii_out \\\n", - "0 0.0 0.0 0.0 \n", - "1 0.0 0.0 0.0 \n", - "2 0.0 0.0 0.0 \n", - "3 0.0 0.0 0.0 \n", - "4 0.0 0.0 0.0 \n", - "\n", - " structure.bp_ii_b structure.bp_iii structure.bp_iv structure.bp_v \n", - "0 0.0 0.0 0.0 0.0 \n", - "1 0.0 0.0 0.0 0.0 \n", - "2 0.0 0.0 0.0 0.0 \n", - "3 0.0 0.0 0.0 0.0 \n", - "4 0.0 0.0 0.0 0.0 " - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "klifs_overview.sort_values([\"structure.pdb\", \"structure.chain\", \"structure.alternate_model\"], inplace=True, ignore_index=True)\n", - "klifs_overview.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Kinase name mismatches in local overview and export files?" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.nameskinase.familykinase.groupstructure.pdbstructure.chainstructure.alternate_modelspecies.klifsligand.nameligand.pdbligand_allosteric.nameligand_allosteric.pdbstructure.dfgstructure.ac_helixkinase.hgnc_namekinase.klifs_name
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [kinase.names, kinase.family, kinase.group, structure.pdb, structure.chain, structure.alternate_model, species.klifs, ligand.name, ligand.pdb, ligand_allosteric.name, ligand_allosteric.pdb, structure.dfg, structure.ac_helix, kinase.hgnc_name, kinase.klifs_name]\n", - "Index: []" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "klifs_export[klifs_export[\"kinase.klifs_name\"] != klifs_overview[\"kinase.klifs_name\"]]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Kinase name mismatches locally and remotely?" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.nameskinase.familykinase.groupstructure.pdbstructure.chainstructure.alternate_modelspecies.klifsligand.nameligand.pdbligand_allosteric.nameligand_allosteric.pdbstructure.dfgstructure.ac_helixkinase.hgnc_namekinase.klifs_name
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [kinase.names, kinase.family, kinase.group, structure.pdb, structure.chain, structure.alternate_model, species.klifs, ligand.name, ligand.pdb, ligand_allosteric.name, ligand_allosteric.pdb, structure.dfg, structure.ac_helix, kinase.hgnc_name, kinase.klifs_name]\n", - "Index: []" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "klifs_export[~klifs_export[\"kinase.klifs_name\"].isin(kinases[\"kinase.klifs_name\"].to_list())]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Kinase HGNC name mismatches locally and remotely?" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
kinase.nameskinase.familykinase.groupstructure.pdbstructure.chainstructure.alternate_modelspecies.klifsligand.nameligand.pdbligand_allosteric.nameligand_allosteric.pdbstructure.dfgstructure.ac_helixkinase.hgnc_namekinase.klifs_name
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [kinase.names, kinase.family, kinase.group, structure.pdb, structure.chain, structure.alternate_model, species.klifs, ligand.name, ligand.pdb, ligand_allosteric.name, ligand_allosteric.pdb, structure.dfg, structure.ac_helix, kinase.hgnc_name, kinase.klifs_name]\n", - "Index: []" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "klifs_export[~klifs_export[\"kinase.hgnc_name\"].isin(kinases[\"kinase.hgnc_name_x\"].to_list())]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/tutorials/databases_klifs.ipynb b/docs/tutorials/databases_klifs.ipynb index 80262aa7..0dfb7ff6 100644 --- a/docs/tutorials/databases_klifs.ipynb +++ b/docs/tutorials/databases_klifs.ipynb @@ -8,8 +8,8 @@ "\n", "The `opencadd.databases.klifs` module offers to interact with KLIFS data \n", "\n", - "- __locally__ ([KLIFS data download](https://klifs.vu-compmedchem.nl/search.php)) and\n", - "- __remote__ ([KLIFS swagger API](https://klifs.vu-compmedchem.nl/swagger/)) \n", + "- __locally__ ([KLIFS data download](https://klifs.net/search.php)) and\n", + "- __remote__ ([KLIFS swagger API (beta)](https://klifs.net/swagger_v2/)) \n", "\n", "following the same API in its `local` and `remote` modules.\n", "\n", @@ -102,16 +102,7 @@ "cell_type": "code", "execution_count": 5, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"INFO:opencadd.databases.klifs.api:Set up remote session...\n", - "INFO:opencadd.databases.klifs.api:Remote session is ready!\n" - ] - } - ], + "outputs": [], "source": [ "remote = setup_remote()" ] @@ -124,7 +115,7 @@ { "data": { "text/plain": [ - "SwaggerClient(https://klifs.net/api)" + "SwaggerClient(https://dev.klifs.net/api_v2)" ] }, "execution_count": 6, @@ -154,21 +145,7 @@ "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:opencadd.databases.klifs.api:Set up local session...\n", - "INFO:opencadd.databases.klifs.local:Load overview.csv...\n", - "INFO:opencadd.databases.klifs.local:Load KLIFS_export.csv...\n", - "INFO:opencadd.databases.klifs.local:Merge both csv files...\n", - "INFO:opencadd.databases.klifs.local:Add paths to coordinate folders to structures...\n", - "INFO:opencadd.databases.klifs.local:Add KLIFS IDs to structures (uses remote since not available locally!)...\n", - "INFO:opencadd.databases.klifs.api:Local session is ready!\n" - ] - } - ], + "outputs": [], "source": [ "local = setup_local(\"../../opencadd/tests/data/klifs\")" ] @@ -233,7 +210,7 @@ " structure.bp_iv\n", " structure.bp_v\n", " kinase.names\n", - " kinase.hgnc_name\n", + " kinase.gene_name\n", " kinase.klifs_name\n", " kinase.family\n", " kinase.group\n", @@ -263,18 +240,18 @@ " 4\n", " 0\n", " 0000000000000010000001000000000000000000000000...\n", - " 0\n", - " 0\n", - " 1\n", - " 1\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 1.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", " [BMX]\n", " BMX\n", " BMX\n", @@ -304,18 +281,18 @@ " 7\n", " 61\n", " 0000000000000000000000000000000000000000000000...\n", - " 0\n", - " 0\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + 
" 1.0\n", + " 1.0\n", + " 1.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", " [BRAF]\n", " BRAF\n", " BRAF\n", @@ -330,163 +307,219 @@ " 509\n", " \n", " \n", - " 2\n", - " Mouse\n", - " 1fpu\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 4\n", + " Human\n", + " 7lht\n", " -\n", " A\n", - " PRC\n", " -\n", - " 0.925\n", - " 2.319\n", - " 8.8\n", - " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", - " 2.40\n", - " 0\n", - " 8\n", - " 0000000000000010000000000000000000000000000000...\n", - " 0\n", - " 0\n", - " 1\n", - " 1\n", - " 0\n", + " ATP\n", + " 10.267\n", + " 15.045\n", + " 4.0\n", + " KALGKGLFSMVIRITLKVVGLRILNLPHLILEYCKAKDIIRFLQQK...\n", + " 3.50\n", " 0\n", " 0\n", - " 1\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " [Abl1, ABL1]\n", - " Abl1\n", - " ABL1\n", - " Abl\n", - " TK\n", - " N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " [LRRK2]\n", + " LRRK2\n", + " LRRK2\n", + " LRRK\n", + " TKL\n", " -\n", + " ADENOSINE-5'-TRIPHOSPHATE\n", " out\n", " out\n", - " MOUSE/ABL1/1fpu_chainA\n", - " 5728\n", - " 532\n", + " HUMAN/LRRK2/7lht_chainA\n", + " 13623\n", + " 495\n", " \n", " \n", - " 3\n", - " Mouse\n", - " 1fpu\n", + " 5\n", + " Human\n", + " 2ogv\n", " -\n", - " B\n", - " PRC\n", + " A\n", " -\n", - " 0.925\n", - " 2.329\n", - " 9.2\n", - " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", - " 2.40\n", - " 
0\n", - " 4\n", - " 0000000000000010000000000000000000000000000000...\n", - " 0\n", - " 0\n", - " 0\n", - " 1\n", - " 0\n", - " 0\n", - " 0\n", - " 1\n", - " 0\n", - " 0\n", - " 0\n", + " -\n", + " 1.043\n", + " 2.362\n", + " 6.4\n", + " KTLGAGAFGKVVEVAVKMLALMSELKIMSHLGENIVNLLGALVITE...\n", + " 2.70\n", + " 2\n", " 0\n", - " [Abl1, ABL1]\n", - " Abl1\n", - " ABL1\n", - " Abl\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " [CSF1R, FMS]\n", + " CSF1R\n", + " FMS\n", + " PDGFR\n", " TK\n", - " N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]...\n", + " -\n", " -\n", " out\n", - " out\n", - " MOUSE/ABL1/1fpu_chainB\n", - " 5705\n", - " 532\n", + " in\n", + " HUMAN/FMS/2ogv_chainA\n", + " 1243\n", + " 449\n", " \n", " \n", "\n", + "

6 rows × 38 columns

\n", "" ], "text/plain": [ - " species.klifs structure.pdb_id structure.alternate_model structure.chain \\\n", - "0 Human 3sxr - A \n", - "1 Human 6uuo - A \n", - "2 Mouse 1fpu - A \n", - "3 Mouse 1fpu - B \n", - "\n", - " ligand.expo_id ligand_allosteric.expo_id structure.rmsd1 structure.rmsd2 \\\n", - "0 1N1 - 0.839 1.967 \n", - "1 QH1 - 0.806 2.028 \n", - "2 PRC - 0.925 2.319 \n", - "3 PRC - 0.925 2.329 \n", - "\n", - " structure.qualityscore structure.pocket \\\n", - "0 6.4 KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... \n", - "1 5.2 QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... \n", - "2 8.8 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... \n", - "3 9.2 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... \n", - "\n", - " structure.resolution structure.missing_residues structure.missing_atoms \\\n", - "0 2.40 4 0 \n", - "1 3.29 7 61 \n", - "2 2.40 0 8 \n", - "3 2.40 0 4 \n", - "\n", - " interaction.fingerprint structure.fp_i \\\n", - "0 0000000000000010000001000000000000000000000000... 0 \n", - "1 0000000000000000000000000000000000000000000000... 0 \n", - "2 0000000000000010000000000000000000000000000000... 0 \n", - "3 0000000000000010000000000000000000000000000000... 
0 \n", - "\n", - " structure.fp_ii structure.bp_i_a structure.bp_i_b structure.bp_ii_in \\\n", - "0 0 1 1 0 \n", - "1 0 1 1 1 \n", - "2 0 1 1 0 \n", - "3 0 0 1 0 \n", - "\n", - " structure.bp_ii_a_in structure.bp_ii_b_in structure.bp_ii_out \\\n", - "0 0 0 0 \n", - "1 1 0 0 \n", - "2 0 0 1 \n", - "3 0 0 1 \n", - "\n", - " structure.bp_ii_b structure.bp_iii structure.bp_iv structure.bp_v \\\n", - "0 0 0 0 0 \n", - "1 0 0 0 0 \n", - "2 0 0 0 0 \n", - "3 0 0 0 0 \n", - "\n", - " kinase.names kinase.hgnc_name kinase.klifs_name kinase.family kinase.group \\\n", - "0 [BMX] BMX BMX Tec TK \n", - "1 [BRAF] BRAF BRAF RAF TKL \n", - "2 [Abl1, ABL1] Abl1 ABL1 Abl TK \n", - "3 [Abl1, ABL1] Abl1 ABL1 Abl TK \n", - "\n", - " ligand.name ligand_allosteric.name \\\n", - "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... - \n", - "1 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf... - \n", - "2 N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]... - \n", - "3 N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]... - \n", - "\n", - " structure.dfg structure.ac_helix structure.filepath \\\n", - "0 out-like in HUMAN/BMX/3sxr_chainA \n", - "1 in out-like HUMAN/BRAF/6uuo_chainA \n", - "2 out out MOUSE/ABL1/1fpu_chainA \n", - "3 out out MOUSE/ABL1/1fpu_chainB \n", - "\n", - " structure.klifs_id kinase.klifs_id \n", - "0 3482 472 \n", - "1 12347 509 \n", - "2 5728 532 \n", - "3 5705 532 " + " species.klifs structure.pdb_id structure.alternate_model structure.chain \\\n", + "0 Human 3sxr - A \n", + "1 Human 6uuo - A \n", + ".. ... ... ... ... \n", + "4 Human 7lht - A \n", + "5 Human 2ogv - A \n", + "\n", + " ligand.expo_id ligand_allosteric.expo_id structure.rmsd1 structure.rmsd2 \\\n", + "0 1N1 - 0.839 1.967 \n", + "1 QH1 - 0.806 2.028 \n", + ".. ... ... ... ... \n", + "4 - ATP 10.267 15.045 \n", + "5 - - 1.043 2.362 \n", + "\n", + " structure.qualityscore structure.pocket \\\n", + "0 6.4 KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 
\n", + "1 5.2 QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... \n", + ".. ... ... \n", + "4 4.0 KALGKGLFSMVIRITLKVVGLRILNLPHLILEYCKAKDIIRFLQQK... \n", + "5 6.4 KTLGAGAFGKVVEVAVKMLALMSELKIMSHLGENIVNLLGALVITE... \n", + "\n", + " structure.resolution structure.missing_residues structure.missing_atoms \\\n", + "0 2.40 4 0 \n", + "1 3.29 7 61 \n", + ".. ... ... ... \n", + "4 3.50 0 0 \n", + "5 2.70 2 0 \n", + "\n", + " interaction.fingerprint structure.fp_i \\\n", + "0 0000000000000010000001000000000000000000000000... 0.0 \n", + "1 0000000000000000000000000000000000000000000000... 0.0 \n", + ".. ... ... \n", + "4 NaN NaN \n", + "5 NaN NaN \n", + "\n", + " structure.fp_ii structure.bp_i_a structure.bp_i_b structure.bp_ii_in \\\n", + "0 0.0 1.0 1.0 0.0 \n", + "1 0.0 1.0 1.0 1.0 \n", + ".. ... ... ... ... \n", + "4 NaN NaN NaN NaN \n", + "5 NaN NaN NaN NaN \n", + "\n", + " structure.bp_ii_a_in structure.bp_ii_b_in structure.bp_ii_out \\\n", + "0 0.0 0.0 0.0 \n", + "1 1.0 0.0 0.0 \n", + ".. ... ... ... \n", + "4 NaN NaN NaN \n", + "5 NaN NaN NaN \n", + "\n", + " structure.bp_ii_b structure.bp_iii structure.bp_iv structure.bp_v \\\n", + "0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 \n", + ".. ... ... ... ... \n", + "4 NaN NaN NaN NaN \n", + "5 NaN NaN NaN NaN \n", + "\n", + " kinase.names kinase.gene_name kinase.klifs_name kinase.family \\\n", + "0 [BMX] BMX BMX Tec \n", + "1 [BRAF] BRAF BRAF RAF \n", + ".. ... ... ... ... \n", + "4 [LRRK2] LRRK2 LRRK2 LRRK \n", + "5 [CSF1R, FMS] CSF1R FMS PDGFR \n", + "\n", + " kinase.group ligand.name \\\n", + "0 TK N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... \n", + "1 TKL 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf... \n", + ".. ... ... \n", + "4 TKL - \n", + "5 TK - \n", + "\n", + " ligand_allosteric.name structure.dfg structure.ac_helix \\\n", + "0 - out-like in \n", + "1 - in out-like \n", + ".. ... ... ... 
\n", + "4 ADENOSINE-5'-TRIPHOSPHATE out out \n", + "5 - out in \n", + "\n", + " structure.filepath structure.klifs_id kinase.klifs_id \n", + "0 HUMAN/BMX/3sxr_chainA 3482 472 \n", + "1 HUMAN/BRAF/6uuo_chainA 12347 509 \n", + ".. ... ... ... \n", + "4 HUMAN/LRRK2/7lht_chainA 13623 495 \n", + "5 HUMAN/FMS/2ogv_chainA 1243 449 \n", + "\n", + "[6 rows x 38 columns]" ] }, "execution_count": 8, @@ -525,13 +558,15 @@ "metadata": {}, "source": [ "| | kinases | ligands | structures | bioactivities | interactions | pockets |\n", - "|:--------------------------| - | - | - | - | - | - |\n", - "| __by_kinase_klifs_id__ | x | x | x | x | x | | \n", - "| __by_kinase_name__ | x | x | x | | | |\n", - "| __by_ligand_klifs_id__ | | x | x | x | x | |\n", - "| __by_ligand_expo_id__ | | x | x | x | | |\n", - "| __by_structure_klifs_id__ | | | x | | x | x |\n", - "| __by_structure_pdb_id__ | | | x | | | |" + "|:--------------------------| - | - | - | - | - | - |\n", + "| __by_kinase_klifs_id__ | x* | x* | x* | | x | | \n", + "| __by_kinase_name__ | x* | x | x | | | |\n", + "| __by_ligand_klifs_id__ | | x* | x | x* | x | |\n", + "| __by_ligand_expo_id__ | | x | x | x* | | |\n", + "| __by_structure_klifs_id__ | | | x* | | x* | x* |\n", + "| __by_structure_pdb_id__ | | | x* | | | |\n", + "\n", + " \\* Direct use of KLIFS Swagger API." 
] }, { @@ -859,7 +894,7 @@ } ], "source": [ - "remote.kinases.all_kinase_families(group=\"TK\")" + "remote.kinases.all_kinase_families(groups=\"TK\")" ] }, { @@ -877,7 +912,7 @@ ], "source": [ "try:\n", - " remote.kinases.all_kinase_families(group=\"XXX\")\n", + " remote.kinases.all_kinase_families(groups=\"XXX\")\n", "except SwaggerMappingError as e:\n", " print(e)" ] @@ -933,6 +968,14 @@ " 2\n", " Abl\n", " \n", + " \n", + " 3\n", + " LRRK\n", + " \n", + " \n", + " 4\n", + " PDGFR\n", + " \n", " \n", "\n", "" @@ -941,7 +984,9 @@ " kinase.family\n", "0 Tec\n", "1 RAF\n", - "2 Abl" + "2 Abl\n", + "3 LRRK\n", + "4 PDGFR" ] }, "execution_count": 14, @@ -991,6 +1036,10 @@ " 1\n", " Abl\n", " \n", + " \n", + " 2\n", + " PDGFR\n", + " \n", " \n", "\n", "" @@ -998,7 +1047,8 @@ "text/plain": [ " kinase.family\n", "0 Tec\n", - "1 Abl" + "1 Abl\n", + "2 PDGFR" ] }, "execution_count": 15, @@ -1007,7 +1057,7 @@ } ], "source": [ - "local.kinases.all_kinase_families(group=\"TK\")" + "local.kinases.all_kinase_families(groups=\"TK\")" ] }, { @@ -1071,24 +1121,30 @@ " \n", " \n", " kinase.klifs_id\n", - " kinase.hgnc_name\n", + " kinase.klifs_name\n", " kinase.full_name\n", + " kinase.gene_name\n", + " kinase.uniprot\n", " species.klifs\n", " \n", " \n", " \n", " \n", " 0\n", - " 813\n", - " 4921509C19Rik\n", - " RIKEN cDNA 4921509C19 gene\n", + " 529\n", + " A6\n", + " 0\n", + " \n", + " 0\n", " Mouse\n", " \n", " \n", " 1\n", - " 815\n", - " 4921509C19Rik\n", - " RIKEN cDNA 4921509C19 gene\n", + " 530\n", + " A6r\n", + " 0\n", + " \n", + " 0\n", " Mouse\n", " \n", " \n", @@ -1097,42 +1153,55 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", " \n", " \n", " 1125\n", - " 529\n", - " A6\n", - " 0\n", + " 1062\n", + " ZAP70\n", + " zeta-chain (TCR) associated protein kinase\n", + " Zap70\n", + " P43404\n", " Mouse\n", " \n", " \n", " 1126\n", - " 530\n", - " A6r\n", - " 0\n", - " Mouse\n", + " 471\n", + " ZAP70\n", + " zeta chain of T cell receptor associated prote...\n", + " 
ZAP70\n", + " P43403\n", + " Human\n", " \n", " \n", "\n", - "

1127 rows × 4 columns

\n", + "

1127 rows × 6 columns

\n", "" ], "text/plain": [ - " kinase.klifs_id kinase.hgnc_name kinase.full_name \\\n", - "0 813 4921509C19Rik RIKEN cDNA 4921509C19 gene \n", - "1 815 4921509C19Rik RIKEN cDNA 4921509C19 gene \n", - "... ... ... ... \n", - "1125 529 A6 0 \n", - "1126 530 A6r 0 \n", - "\n", - " species.klifs \n", - "0 Mouse \n", - "1 Mouse \n", - "... ... \n", - "1125 Mouse \n", - "1126 Mouse \n", - "\n", - "[1127 rows x 4 columns]" + " kinase.klifs_id kinase.klifs_name \\\n", + "0 529 A6 \n", + "1 530 A6r \n", + "... ... ... \n", + "1125 1062 ZAP70 \n", + "1126 471 ZAP70 \n", + "\n", + " kinase.full_name kinase.gene_name \\\n", + "0 0 \n", + "1 0 \n", + "... ... ... \n", + "1125 zeta-chain (TCR) associated protein kinase Zap70 \n", + "1126 zeta chain of T cell receptor associated prote... ZAP70 \n", + "\n", + " kinase.uniprot species.klifs \n", + "0 0 Mouse \n", + "1 0 Mouse \n", + "... ... ... \n", + "1125 P43404 Mouse \n", + "1126 P43403 Human \n", + "\n", + "[1127 rows x 6 columns]" ] }, "execution_count": 17, @@ -1171,8 +1240,10 @@ " \n", " \n", " kinase.klifs_id\n", - " kinase.hgnc_name\n", + " kinase.klifs_name\n", " kinase.full_name\n", + " kinase.gene_name\n", + " kinase.uniprot\n", " species.klifs\n", " \n", " \n", @@ -1180,36 +1251,46 @@ " \n", " 0\n", " 574\n", - " Bmx\n", + " BMX\n", " BMX non-receptor tyrosine kinase\n", + " Bmx\n", + " P97504\n", " Mouse\n", " \n", " \n", " 1\n", " 583\n", - " Btk\n", + " BTK\n", " Bruton agammaglobulinemia tyrosine kinase\n", + " Btk\n", + " P35991\n", " Mouse\n", " \n", " \n", " 2\n", " 739\n", - " Itk\n", + " ITK\n", " IL2 inducible T cell kinase\n", + " Itk\n", + " Q03526\n", " Mouse\n", " \n", " \n", " 3\n", " 1008\n", - " Tec\n", + " TEC\n", " tec protein tyrosine kinase\n", + " Tec\n", + " P24604\n", " Mouse\n", " \n", " \n", " 4\n", " 1039\n", - " Txk\n", + " TXK\n", " TXK tyrosine kinase\n", + " Txk\n", + " P42682\n", " Mouse\n", " \n", " \n", @@ -1217,19 +1298,26 @@ "" ], "text/plain": [ - " kinase.klifs_id kinase.hgnc_name 
\\\n", - "0 574 Bmx \n", - "1 583 Btk \n", - "2 739 Itk \n", - "3 1008 Tec \n", - "4 1039 Txk \n", - "\n", - " kinase.full_name species.klifs \n", - "0 BMX non-receptor tyrosine kinase Mouse \n", - "1 Bruton agammaglobulinemia tyrosine kinase Mouse \n", - "2 IL2 inducible T cell kinase Mouse \n", - "3 tec protein tyrosine kinase Mouse \n", - "4 TXK tyrosine kinase Mouse " + " kinase.klifs_id kinase.klifs_name \\\n", + "0 574 BMX \n", + "1 583 BTK \n", + "2 739 ITK \n", + "3 1008 TEC \n", + "4 1039 TXK \n", + "\n", + " kinase.full_name kinase.gene_name kinase.uniprot \\\n", + "0 BMX non-receptor tyrosine kinase Bmx P97504 \n", + "1 Bruton agammaglobulinemia tyrosine kinase Btk P35991 \n", + "2 IL2 inducible T cell kinase Itk Q03526 \n", + "3 tec protein tyrosine kinase Tec P24604 \n", + "4 TXK tyrosine kinase Txk P42682 \n", + "\n", + " species.klifs \n", + "0 Mouse \n", + "1 Mouse \n", + "2 Mouse \n", + "3 Mouse \n", + "4 Mouse " ] }, "execution_count": 18, @@ -1238,7 +1326,7 @@ } ], "source": [ - "remote.kinases.all_kinases(group=\"TK\", family=\"Tec\", species=\"Mouse\")" + "remote.kinases.all_kinases(groups=\"TK\", families=\"Tec\", species=\"Mouse\")" ] }, { @@ -1256,7 +1344,7 @@ ], "source": [ "try:\n", - " remote.kinases.all_kinases(group=\"XXX\")\n", + " remote.kinases.all_kinases(groups=\"XXX\")\n", "except SwaggerMappingError as e:\n", " print(e)" ] @@ -1297,8 +1385,10 @@ " \n", " \n", " kinase.klifs_id\n", - " kinase.hgnc_name\n", + " kinase.klifs_name\n", " kinase.full_name\n", + " kinase.gene_name\n", + " kinase.uniprot\n", " species.klifs\n", " \n", " \n", @@ -1308,6 +1398,8 @@ " 472\n", " BMX\n", " <NA>\n", + " BMX\n", + " <NA>\n", " Human\n", " \n", " \n", @@ -1315,8 +1407,11 @@ "" ], "text/plain": [ - " kinase.klifs_id kinase.hgnc_name kinase.full_name species.klifs\n", - "0 472 BMX Human" + " kinase.klifs_id kinase.klifs_name kinase.full_name kinase.gene_name \\\n", + "0 472 BMX BMX \n", + "\n", + " kinase.uniprot species.klifs \n", + "0 Human " ] 
}, "execution_count": 20, @@ -1325,7 +1420,7 @@ } ], "source": [ - "local.kinases.all_kinases(family=\"Tec\")" + "local.kinases.all_kinases(families=\"Tec\")" ] }, { @@ -1355,8 +1450,10 @@ " \n", " \n", " kinase.klifs_id\n", - " kinase.hgnc_name\n", + " kinase.klifs_name\n", " kinase.full_name\n", + " kinase.gene_name\n", + " kinase.uniprot\n", " species.klifs\n", " \n", " \n", @@ -1364,6 +1461,8 @@ " \n", " 0\n", " 532\n", + " ABL1\n", + " <NA>\n", " Abl1\n", " <NA>\n", " Mouse\n", @@ -1373,8 +1472,11 @@ "" ], "text/plain": [ - " kinase.klifs_id kinase.hgnc_name kinase.full_name species.klifs\n", - "0 532 Abl1 Mouse" + " kinase.klifs_id kinase.klifs_name kinase.full_name kinase.gene_name \\\n", + "0 532 ABL1 Abl1 \n", + "\n", + " kinase.uniprot species.klifs \n", + "0 Mouse " ] }, "execution_count": 21, @@ -1383,7 +1485,7 @@ } ], "source": [ - "local.kinases.all_kinases(family=\"Abl\", species=\"Mouse\")" + "local.kinases.all_kinases(families=\"Abl\", species=\"Mouse\")" ] }, { @@ -1401,7 +1503,7 @@ ], "source": [ "try:\n", - " local.kinases.all_kinases(group=\"XXX\")\n", + " local.kinases.all_kinases(groups=\"XXX\")\n", "except ValueError as e:\n", " print(e)" ] @@ -1448,12 +1550,12 @@ " \n", " kinase.klifs_id\n", " kinase.klifs_name\n", - " kinase.hgnc_name\n", + " kinase.full_name\n", + " kinase.gene_name\n", " kinase.family\n", " kinase.group\n", - " kinase.class\n", + " kinase.subfamily\n", " species.klifs\n", - " kinase.full_name\n", " kinase.uniprot\n", " kinase.iuphar\n", " kinase.pocket\n", @@ -1464,12 +1566,12 @@ " 0\n", " 1\n", " AKT1\n", + " v-akt murine thymoma viral oncogene homolog 1\n", " AKT1\n", " Akt\n", " AGC\n", " \n", " Human\n", - " v-akt murine thymoma viral oncogene homolog 1\n", " P31749\n", " 1479\n", " KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME...\n", @@ -1479,17 +1581,17 @@ "" ], "text/plain": [ - " kinase.klifs_id kinase.klifs_name kinase.hgnc_name kinase.family \\\n", - "0 1 AKT1 AKT1 Akt \n", + " kinase.klifs_id 
kinase.klifs_name \\\n", + "0 1 AKT1 \n", "\n", - " kinase.group kinase.class species.klifs \\\n", - "0 AGC Human \n", + " kinase.full_name kinase.gene_name \\\n", + "0 v-akt murine thymoma viral oncogene homolog 1 AKT1 \n", "\n", - " kinase.full_name kinase.uniprot kinase.iuphar \\\n", - "0 v-akt murine thymoma viral oncogene homolog 1 P31749 1479 \n", + " kinase.family kinase.group kinase.subfamily species.klifs kinase.uniprot \\\n", + "0 Akt AGC Human P31749 \n", "\n", - " kinase.pocket \n", - "0 KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME... " + " kinase.iuphar kinase.pocket \n", + "0 1479 KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME... " ] }, "execution_count": 23, @@ -1529,12 +1631,12 @@ " \n", " kinase.klifs_id\n", " kinase.klifs_name\n", - " kinase.hgnc_name\n", + " kinase.full_name\n", + " kinase.gene_name\n", " kinase.family\n", " kinase.group\n", - " kinase.class\n", + " kinase.subfamily\n", " species.klifs\n", - " kinase.full_name\n", " kinase.uniprot\n", " kinase.iuphar\n", " kinase.pocket\n", @@ -1545,12 +1647,12 @@ " 0\n", " 1\n", " AKT1\n", + " v-akt murine thymoma viral oncogene homolog 1\n", " AKT1\n", " Akt\n", " AGC\n", " \n", " Human\n", - " v-akt murine thymoma viral oncogene homolog 1\n", " P31749\n", " 1479\n", " KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME...\n", @@ -1559,12 +1661,12 @@ " 1\n", " 2\n", " AKT2\n", + " v-akt murine thymoma viral oncogene homolog 2\n", " AKT2\n", " Akt\n", " AGC\n", " \n", " Human\n", - " v-akt murine thymoma viral oncogene homolog 2\n", " P31751\n", " 1480\n", " KLLGKGTFGKVILYAMKILHTVTESRVLQNTRPFLTALKYACFVME...\n", @@ -1574,21 +1676,21 @@ "" ], "text/plain": [ - " kinase.klifs_id kinase.klifs_name kinase.hgnc_name kinase.family \\\n", - "0 1 AKT1 AKT1 Akt \n", - "1 2 AKT2 AKT2 Akt \n", + " kinase.klifs_id kinase.klifs_name \\\n", + "0 1 AKT1 \n", + "1 2 AKT2 \n", "\n", - " kinase.group kinase.class species.klifs \\\n", - "0 AGC Human \n", - "1 AGC Human \n", + " kinase.full_name kinase.gene_name \\\n", 
+ "0 v-akt murine thymoma viral oncogene homolog 1 AKT1 \n", + "1 v-akt murine thymoma viral oncogene homolog 2 AKT2 \n", "\n", - " kinase.full_name kinase.uniprot kinase.iuphar \\\n", - "0 v-akt murine thymoma viral oncogene homolog 1 P31749 1479 \n", - "1 v-akt murine thymoma viral oncogene homolog 2 P31751 1480 \n", + " kinase.family kinase.group kinase.subfamily species.klifs kinase.uniprot \\\n", + "0 Akt AGC Human P31749 \n", + "1 Akt AGC Human P31751 \n", "\n", - " kinase.pocket \n", - "0 KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME... \n", - "1 KLLGKGTFGKVILYAMKILHTVTESRVLQNTRPFLTALKYACFVME... " + " kinase.iuphar kinase.pocket \n", + "0 1479 KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME... \n", + "1 1480 KLLGKGTFGKVILYAMKILHTVTESRVLQNTRPFLTALKYACFVME... " ] }, "execution_count": 24, @@ -1632,26 +1734,6 @@ "execution_count": 26, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Index(['species.klifs', 'structure.pdb_id', 'structure.alternate_model',\n", - " 'structure.chain', 'ligand.expo_id', 'ligand_allosteric.expo_id',\n", - " 'structure.rmsd1', 'structure.rmsd2', 'structure.qualityscore',\n", - " 'structure.pocket', 'structure.resolution',\n", - " 'structure.missing_residues', 'structure.missing_atoms',\n", - " 'interaction.fingerprint', 'structure.fp_i', 'structure.fp_ii',\n", - " 'structure.bp_i_a', 'structure.bp_i_b', 'structure.bp_ii_in',\n", - " 'structure.bp_ii_a_in', 'structure.bp_ii_b_in', 'structure.bp_ii_out',\n", - " 'structure.bp_ii_b', 'structure.bp_iii', 'structure.bp_iv',\n", - " 'structure.bp_v', 'kinase.names', 'kinase.hgnc_name',\n", - " 'kinase.klifs_name', 'kinase.family', 'kinase.group', 'ligand.name',\n", - " 'ligand_allosteric.name', 'structure.dfg', 'structure.ac_helix',\n", - " 'structure.filepath', 'structure.klifs_id', 'kinase.klifs_id'],\n", - " dtype='object')\n" - ] - }, { "data": { "text/html": [ @@ -1675,12 +1757,12 @@ " \n", " kinase.klifs_id\n", " kinase.klifs_name\n", - " 
kinase.hgnc_name\n", + " kinase.full_name\n", + " kinase.gene_name\n", " kinase.family\n", " kinase.group\n", - " kinase.class\n", + " kinase.subfamily\n", " species.klifs\n", - " kinase.full_name\n", " kinase.uniprot\n", " kinase.iuphar\n", " kinase.pocket\n", @@ -1691,6 +1773,7 @@ " 0\n", " 472\n", " BMX\n", + " <NA>\n", " BMX\n", " Tec\n", " TK\n", @@ -1699,18 +1782,17 @@ " <NA>\n", " <NA>\n", " <NA>\n", - " <NA>\n", " \n", " \n", "\n", "" ], "text/plain": [ - " kinase.klifs_id kinase.klifs_name kinase.hgnc_name kinase.family \\\n", - "0 472 BMX BMX Tec \n", + " kinase.klifs_id kinase.klifs_name kinase.full_name kinase.gene_name \\\n", + "0 472 BMX BMX \n", "\n", - " kinase.group kinase.class species.klifs kinase.full_name kinase.uniprot \\\n", - "0 TK Human \n", + " kinase.family kinase.group kinase.subfamily species.klifs kinase.uniprot \\\n", + "0 Tec TK Human \n", "\n", " kinase.iuphar kinase.pocket \n", "0 " @@ -1730,26 +1812,6 @@ "execution_count": 27, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Index(['species.klifs', 'structure.pdb_id', 'structure.alternate_model',\n", - " 'structure.chain', 'ligand.expo_id', 'ligand_allosteric.expo_id',\n", - " 'structure.rmsd1', 'structure.rmsd2', 'structure.qualityscore',\n", - " 'structure.pocket', 'structure.resolution',\n", - " 'structure.missing_residues', 'structure.missing_atoms',\n", - " 'interaction.fingerprint', 'structure.fp_i', 'structure.fp_ii',\n", - " 'structure.bp_i_a', 'structure.bp_i_b', 'structure.bp_ii_in',\n", - " 'structure.bp_ii_a_in', 'structure.bp_ii_b_in', 'structure.bp_ii_out',\n", - " 'structure.bp_ii_b', 'structure.bp_iii', 'structure.bp_iv',\n", - " 'structure.bp_v', 'kinase.names', 'kinase.hgnc_name',\n", - " 'kinase.klifs_name', 'kinase.family', 'kinase.group', 'ligand.name',\n", - " 'ligand_allosteric.name', 'structure.dfg', 'structure.ac_helix',\n", - " 'structure.filepath', 'structure.klifs_id', 'kinase.klifs_id'],\n", - " 
dtype='object')\n" - ] - }, { "data": { "text/html": [ @@ -1773,12 +1835,12 @@ " \n", " kinase.klifs_id\n", " kinase.klifs_name\n", - " kinase.hgnc_name\n", + " kinase.full_name\n", + " kinase.gene_name\n", " kinase.family\n", " kinase.group\n", - " kinase.class\n", + " kinase.subfamily\n", " species.klifs\n", - " kinase.full_name\n", " kinase.uniprot\n", " kinase.iuphar\n", " kinase.pocket\n", @@ -1789,6 +1851,7 @@ " 0\n", " 472\n", " BMX\n", + " <NA>\n", " BMX\n", " Tec\n", " TK\n", @@ -1797,12 +1860,12 @@ " <NA>\n", " <NA>\n", " <NA>\n", - " <NA>\n", " \n", " \n", " 1\n", " 509\n", " BRAF\n", + " <NA>\n", " BRAF\n", " RAF\n", " TKL\n", @@ -1811,20 +1874,19 @@ " <NA>\n", " <NA>\n", " <NA>\n", - " <NA>\n", " \n", " \n", "\n", "" ], "text/plain": [ - " kinase.klifs_id kinase.klifs_name kinase.hgnc_name kinase.family \\\n", - "0 472 BMX BMX Tec \n", - "1 509 BRAF BRAF RAF \n", + " kinase.klifs_id kinase.klifs_name kinase.full_name kinase.gene_name \\\n", + "0 472 BMX BMX \n", + "1 509 BRAF BRAF \n", "\n", - " kinase.group kinase.class species.klifs kinase.full_name kinase.uniprot \\\n", - "0 TK Human \n", - "1 TKL Human \n", + " kinase.family kinase.group kinase.subfamily species.klifs kinase.uniprot \\\n", + "0 Tec TK Human \n", + "1 RAF TKL Human \n", "\n", " kinase.iuphar kinase.pocket \n", "0 \n", @@ -1849,20 +1911,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "Index(['species.klifs', 'structure.pdb_id', 'structure.alternate_model',\n", - " 'structure.chain', 'ligand.expo_id', 'ligand_allosteric.expo_id',\n", - " 'structure.rmsd1', 'structure.rmsd2', 'structure.qualityscore',\n", - " 'structure.pocket', 'structure.resolution',\n", - " 'structure.missing_residues', 'structure.missing_atoms',\n", - " 'interaction.fingerprint', 'structure.fp_i', 'structure.fp_ii',\n", - " 'structure.bp_i_a', 'structure.bp_i_b', 'structure.bp_ii_in',\n", - " 'structure.bp_ii_a_in', 'structure.bp_ii_b_in', 'structure.bp_ii_out',\n", - " 'structure.bp_ii_b', 
'structure.bp_iii', 'structure.bp_iv',\n", - " 'structure.bp_v', 'kinase.names', 'kinase.hgnc_name',\n", - " 'kinase.klifs_name', 'kinase.family', 'kinase.group', 'ligand.name',\n", - " 'ligand_allosteric.name', 'structure.dfg', 'structure.ac_helix',\n", - " 'structure.filepath', 'structure.klifs_id', 'kinase.klifs_id'],\n", - " dtype='object')\n", "Input values yield no results.\n" ] } @@ -1893,27 +1941,6 @@ "execution_count": 29, "metadata": {}, "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a9f5011d651847b89411360165303c92", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, { "data": { "text/html": [ @@ -1937,12 +1964,12 @@ " \n", " kinase.klifs_id\n", " kinase.klifs_name\n", - " kinase.hgnc_name\n", + " kinase.full_name\n", + " kinase.gene_name\n", " kinase.family\n", " kinase.group\n", - " kinase.class\n", + " kinase.subfamily\n", " species.klifs\n", - " kinase.full_name\n", " kinase.uniprot\n", " kinase.iuphar\n", " kinase.pocket\n", @@ -1953,12 +1980,12 @@ " 0\n", " 574\n", " BMX\n", + " BMX non-receptor tyrosine kinase\n", " Bmx\n", " Tec\n", " TK\n", " \n", " Mouse\n", - " BMX non-receptor tyrosine kinase\n", " P97504\n", " 0\n", " KELGNGQFGVVQLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", @@ -1967,12 +1994,12 @@ " 1\n", " 472\n", " BMX\n", + " BMX non-receptor tyrosine kinase\n", " BMX\n", " Tec\n", " TK\n", " \n", " Human\n", - " BMX non-receptor tyrosine kinase\n", " P51813\n", " 1942\n", " KELGSGQFGVVQLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", @@ -1982,13 +2009,13 @@ "" ], "text/plain": [ - " kinase.klifs_id kinase.klifs_name kinase.hgnc_name kinase.family \\\n", - "0 574 BMX Bmx Tec \n", - "1 472 BMX BMX Tec \n", + " kinase.klifs_id kinase.klifs_name 
kinase.full_name \\\n", + "0 574 BMX BMX non-receptor tyrosine kinase \n", + "1 472 BMX BMX non-receptor tyrosine kinase \n", "\n", - " kinase.group kinase.class species.klifs kinase.full_name \\\n", - "0 TK Mouse BMX non-receptor tyrosine kinase \n", - "1 TK Human BMX non-receptor tyrosine kinase \n", + " kinase.gene_name kinase.family kinase.group kinase.subfamily species.klifs \\\n", + "0 Bmx Tec TK Mouse \n", + "1 BMX Tec TK Human \n", "\n", " kinase.uniprot kinase.iuphar \\\n", "0 P97504 0 \n", @@ -2013,27 +2040,6 @@ "execution_count": 30, "metadata": {}, "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e9c4940071424d93b26674b292bd351c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, { "data": { "text/html": [ @@ -2057,12 +2063,12 @@ " \n", " kinase.klifs_id\n", " kinase.klifs_name\n", - " kinase.hgnc_name\n", + " kinase.full_name\n", + " kinase.gene_name\n", " kinase.family\n", " kinase.group\n", - " kinase.class\n", + " kinase.subfamily\n", " species.klifs\n", - " kinase.full_name\n", " kinase.uniprot\n", " kinase.iuphar\n", " kinase.pocket\n", @@ -2073,12 +2079,12 @@ " 0\n", " 574\n", " BMX\n", + " BMX non-receptor tyrosine kinase\n", " Bmx\n", " Tec\n", " TK\n", " \n", " Mouse\n", - " BMX non-receptor tyrosine kinase\n", " P97504\n", " 0\n", " KELGNGQFGVVQLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", @@ -2087,12 +2093,12 @@ " 1\n", " 472\n", " BMX\n", + " BMX non-receptor tyrosine kinase\n", " BMX\n", " Tec\n", " TK\n", " \n", " Human\n", - " BMX non-receptor tyrosine kinase\n", " P51813\n", " 1942\n", " KELGSGQFGVVQLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", @@ -2102,13 +2108,13 @@ "" ], "text/plain": [ - " kinase.klifs_id kinase.klifs_name kinase.hgnc_name 
kinase.family \\\n", - "0 574 BMX Bmx Tec \n", - "1 472 BMX BMX Tec \n", + " kinase.klifs_id kinase.klifs_name kinase.full_name \\\n", + "0 574 BMX BMX non-receptor tyrosine kinase \n", + "1 472 BMX BMX non-receptor tyrosine kinase \n", "\n", - " kinase.group kinase.class species.klifs kinase.full_name \\\n", - "0 TK Mouse BMX non-receptor tyrosine kinase \n", - "1 TK Human BMX non-receptor tyrosine kinase \n", + " kinase.gene_name kinase.family kinase.group kinase.subfamily species.klifs \\\n", + "0 Bmx Tec TK Mouse \n", + "1 BMX Tec TK Human \n", "\n", " kinase.uniprot kinase.iuphar \\\n", "0 P97504 0 \n", @@ -2133,35 +2139,11 @@ "execution_count": 31, "metadata": {}, "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "28663dc6967d42ada1bc88beb1052b8e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:opencadd.databases.klifs.core:There was (were) 1/1 failed request(s).\n", - "Show error messages (up to 5 messages only):\n", - "ERROR:opencadd.databases.klifs.core:Error for BMX: Expected type to be dict for value [400, 'KLIFS error: The provided species does not exist. '] to unmarshal to a .Was instead.\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "Input values yield no results.\n" + "Expected type to be dict for value [400, 'KLIFS error: The provided species does not exist. 
'] to unmarshal to a .Was instead.\n" ] } ], @@ -2177,36 +2159,6 @@ "execution_count": 32, "metadata": {}, "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0e4fe9af47f04b078c02c5d2fbfd4c81", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=3.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:opencadd.databases.klifs.core:There was (were) 1/3 failed request(s).\n", - "Show error messages (up to 5 messages only):\n", - "ERROR:opencadd.databases.klifs.core:Error for XXX: Expected type to be dict for value [400, 'KLIFS error: An unknown kinase name was provided'] to unmarshal to a .Was instead.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, { "data": { "text/html": [ @@ -2230,12 +2182,12 @@ " \n", " kinase.klifs_id\n", " kinase.klifs_name\n", - " kinase.hgnc_name\n", + " kinase.full_name\n", + " kinase.gene_name\n", " kinase.family\n", " kinase.group\n", - " kinase.class\n", + " kinase.subfamily\n", " species.klifs\n", - " kinase.full_name\n", " kinase.uniprot\n", " kinase.iuphar\n", " kinase.pocket\n", @@ -2244,52 +2196,52 @@ " \n", " \n", " 0\n", - " 472\n", - " BMX\n", - " BMX\n", - " Tec\n", - " TK\n", - " \n", - " Human\n", - " BMX non-receptor tyrosine kinase\n", - " P51813\n", - " 1942\n", - " KELGSGQFGVVQLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", - " \n", - " \n", - " 1\n", " 509\n", " BRAF\n", + " B-Raf proto-oncogene, serine/threonine kinase\n", " BRAF\n", " RAF\n", " TKL\n", " RAF\n", " Human\n", - " B-Raf proto-oncogene, serine/threonine kinase\n", " P15056\n", " 1943\n", " QRIGSGSFGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ...\n", " \n", + " \n", + " 1\n", + " 472\n", + " BMX\n", + " BMX non-receptor tyrosine kinase\n", + " BMX\n", + " Tec\n", + " TK\n", + " \n", + " Human\n", + " 
P51813\n", + " 1942\n", + " KELGSGQFGVVQLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", + " \n", " \n", "\n", "" ], "text/plain": [ - " kinase.klifs_id kinase.klifs_name kinase.hgnc_name kinase.family \\\n", - "0 472 BMX BMX Tec \n", - "1 509 BRAF BRAF RAF \n", + " kinase.klifs_id kinase.klifs_name \\\n", + "0 509 BRAF \n", + "1 472 BMX \n", "\n", - " kinase.group kinase.class species.klifs \\\n", - "0 TK Human \n", - "1 TKL RAF Human \n", + " kinase.full_name kinase.gene_name \\\n", + "0 B-Raf proto-oncogene, serine/threonine kinase BRAF \n", + "1 BMX non-receptor tyrosine kinase BMX \n", "\n", - " kinase.full_name kinase.uniprot kinase.iuphar \\\n", - "0 BMX non-receptor tyrosine kinase P51813 1942 \n", - "1 B-Raf proto-oncogene, serine/threonine kinase P15056 1943 \n", + " kinase.family kinase.group kinase.subfamily species.klifs kinase.uniprot \\\n", + "0 RAF TKL RAF Human P15056 \n", + "1 Tec TK Human P51813 \n", "\n", - " kinase.pocket \n", - "0 KELGSGQFGVVQLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... \n", - "1 QRIGSGSFGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... " + " kinase.iuphar kinase.pocket \n", + "0 1943 QRIGSGSFGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... \n", + "1 1942 KELGSGQFGVVQLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 
" ] }, "execution_count": 32, @@ -2336,12 +2288,12 @@ " \n", " kinase.klifs_id\n", " kinase.klifs_name\n", - " kinase.hgnc_name\n", + " kinase.full_name\n", + " kinase.gene_name\n", " kinase.family\n", " kinase.group\n", - " kinase.class\n", + " kinase.subfamily\n", " species.klifs\n", - " kinase.full_name\n", " kinase.uniprot\n", " kinase.iuphar\n", " kinase.pocket\n", @@ -2352,6 +2304,7 @@ " 0\n", " 472\n", " BMX\n", + " <NA>\n", " BMX\n", " Tec\n", " TK\n", @@ -2360,18 +2313,17 @@ " <NA>\n", " <NA>\n", " <NA>\n", - " <NA>\n", " \n", " \n", "\n", "" ], "text/plain": [ - " kinase.klifs_id kinase.klifs_name kinase.hgnc_name kinase.family \\\n", - "0 472 BMX BMX Tec \n", + " kinase.klifs_id kinase.klifs_name kinase.full_name kinase.gene_name \\\n", + "0 472 BMX BMX \n", "\n", - " kinase.group kinase.class species.klifs kinase.full_name kinase.uniprot \\\n", - "0 TK Human \n", + " kinase.family kinase.group kinase.subfamily species.klifs kinase.uniprot \\\n", + "0 Tec TK Human \n", "\n", " kinase.iuphar kinase.pocket \n", "0 " @@ -2434,12 +2386,12 @@ " \n", " kinase.klifs_id\n", " kinase.klifs_name\n", - " kinase.hgnc_name\n", + " kinase.full_name\n", + " kinase.gene_name\n", " kinase.family\n", " kinase.group\n", - " kinase.class\n", + " kinase.subfamily\n", " species.klifs\n", - " kinase.full_name\n", " kinase.uniprot\n", " kinase.iuphar\n", " kinase.pocket\n", @@ -2450,6 +2402,7 @@ " 0\n", " 472\n", " BMX\n", + " <NA>\n", " BMX\n", " Tec\n", " TK\n", @@ -2458,12 +2411,12 @@ " <NA>\n", " <NA>\n", " <NA>\n", - " <NA>\n", " \n", " \n", " 1\n", " 509\n", " BRAF\n", + " <NA>\n", " BRAF\n", " RAF\n", " TKL\n", @@ -2472,20 +2425,19 @@ " <NA>\n", " <NA>\n", " <NA>\n", - " <NA>\n", " \n", " \n", "\n", "" ], "text/plain": [ - " kinase.klifs_id kinase.klifs_name kinase.hgnc_name kinase.family \\\n", - "0 472 BMX BMX Tec \n", - "1 509 BRAF BRAF RAF \n", + " kinase.klifs_id kinase.klifs_name kinase.full_name kinase.gene_name \\\n", + "0 472 BMX BMX \n", + "1 509 BRAF BRAF \n", 
"\n", - " kinase.group kinase.class species.klifs kinase.full_name kinase.uniprot \\\n", - "0 TK Human \n", - "1 TKL Human \n", + " kinase.family kinase.group kinase.subfamily species.klifs kinase.uniprot \\\n", + "0 Tec TK Human \n", + "1 RAF TKL Human \n", "\n", " kinase.iuphar kinase.pocket \n", "0 \n", @@ -2583,24 +2535,24 @@ " ...\n", " \n", " \n", - " 3580\n", - " 3871\n", - " Q6G\n", - " Selpercatinib\n", - " O(c1ncc(cc1)CN2[C@H]3CN(c4ncc(cc4)C=5C=6N(N=CC...\n", - " InChI not available\n", + " 3791\n", + " 4083\n", + " GC6\n", + " [(3R)-3-azanylpiperidin-1-yl]-[1-(2-azanylpyri...\n", + " c1cc2c(cc1C#CC3(CCCCC3)O)n(cc2C(=O)N4CCC[C@H](...\n", + " DADSGHOHYQROCR-LJQANCHMSA-N\n", " \n", " \n", - " 3581\n", - " 3872\n", - " Q4J\n", - " Pralsetinib\n", - " FC=1C=NN(c2ncc(cc2)[C@@H](NC(=O)C3(OC)CCC(c4nc...\n", - " InChI not available\n", + " 3792\n", + " 4082\n", + " ON6\n", + " (2~{Z})-6-[[2,6-bis(chloranyl)phenyl]methylsul...\n", + " c1cc(c(c(c1)Cl)CS(=O)(=O)c2ccc3c(c2)NC(=O)/C(=...\n", + " VUSCGLBHGBXFCC-NKVSQWTQSA-N\n", " \n", " \n", "\n", - "

3582 rows × 5 columns

\n", + "

3793 rows × 5 columns

\n", "" ], "text/plain": [ @@ -2608,31 +2560,31 @@ "0 1 IHZ \n", "1 2 477 \n", "... ... ... \n", - "3580 3871 Q6G \n", - "3581 3872 Q4J \n", + "3791 4083 GC6 \n", + "3792 4082 ON6 \n", "\n", " ligand.name \\\n", "0 5-[(2-methyl-5-{[3-(trifluoromethyl)phenyl]car... \n", "1 3-{2-[5-(difluoromethyl)-2H-thieno[3,2-c]pyraz... \n", "... ... \n", - "3580 Selpercatinib \n", - "3581 Pralsetinib \n", + "3791 [(3R)-3-azanylpiperidin-1-yl]-[1-(2-azanylpyri... \n", + "3792 (2~{Z})-6-[[2,6-bis(chloranyl)phenyl]methylsul... \n", "\n", " ligand.smiles \\\n", "0 FC(F)(F)c1cc(NC(=O)c2cc(Nc3cncc(c3)C(=O)N)c(cc... \n", "1 S1C=2C(=NNC2C=C1C(F)F)C=3Nc4c(ccc(c4)C(O)(CC)C... \n", "... ... \n", - "3580 O(c1ncc(cc1)CN2[C@H]3CN(c4ncc(cc4)C=5C=6N(N=CC... \n", - "3581 FC=1C=NN(c2ncc(cc2)[C@@H](NC(=O)C3(OC)CCC(c4nc... \n", + "3791 c1cc2c(cc1C#CC3(CCCCC3)O)n(cc2C(=O)N4CCC[C@H](... \n", + "3792 c1cc(c(c(c1)Cl)CS(=O)(=O)c2ccc3c(c2)NC(=O)/C(=... \n", "\n", " ligand.inchikey \n", "0 SAAYRHKJHDIDPH-UHFFFAOYSA-N \n", "1 CQZZZUNOWZUNNG-UHFFFAOYSA-N \n", "... ... \n", - "3580 InChI not available \n", - "3581 InChI not available \n", + "3791 DADSGHOHYQROCR-LJQANCHMSA-N \n", + "3792 VUSCGLBHGBXFCC-NKVSQWTQSA-N \n", "\n", - "[3582 rows x 5 columns]" + "[3793 rows x 5 columns]" ] }, "execution_count": 36, @@ -2711,6 +2663,14 @@ " <NA>\n", " <NA>\n", " \n", + " \n", + " 3\n", + " <NA>\n", + " -\n", + " -\n", + " <NA>\n", + " <NA>\n", + " \n", " \n", "\n", "" @@ -2720,16 +2680,19 @@ "0 1N1 \n", "1 QH1 \n", "2 PRC \n", + "3 - \n", "\n", " ligand.name ligand.smiles \\\n", "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... \n", "1 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf... \n", "2 N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]... 
\n", + "3 - \n", "\n", " ligand.inchikey \n", "0 \n", "1 \n", - "2 " + "2 \n", + "3 " ] }, "execution_count": 37, @@ -2770,24 +2733,17 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "54967774829044fca7d8dfbb91a7290c", + "model_id": "490e594cdbc643c9b105c4924b8e4738", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" + "Processing...: 0%| | 0/1 [00:00.Was instead.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" + "Error for 10000: Expected type to be dict for value [400, 'KLIFS error: The provided kinase ID(s) is/are invalid'] to unmarshal to a .Was instead.\n" ] }, { @@ -2973,26 +2922,26 @@ " ...\n", " \n", " \n", - " 64\n", - " 3715\n", - " QH1\n", - " 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf...\n", - " S(O)(O)(Nc1c(F)c(N2c3c(nc(N(C4C=CN(C(=O)C)C=C4...\n", + " 68\n", + " 4028\n", + " VQP\n", + " N-(3,3-dimethylbutyl)-N&#039;-{2-fluoro-5-[(5-...\n", + " Fc1c(Nc2c(cc(F)c(NC(=O)NCCC(C)(C)C)c2)C)ccc3N=...\n", " InChI not available\n", " 509\n", " \n", " \n", - " 65\n", - " 3829\n", - " E7M\n", - " 3-[(imidazo[1,2-b]pyridazin-3-yl)ethynyl]-4-me...\n", - " FC(F)(F)c1c(ccc(NC(=O)c2cc(C#CC=3N4N=CC=CC4=NC...\n", + " 69\n", + " 4029\n", + " V5J\n", + " 3-(2-cyanopropan-2-yl)-N-[2-fluoro-4-methyl-5-...\n", + " Fc1c(NC(=O)c2cc(ccc2)C(C#N)(C)C)cc(c3cnc4c(c3)...\n", " InChI not available\n", " 509\n", " \n", " \n", "\n", - "

66 rows × 6 columns

\n", + "

70 rows × 6 columns

\n", "" ], "text/plain": [ @@ -3000,31 +2949,31 @@ "0 281 1N1 \n", "1 632 PP2 \n", ".. ... ... \n", - "64 3715 QH1 \n", - "65 3829 E7M \n", + "68 4028 VQP \n", + "69 4029 V5J \n", "\n", " ligand.name \\\n", "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... \n", "1 1-TERT-BUTYL-3-(4-CHLORO-PHENYL)-1H-PYRAZOLO[3... \n", ".. ... \n", - "64 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf... \n", - "65 3-[(imidazo[1,2-b]pyridazin-3-yl)ethynyl]-4-me... \n", + "68 N-(3,3-dimethylbutyl)-N'-{2-fluoro-5-[(5-... \n", + "69 3-(2-cyanopropan-2-yl)-N-[2-fluoro-4-methyl-5-... \n", "\n", " ligand.smiles \\\n", "0 Clc1c(NC(=O)C=2SC(=NC2)Nc3nc(nc(N4CCN(CCO)CC4)... \n", "1 Clc1ccc(cc1)C2=[NH+]N(c3ncnc(N)c32)C(C)(C)C \n", ".. ... \n", - "64 S(O)(O)(Nc1c(F)c(N2c3c(nc(N(C4C=CN(C(=O)C)C=C4... \n", - "65 FC(F)(F)c1c(ccc(NC(=O)c2cc(C#CC=3N4N=CC=CC4=NC... \n", + "68 Fc1c(Nc2c(cc(F)c(NC(=O)NCCC(C)(C)C)c2)C)ccc3N=... \n", + "69 Fc1c(NC(=O)c2cc(ccc2)C(C#N)(C)C)cc(c3cnc4c(c3)... \n", "\n", " ligand.inchikey kinase.klifs_id (query) \n", "0 ZBNZXTGUTAYRHI-UHFFFAOYSA-N 472 \n", "1 PBBRWFOVCUAONR-UHFFFAOYSA-O 472 \n", ".. ... ... 
\n", - "64 InChI not available 509 \n", - "65 InChI not available 509 \n", + "68 InChI not available 509 \n", + "69 InChI not available 509 \n", "\n", - "[66 rows x 6 columns]" + "[70 rows x 6 columns]" ] }, "execution_count": 39, @@ -3044,12 +2993,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ba1cb3a240c945f9ba97b4b5271612ab", + "model_id": "00879a77d46a408984bf81abe93b1931", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" + "Processing...: 0%| | 0/1 [00:00.Was instead.\n" + "Error for 10000: Expected type to be dict for value [400, 'KLIFS error: The provided kinase ID(s) is/are invalid'] to unmarshal to a .Was instead.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", "Input values yield no results.\n" ] } @@ -3274,50 +3222,15 @@ "execution_count": 44, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:opencadd.databases.klifs.remote:Fetch kinase KLIFS IDs for input kinase names...\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0f159507aafc4f1db28117046b74aaee", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=2.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:opencadd.databases.klifs.remote:Fetch ligands based on these KLIFS IDs...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "58815b1dc36a4e8e85f4f1745fa454aa", + "model_id": "98bd8339663140a7865be50814f5490e", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=4.0), 
HTML(value='')))" + "Processing...: 0%| | 0/4 [00:00.Was instead.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" + "Error for 574: Expected type to be dict for value [400, 'KLIFS error: The provided kinase ID(s) is/are invalid'] to unmarshal to a .Was instead.\n" ] }, { @@ -3367,33 +3273,33 @@ " ligand.inchikey\n", " kinase.klifs_id (query)\n", " kinase.klifs_name (query)\n", - " kinase.hgnc_name (query)\n", + " kinase.gene_name (query)\n", " species.klifs (query)\n", " \n", " \n", " \n", " \n", " 0\n", - " 26\n", - " ADP\n", - " ADENOSINE-5&apos;-DIPHOSPHATE\n", - " P(=O)(OP(=O)(O)O)(OC[C@H]1O[C@@H](N2c3ncnc(N)c...\n", - " XTWYTFMLZFPYCI-KQYNXXCUSA-N\n", - " 392\n", - " ABL1\n", - " ABL1\n", + " 281\n", + " 1N1\n", + " N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX...\n", + " Clc1c(NC(=O)C=2SC(=NC2)Nc3nc(nc(N4CCN(CCO)CC4)...\n", + " ZBNZXTGUTAYRHI-UHFFFAOYSA-N\n", + " 472\n", + " BMX\n", + " BMX\n", " Human\n", " \n", " \n", " 1\n", - " 38\n", - " AGS\n", - " PHOSPHOTHIOPHOSPHORIC ACID-ADENYLATE ESTER\n", - " S=P(OP(=O)(OP(=O)(OC[C@H]1O[C@@H](N2c3ncnc(N)c...\n", - " NLTUCYMLOPLUHL-KQYNXXCUSA-N\n", - " 392\n", - " ABL1\n", - " ABL1\n", + " 632\n", + " PP2\n", + " 1-TERT-BUTYL-3-(4-CHLORO-PHENYL)-1H-PYRAZOLO[3...\n", + " Clc1ccc(cc1)C2=[NH+]N(c3ncnc(N)c32)C(C)(C)C\n", + " PBBRWFOVCUAONR-UHFFFAOYSA-O\n", + " 472\n", + " BMX\n", + " BMX\n", " Human\n", " \n", " \n", @@ -3409,71 +3315,71 @@ " ...\n", " \n", " \n", - " 43\n", - " 632\n", - " PP2\n", - " 1-TERT-BUTYL-3-(4-CHLORO-PHENYL)-1H-PYRAZOLO[3...\n", - " Clc1ccc(cc1)C2=[NH+]N(c3ncnc(N)c32)C(C)(C)C\n", - " PBBRWFOVCUAONR-UHFFFAOYSA-O\n", - " 472\n", - " BMX\n", - " BMX\n", - " Human\n", + " 45\n", + " 3247\n", + " FYH\n", + " 3-(morpholin-4-ylmethyl)-~{N}-[4-(trifluoromet...\n", + " FC(F)(F)Oc1ccc(NC(=O)c2cc(ccc2)CN3CCOCC3)cc1\n", + " MYWULUKAXYAFSH-UHFFFAOYSA-N\n", + " 532\n", + " ABL1\n", + " Abl1\n", + " Mouse\n", " \n", " \n", - " 44\n", - " 3716\n", - " H88\n", - " 
~{N}-[2-methyl-5-[8-[4-(methylsulfonylamino)ph...\n", - " S(=O)(=O)(Nc1ccc(c2cc3ncc4c(N(c5cc(NC(=O)CCO)c...\n", - " InChI not available\n", - " 472\n", - " BMX\n", - " BMX\n", - " Human\n", + " 46\n", + " 3255\n", + " FYW\n", + " 6-[(3~{R})-3-oxidanylpyrrolidin-1-yl]-5-pyrimi...\n", + " c1cc(ccc1NC(=O)c2cc(c(nc2)N3CC[C@H](C3)O)c4cnc...\n", + " LARFZNXVNANWFD-MRXNPFEDSA-N\n", + " 532\n", + " ABL1\n", + " Abl1\n", + " Mouse\n", " \n", " \n", "\n", - "

45 rows × 9 columns

\n", + "

47 rows × 9 columns

\n", "" ], "text/plain": [ " ligand.klifs_id ligand.expo_id \\\n", - "0 26 ADP \n", - "1 38 AGS \n", + "0 281 1N1 \n", + "1 632 PP2 \n", ".. ... ... \n", - "43 632 PP2 \n", - "44 3716 H88 \n", + "45 3247 FYH \n", + "46 3255 FYW \n", "\n", " ligand.name \\\n", - "0 ADENOSINE-5'-DIPHOSPHATE \n", - "1 PHOSPHOTHIOPHOSPHORIC ACID-ADENYLATE ESTER \n", + "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... \n", + "1 1-TERT-BUTYL-3-(4-CHLORO-PHENYL)-1H-PYRAZOLO[3... \n", ".. ... \n", - "43 1-TERT-BUTYL-3-(4-CHLORO-PHENYL)-1H-PYRAZOLO[3... \n", - "44 ~{N}-[2-methyl-5-[8-[4-(methylsulfonylamino)ph... \n", + "45 3-(morpholin-4-ylmethyl)-~{N}-[4-(trifluoromet... \n", + "46 6-[(3~{R})-3-oxidanylpyrrolidin-1-yl]-5-pyrimi... \n", "\n", " ligand.smiles \\\n", - "0 P(=O)(OP(=O)(O)O)(OC[C@H]1O[C@@H](N2c3ncnc(N)c... \n", - "1 S=P(OP(=O)(OP(=O)(OC[C@H]1O[C@@H](N2c3ncnc(N)c... \n", + "0 Clc1c(NC(=O)C=2SC(=NC2)Nc3nc(nc(N4CCN(CCO)CC4)... \n", + "1 Clc1ccc(cc1)C2=[NH+]N(c3ncnc(N)c32)C(C)(C)C \n", ".. ... \n", - "43 Clc1ccc(cc1)C2=[NH+]N(c3ncnc(N)c32)C(C)(C)C \n", - "44 S(=O)(=O)(Nc1ccc(c2cc3ncc4c(N(c5cc(NC(=O)CCO)c... \n", + "45 FC(F)(F)Oc1ccc(NC(=O)c2cc(ccc2)CN3CCOCC3)cc1 \n", + "46 c1cc(ccc1NC(=O)c2cc(c(nc2)N3CC[C@H](C3)O)c4cnc... \n", "\n", " ligand.inchikey kinase.klifs_id (query) \\\n", - "0 XTWYTFMLZFPYCI-KQYNXXCUSA-N 392 \n", - "1 NLTUCYMLOPLUHL-KQYNXXCUSA-N 392 \n", + "0 ZBNZXTGUTAYRHI-UHFFFAOYSA-N 472 \n", + "1 PBBRWFOVCUAONR-UHFFFAOYSA-O 472 \n", ".. ... ... \n", - "43 PBBRWFOVCUAONR-UHFFFAOYSA-O 472 \n", - "44 InChI not available 472 \n", + "45 MYWULUKAXYAFSH-UHFFFAOYSA-N 532 \n", + "46 LARFZNXVNANWFD-MRXNPFEDSA-N 532 \n", "\n", - " kinase.klifs_name (query) kinase.hgnc_name (query) species.klifs (query) \n", - "0 ABL1 ABL1 Human \n", - "1 ABL1 ABL1 Human \n", + " kinase.klifs_name (query) kinase.gene_name (query) species.klifs (query) \n", + "0 BMX BMX Human \n", + "1 BMX BMX Human \n", ".. ... ... ... 
\n", - "43 BMX BMX Human \n", - "44 BMX BMX Human \n", + "45 ABL1 Abl1 Mouse \n", + "46 ABL1 Abl1 Mouse \n", "\n", - "[45 rows x 9 columns]" + "[47 rows x 9 columns]" ] }, "execution_count": 44, @@ -3490,42 +3396,11 @@ "execution_count": 45, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:opencadd.databases.klifs.remote:Fetch kinase KLIFS IDs for input kinase names...\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "35833ed4ad6d4e24a95100311633c2e5", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:opencadd.databases.klifs.core:There was (were) 1/1 failed request(s).\n", - "Show error messages (up to 5 messages only):\n", - "ERROR:opencadd.databases.klifs.core:Error for XXX: Expected type to be dict for value [400, 'KLIFS error: An unknown kinase name was provided'] to unmarshal to a .Was instead.\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "Input values yield no results.\n" + "Expected type to be dict for value [400, 'KLIFS error: An unknown kinase name was provided'] to unmarshal to a .Was instead.\n" ] } ], @@ -3575,7 +3450,7 @@ " ligand.smiles\n", " ligand.inchikey\n", " kinase.klifs_name (query)\n", - " kinase.hgnc_name (query)\n", + " kinase.gene_name (query)\n", " species.klifs (query)\n", " \n", " \n", @@ -3615,7 +3490,7 @@ "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... \n", "1 N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]... 
\n", "\n", - " ligand.inchikey kinase.klifs_name (query) kinase.hgnc_name (query) \\\n", + " ligand.inchikey kinase.klifs_name (query) kinase.gene_name (query) \\\n", "0 BMX BMX \n", "1 ABL1 Abl1 \n", "\n", @@ -4041,11 +3916,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -4056,6 +3934,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -4075,34 +3954,39 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", " \n", " 0\n", - " 2542\n", - " 4ejn\n", - " -\n", + " 6765\n", + " 3cqw\n", + " B\n", " A\n", " Human\n", " 1\n", " AKT1\n", " <NA>\n", " <NA>\n", - " KLLGKGTFGKVILYAMKIL_______VLQNSRPFLTALKYSCFVME...\n", + " <NA>\n", + " KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME...\n", + " CQW\n", " -\n", - " 0R4\n", + " 2147\n", + " 0\n", " <NA>\n", " <NA>\n", - " out\n", - " na\n", - " 2.19\n", - " 4.4\n", - " 7\n", - " 22\n", - " 0.950\n", - " 2.319\n", - " False\n", + " in\n", + " in\n", + " 2.0\n", + " 8.0\n", + " 0\n", + " 0\n", + " 0.779\n", + " 2.093\n", + " <NA>\n", + " True\n", " False\n", " False\n", " False\n", @@ -4117,35 +4001,40 @@ " False\n", " False\n", " False\n", - " 18.979799\n", - " 65.341499\n", - " 56.192699\n", + " 17.943501\n", + " 58.283501\n", + " 52.774200\n", " <NA>\n", + " False\n", " \n", " \n", " 1\n", - " 10881\n", - " 6npz\n", - " A\n", + " 10429\n", + " 6c0i\n", + " C\n", " B\n", " Human\n", " 1\n", " AKT1\n", " <NA>\n", " <NA>\n", + " <NA>\n", " KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME...\n", " -\n", " -\n", + " 0\n", + " 0\n", " <NA>\n", 
" <NA>\n", " in\n", " in\n", - " 2.12\n", + " 2.4\n", " 8.0\n", " 0\n", " 0\n", " 0.776\n", - " 2.092\n", + " 2.091\n", + " <NA>\n", " False\n", " False\n", " False\n", @@ -4161,10 +4050,11 @@ " False\n", " False\n", " False\n", - " 17.940701\n", - " 60.261398\n", - " 66.135399\n", + " 17.901100\n", + " 59.594799\n", + " 60.872501\n", " <NA>\n", + " False\n", " \n", " \n", " ...\n", @@ -4209,9 +4099,14 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 11776\n", + " 12519\n", " 9070\n", " 6bq1\n", " -\n", @@ -4221,12 +4116,15 @@ " PI4KA\n", " <NA>\n", " <NA>\n", + " <NA>\n", " _PMQSAAKAPYLAAIFKVGDCRQDMLALQIIDLFVFPYRVVCGVIE...\n", " E4S\n", " -\n", + " 2974\n", + " 0\n", " <NA>\n", " <NA>\n", - " in\n", + " out-like\n", " in\n", " NaN\n", " 6.8\n", @@ -4234,6 +4132,7 @@ " 0\n", " 1.704\n", " 2.676\n", + " <NA>\n", " True\n", " False\n", " False\n", @@ -4253,9 +4152,10 @@ " 58.963501\n", " 131.186996\n", " <NA>\n", + " False\n", " \n", " \n", - " 11777\n", + " 12520\n", " 9069\n", " 6bq1\n", " -\n", @@ -4265,12 +4165,15 @@ " PI4KA\n", " <NA>\n", " <NA>\n", + " <NA>\n", " _PMQSAAKAPYLAAIFKVGDCRQDMLALQIIDLFVFPYRVVCGVIE...\n", " E4S\n", " -\n", + " 2974\n", + " 0\n", " <NA>\n", " <NA>\n", - " in\n", + " out-like\n", " in\n", " NaN\n", " 6.8\n", @@ -4278,6 +4181,7 @@ " 0\n", " 1.699\n", " 2.670\n", + " <NA>\n", " True\n", " True\n", " False\n", @@ -4297,112 +4201,120 @@ " 58.645401\n", " 136.503006\n", " <NA>\n", + " False\n", " \n", " \n", "\n", - "

11778 rows × 41 columns

\n", + "

12521 rows × 46 columns

\n", "" ], "text/plain": [ " structure.klifs_id structure.pdb_id structure.alternate_model \\\n", - "0 2542 4ejn - \n", - "1 10881 6npz A \n", + "0 6765 3cqw B \n", + "1 10429 6c0i C \n", "... ... ... ... \n", - "11776 9070 6bq1 - \n", - "11777 9069 6bq1 - \n", + "12519 9070 6bq1 - \n", + "12520 9069 6bq1 - \n", "\n", " structure.chain species.klifs kinase.klifs_id kinase.klifs_name \\\n", "0 A Human 1 AKT1 \n", "1 B Human 1 AKT1 \n", "... ... ... ... ... \n", - "11776 A Human 1096 PI4KA \n", - "11777 E Human 1096 PI4KA \n", + "12519 A Human 1096 PI4KA \n", + "12520 E Human 1096 PI4KA \n", "\n", - " kinase.family kinase.group \\\n", - "0 \n", - "1 \n", - "... ... ... \n", - "11776 \n", - "11777 \n", + " kinase.names kinase.family kinase.group \\\n", + "0 \n", + "1 \n", + "... ... ... ... \n", + "12519 \n", + "12520 \n", "\n", " structure.pocket ligand.expo_id \\\n", - "0 KLLGKGTFGKVILYAMKIL_______VLQNSRPFLTALKYSCFVME... - \n", + "0 KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME... CQW \n", "1 KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME... - \n", "... ... ... \n", - "11776 _PMQSAAKAPYLAAIFKVGDCRQDMLALQIIDLFVFPYRVVCGVIE... E4S \n", - "11777 _PMQSAAKAPYLAAIFKVGDCRQDMLALQIIDLFVFPYRVVCGVIE... E4S \n", - "\n", - " ligand_allosteric.expo_id ligand.name ligand_allosteric.name \\\n", - "0 0R4 \n", - "1 - \n", - "... ... ... ... \n", - "11776 - \n", - "11777 - \n", - "\n", - " structure.dfg structure.ac_helix structure.resolution \\\n", - "0 out na 2.19 \n", - "1 in in 2.12 \n", - "... ... ... ... \n", - "11776 in in NaN \n", - "11777 in in NaN \n", - "\n", - " structure.qualityscore structure.missing_residues \\\n", - "0 4.4 7 \n", - "1 8.0 0 \n", - "... ... ... \n", - "11776 6.8 2 \n", - "11777 6.8 2 \n", - "\n", - " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", - "0 22 0.950 2.319 \n", - "1 0 0.776 2.092 \n", - "... ... ... ... 
\n", - "11776 0 1.704 2.676 \n", - "11777 0 1.699 2.670 \n", - "\n", - " structure.front structure.gate structure.back structure.fp_i \\\n", - "0 False False False False \n", - "1 False False False False \n", - "... ... ... ... ... \n", - "11776 True False False False \n", - "11777 True True False False \n", - "\n", - " structure.fp_ii structure.bp_i_a structure.bp_i_b \\\n", - "0 False False False \n", - "1 False False False \n", - "... ... ... ... \n", - "11776 False False True \n", - "11777 False True True \n", - "\n", - " structure.bp_ii_in structure.bp_ii_a_in structure.bp_ii_b_in \\\n", - "0 False False False \n", - "1 False False False \n", - "... ... ... ... \n", - "11776 False False False \n", - "11777 False False False \n", - "\n", - " structure.bp_ii_out structure.bp_ii_b structure.bp_iii \\\n", - "0 False False False \n", - "1 False False False \n", - "... ... ... ... \n", - "11776 False False False \n", - "11777 False False False \n", - "\n", - " structure.bp_iv structure.bp_v structure.grich_distance \\\n", - "0 False False 18.979799 \n", - "1 False False 17.940701 \n", - "... ... ... ... \n", - "11776 False False 18.324301 \n", - "11777 False False 18.168600 \n", - "\n", - " structure.grich_angle structure.grich_rotation structure.filepath \n", - "0 65.341499 56.192699 \n", - "1 60.261398 66.135399 \n", - "... ... ... ... \n", - "11776 58.963501 131.186996 \n", - "11777 58.645401 136.503006 \n", - "\n", - "[11778 rows x 41 columns]" + "12519 _PMQSAAKAPYLAAIFKVGDCRQDMLALQIIDLFVFPYRVVCGVIE... E4S \n", + "12520 _PMQSAAKAPYLAAIFKVGDCRQDMLALQIIDLFVFPYRVVCGVIE... E4S \n", + "\n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - 2147 0 \n", + "1 - 0 0 \n", + "... ... ... ... \n", + "12519 - 2974 0 \n", + "12520 - 2974 0 \n", + "\n", + " ligand.name ligand_allosteric.name structure.dfg structure.ac_helix \\\n", + "0 in in \n", + "1 in in \n", + "... ... ... ... ... 
\n", + "12519 out-like in \n", + "12520 out-like in \n", + "\n", + " structure.resolution structure.qualityscore \\\n", + "0 2.0 8.0 \n", + "1 2.4 8.0 \n", + "... ... ... \n", + "12519 NaN 6.8 \n", + "12520 NaN 6.8 \n", + "\n", + " structure.missing_residues structure.missing_atoms structure.rmsd1 \\\n", + "0 0 0 0.779 \n", + "1 0 0 0.776 \n", + "... ... ... ... \n", + "12519 2 0 1.704 \n", + "12520 2 0 1.699 \n", + "\n", + " structure.rmsd2 interaction.fingerprint structure.front \\\n", + "0 2.093 True \n", + "1 2.091 False \n", + "... ... ... ... \n", + "12519 2.676 True \n", + "12520 2.670 True \n", + "\n", + " structure.gate structure.back structure.fp_i structure.fp_ii \\\n", + "0 False False False False \n", + "1 False False False False \n", + "... ... ... ... ... \n", + "12519 False False False False \n", + "12520 True False False False \n", + "\n", + " structure.bp_i_a structure.bp_i_b structure.bp_ii_in \\\n", + "0 False False False \n", + "1 False False False \n", + "... ... ... ... \n", + "12519 False True False \n", + "12520 True True False \n", + "\n", + " structure.bp_ii_a_in structure.bp_ii_b_in structure.bp_ii_out \\\n", + "0 False False False \n", + "1 False False False \n", + "... ... ... ... \n", + "12519 False False False \n", + "12520 False False False \n", + "\n", + " structure.bp_ii_b structure.bp_iii structure.bp_iv structure.bp_v \\\n", + "0 False False False False \n", + "1 False False False False \n", + "... ... ... ... ... \n", + "12519 False False False False \n", + "12520 False False False False \n", + "\n", + " structure.grich_distance structure.grich_angle \\\n", + "0 17.943501 58.283501 \n", + "1 17.901100 59.594799 \n", + "... ... ... \n", + "12519 18.324301 58.963501 \n", + "12520 18.168600 58.645401 \n", + "\n", + " structure.grich_rotation structure.filepath structure.curation_flag \n", + "0 52.774200 False \n", + "1 60.872501 False \n", + "... ... ... ... 
\n", + "12519 131.186996 False \n", + "12520 136.503006 False \n", + "\n", + "[12521 rows x 46 columns]" ] }, "execution_count": 54, @@ -4456,11 +4368,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -4471,6 +4386,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -4490,6 +4406,7 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", @@ -4502,11 +4419,14 @@ " Human\n", " 472\n", " BMX\n", + " ['BMX']\n", " Tec\n", " TK\n", " KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", " 1N1\n", " -\n", + " <NA>\n", + " <NA>\n", " N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX...\n", " -\n", " out-like\n", @@ -4517,6 +4437,7 @@ " 0\n", " 0.839\n", " 1.967\n", + " 0000000000000010000001000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -4536,6 +4457,7 @@ " NaN\n", " NaN\n", " HUMAN/BMX/3sxr_chainA\n", + " <NA>\n", " \n", " \n", " 1\n", @@ -4546,11 +4468,14 @@ " Human\n", " 509\n", " BRAF\n", + " ['BRAF']\n", " RAF\n", " TKL\n", " QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ...\n", " QH1\n", " -\n", + " <NA>\n", + " <NA>\n", " 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf...\n", " -\n", " in\n", @@ -4561,6 +4486,7 @@ " 61\n", " 0.806\n", " 2.028\n", + " 0000000000000000000000000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -4580,189 +4506,274 @@ " NaN\n", " NaN\n", " HUMAN/BRAF/6uuo_chainA\n", + " <NA>\n", " \n", " \n", - " 2\n", - " 5728\n", - " 1fpu\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " 
...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 4\n", + " 13623\n", + " 7lht\n", " -\n", " A\n", - " Mouse\n", - " 532\n", - " ABL1\n", - " Abl\n", - " TK\n", - " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", - " PRC\n", + " Human\n", + " 495\n", + " LRRK2\n", + " ['LRRK2']\n", + " LRRK\n", + " TKL\n", + " KALGKGLFSMVIRITLKVVGLRILNLPHLILEYCKAKDIIRFLQQK...\n", " -\n", - " N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]...\n", + " ATP\n", + " <NA>\n", + " <NA>\n", " -\n", + " ADENOSINE-5&apos;-TRIPHOSPHATE\n", " out\n", " out\n", - " 2.40\n", - " 8.8\n", + " 3.50\n", + " 4.0\n", " 0\n", - " 8\n", - " 0.925\n", - " 2.319\n", + " 0\n", + " 10.267\n", + " 15.045\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", " <NA>\n", " <NA>\n", " <NA>\n", - " False\n", - " False\n", - " True\n", - " True\n", - " False\n", - " False\n", - " False\n", - " True\n", - " False\n", - " False\n", - " False\n", - " False\n", " NaN\n", " NaN\n", " NaN\n", - " MOUSE/ABL1/1fpu_chainA\n", + " HUMAN/LRRK2/7lht_chainA\n", + " <NA>\n", " \n", " \n", - " 3\n", - " 5705\n", - " 1fpu\n", + " 5\n", + " 1243\n", + " 2ogv\n", " -\n", - " B\n", - " Mouse\n", - " 532\n", - " ABL1\n", - " Abl\n", + " A\n", + " Human\n", + " 449\n", + " FMS\n", + " ['CSF1R', 'FMS']\n", + " PDGFR\n", " TK\n", - " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", - " PRC\n", + " KTLGAGAFGKVVEVAVKMLALMSELKIMSHLGENIVNLLGALVITE...\n", + " -\n", + " -\n", 
+ " <NA>\n", + " <NA>\n", " -\n", - " N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]...\n", " -\n", " out\n", - " out\n", - " 2.40\n", - " 9.2\n", + " in\n", + " 2.70\n", + " 6.4\n", + " 2\n", " 0\n", - " 4\n", - " 0.925\n", - " 2.329\n", + " 1.043\n", + " 2.362\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", + " <NA>\n", " <NA>\n", " <NA>\n", " <NA>\n", - " False\n", - " False\n", - " False\n", - " True\n", - " False\n", - " False\n", - " False\n", - " True\n", - " False\n", - " False\n", - " False\n", - " False\n", " NaN\n", " NaN\n", " NaN\n", - " MOUSE/ABL1/1fpu_chainB\n", + " HUMAN/FMS/2ogv_chainA\n", + " <NA>\n", " \n", " \n", "\n", + "

6 rows × 46 columns

\n", "" ], "text/plain": [ - " structure.klifs_id structure.pdb_id structure.alternate_model \\\n", - "0 3482 3sxr - \n", - "1 12347 6uuo - \n", - "2 5728 1fpu - \n", - "3 5705 1fpu - \n", - "\n", - " structure.chain species.klifs kinase.klifs_id kinase.klifs_name \\\n", - "0 A Human 472 BMX \n", - "1 A Human 509 BRAF \n", - "2 A Mouse 532 ABL1 \n", - "3 B Mouse 532 ABL1 \n", - "\n", - " kinase.family kinase.group \\\n", - "0 Tec TK \n", - "1 RAF TKL \n", - "2 Abl TK \n", - "3 Abl TK \n", - "\n", - " structure.pocket ligand.expo_id \\\n", - "0 KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 1N1 \n", - "1 QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... QH1 \n", - "2 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... PRC \n", - "3 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... PRC \n", - "\n", - " ligand_allosteric.expo_id \\\n", - "0 - \n", - "1 - \n", - "2 - \n", - "3 - \n", - "\n", - " ligand.name ligand_allosteric.name \\\n", - "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... - \n", - "1 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf... - \n", - "2 N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]... - \n", - "3 N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]... - \n", - "\n", - " structure.dfg structure.ac_helix structure.resolution \\\n", - "0 out-like in 2.40 \n", - "1 in out-like 3.29 \n", - "2 out out 2.40 \n", - "3 out out 2.40 \n", - "\n", - " structure.qualityscore structure.missing_residues \\\n", - "0 6.4 4 \n", - "1 5.2 7 \n", - "2 8.8 0 \n", - "3 9.2 0 \n", - "\n", - " structure.missing_atoms structure.rmsd1 structure.rmsd2 structure.front \\\n", - "0 0 0.839 1.967 \n", - "1 61 0.806 2.028 \n", - "2 8 0.925 2.319 \n", - "3 4 0.925 2.329 \n", - "\n", - " structure.gate structure.back structure.fp_i structure.fp_ii \\\n", - "0 False False \n", - "1 False False \n", - "2 False False \n", - "3 False False \n", + " structure.klifs_id structure.pdb_id structure.alternate_model \\\n", + "0 3482 3sxr - \n", + "1 12347 6uuo - \n", + ".. ... ... ... 
\n", + "4 13623 7lht - \n", + "5 1243 2ogv - \n", "\n", - " structure.bp_i_a structure.bp_i_b structure.bp_ii_in \\\n", - "0 True True False \n", - "1 True True True \n", - "2 True True False \n", - "3 False True False \n", + " structure.chain species.klifs kinase.klifs_id kinase.klifs_name \\\n", + "0 A Human 472 BMX \n", + "1 A Human 509 BRAF \n", + ".. ... ... ... ... \n", + "4 A Human 495 LRRK2 \n", + "5 A Human 449 FMS \n", "\n", - " structure.bp_ii_a_in structure.bp_ii_b_in structure.bp_ii_out \\\n", - "0 False False False \n", - "1 True False False \n", - "2 False False True \n", - "3 False False True \n", + " kinase.names kinase.family kinase.group \\\n", + "0 ['BMX'] Tec TK \n", + "1 ['BRAF'] RAF TKL \n", + ".. ... ... ... \n", + "4 ['LRRK2'] LRRK TKL \n", + "5 ['CSF1R', 'FMS'] PDGFR TK \n", "\n", - " structure.bp_ii_b structure.bp_iii structure.bp_iv structure.bp_v \\\n", - "0 False False False False \n", - "1 False False False False \n", - "2 False False False False \n", - "3 False False False False \n", + " structure.pocket ligand.expo_id \\\n", + "0 KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 1N1 \n", + "1 QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... QH1 \n", + ".. ... ... \n", + "4 KALGKGLFSMVIRITLKVVGLRILNLPHLILEYCKAKDIIRFLQQK... - \n", + "5 KTLGAGAFGKVVEVAVKMLALMSELKIMSHLGENIVNLLGALVITE... - \n", "\n", - " structure.grich_distance structure.grich_angle structure.grich_rotation \\\n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - \n", + "1 - \n", + ".. ... ... ... \n", + "4 ATP \n", + "5 - \n", "\n", - " structure.filepath \n", - "0 HUMAN/BMX/3sxr_chainA \n", - "1 HUMAN/BRAF/6uuo_chainA \n", - "2 MOUSE/ABL1/1fpu_chainA \n", - "3 MOUSE/ABL1/1fpu_chainB " + " ligand.name \\\n", + "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... \n", + "1 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf... \n", + ".. ... 
\n", + "4 - \n", + "5 - \n", + "\n", + " ligand_allosteric.name structure.dfg structure.ac_helix \\\n", + "0 - out-like in \n", + "1 - in out-like \n", + ".. ... ... ... \n", + "4 ADENOSINE-5'-TRIPHOSPHATE out out \n", + "5 - out in \n", + "\n", + " structure.resolution structure.qualityscore structure.missing_residues \\\n", + "0 2.40 6.4 4 \n", + "1 3.29 5.2 7 \n", + ".. ... ... ... \n", + "4 3.50 4.0 0 \n", + "5 2.70 6.4 2 \n", + "\n", + " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", + "0 0 0.839 1.967 \n", + "1 61 0.806 2.028 \n", + ".. ... ... ... \n", + "4 0 10.267 15.045 \n", + "5 0 1.043 2.362 \n", + "\n", + " interaction.fingerprint structure.front \\\n", + "0 0000000000000010000001000000000000000000000000... \n", + "1 0000000000000000000000000000000000000000000000... \n", + ".. ... ... \n", + "4 \n", + "5 \n", + "\n", + " structure.gate structure.back structure.fp_i structure.fp_ii \\\n", + "0 False False \n", + "1 False False \n", + ".. ... ... ... ... \n", + "4 \n", + "5 \n", + "\n", + " structure.bp_i_a structure.bp_i_b structure.bp_ii_in \\\n", + "0 True True False \n", + "1 True True True \n", + ".. ... ... ... \n", + "4 \n", + "5 \n", + "\n", + " structure.bp_ii_a_in structure.bp_ii_b_in structure.bp_ii_out \\\n", + "0 False False False \n", + "1 True False False \n", + ".. ... ... ... \n", + "4 \n", + "5 \n", + "\n", + " structure.bp_ii_b structure.bp_iii structure.bp_iv structure.bp_v \\\n", + "0 False False False False \n", + "1 False False False False \n", + ".. ... ... ... ... \n", + "4 \n", + "5 \n", + "\n", + " structure.grich_distance structure.grich_angle structure.grich_rotation \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + ".. ... ... ... \n", + "4 NaN NaN NaN \n", + "5 NaN NaN NaN \n", + "\n", + " structure.filepath structure.curation_flag \n", + "0 HUMAN/BMX/3sxr_chainA \n", + "1 HUMAN/BRAF/6uuo_chainA \n", + ".. ... ... 
\n", + "4 HUMAN/LRRK2/7lht_chainA \n", + "5 HUMAN/FMS/2ogv_chainA \n", + "\n", + "[6 rows x 46 columns]" ] }, "execution_count": 55, @@ -4821,11 +4832,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -4836,6 +4850,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -4855,6 +4870,7 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", @@ -4869,19 +4885,23 @@ " BRAF\n", " <NA>\n", " <NA>\n", + " <NA>\n", " QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ...\n", " QH1\n", " -\n", + " 3715\n", + " 0\n", " <NA>\n", " <NA>\n", " in\n", - " out-like\n", + " out\n", " 3.29\n", " 5.2\n", " 7\n", " 61\n", " 0.806\n", " 2.028\n", + " <NA>\n", " True\n", " True\n", " True\n", @@ -4901,6 +4921,7 @@ " 0.0\n", " 0.0\n", " <NA>\n", + " False\n", " \n", " \n", "\n", @@ -4913,23 +4934,26 @@ " structure.chain species.klifs kinase.klifs_id kinase.klifs_name \\\n", "0 A Human 509 BRAF \n", "\n", - " kinase.family kinase.group \\\n", - "0 \n", + " kinase.names kinase.family kinase.group \\\n", + "0 \n", "\n", " structure.pocket ligand.expo_id \\\n", "0 QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... 
QH1 \n", "\n", - " ligand_allosteric.expo_id ligand.name ligand_allosteric.name structure.dfg \\\n", - "0 - in \n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - 3715 0 \n", "\n", - " structure.ac_helix structure.resolution structure.qualityscore \\\n", - "0 out-like 3.29 5.2 \n", + " ligand.name ligand_allosteric.name structure.dfg structure.ac_helix \\\n", + "0 in out \n", "\n", - " structure.missing_residues structure.missing_atoms structure.rmsd1 \\\n", - "0 7 61 0.806 \n", + " structure.resolution structure.qualityscore structure.missing_residues \\\n", + "0 3.29 5.2 7 \n", "\n", - " structure.rmsd2 structure.front structure.gate structure.back \\\n", - "0 2.028 True True True \n", + " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", + "0 61 0.806 2.028 \n", + "\n", + " interaction.fingerprint structure.front structure.gate structure.back \\\n", + "0 True True True \n", "\n", " structure.fp_i structure.fp_ii structure.bp_i_a structure.bp_i_b \\\n", "0 False False True True \n", @@ -4943,8 +4967,8 @@ " structure.bp_v structure.grich_distance structure.grich_angle \\\n", "0 False 0.0 0.0 \n", "\n", - " structure.grich_rotation structure.filepath \n", - "0 0.0 " + " structure.grich_rotation structure.filepath structure.curation_flag \n", + "0 0.0 False " ] }, "execution_count": 56, @@ -4965,7 +4989,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Expected type to be dict for value [400, 'KLIFS error: An unknown ligand ID was provided'] to unmarshal to a .Was instead.\n" + "Expected type to be dict for value [400, 'KLIFS error: An unknown structure ID was provided'] to unmarshal to a .Was instead.\n" ] } ], @@ -5016,11 +5040,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " 
ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -5031,6 +5058,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -5050,6 +5078,7 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", @@ -5062,11 +5091,14 @@ " Human\n", " 509\n", " BRAF\n", + " ['BRAF']\n", " RAF\n", " TKL\n", " QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ...\n", " QH1\n", " -\n", + " <NA>\n", + " <NA>\n", " 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf...\n", " -\n", " in\n", @@ -5077,6 +5109,7 @@ " 61\n", " 0.806\n", " 2.028\n", + " 0000000000000000000000000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -5096,6 +5129,7 @@ " NaN\n", " NaN\n", " HUMAN/BRAF/6uuo_chainA\n", + " <NA>\n", " \n", " \n", "\n", @@ -5108,14 +5142,14 @@ " structure.chain species.klifs kinase.klifs_id kinase.klifs_name \\\n", "0 A Human 509 BRAF \n", "\n", - " kinase.family kinase.group \\\n", - "0 RAF TKL \n", + " kinase.names kinase.family kinase.group \\\n", + "0 ['BRAF'] RAF TKL \n", "\n", " structure.pocket ligand.expo_id \\\n", "0 QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... QH1 \n", "\n", - " ligand_allosteric.expo_id \\\n", - "0 - \n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - \n", "\n", " ligand.name ligand_allosteric.name \\\n", "0 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf... - \n", @@ -5126,8 +5160,11 @@ " structure.qualityscore structure.missing_residues \\\n", "0 5.2 7 \n", "\n", - " structure.missing_atoms structure.rmsd1 structure.rmsd2 structure.front \\\n", - "0 61 0.806 2.028 \n", + " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", + "0 61 0.806 2.028 \n", + "\n", + " interaction.fingerprint structure.front \\\n", + "0 0000000000000000000000000000000000000000000000... 
\n", "\n", " structure.gate structure.back structure.fp_i structure.fp_ii \\\n", "0 False False \n", @@ -5144,8 +5181,8 @@ " structure.grich_distance structure.grich_angle structure.grich_rotation \\\n", "0 NaN NaN NaN \n", "\n", - " structure.filepath \n", - "0 HUMAN/BRAF/6uuo_chainA " + " structure.filepath structure.curation_flag \n", + "0 HUMAN/BRAF/6uuo_chainA " ] }, "execution_count": 58, @@ -5165,7 +5202,7 @@ { "data": { "text/plain": [ - "(1, 41)" + "(1, 46)" ] }, "execution_count": 59, @@ -5216,13 +5253,6 @@ "execution_count": 61, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:opencadd.databases.klifs.remote:This method uses this lookup: ligand KLIFS ID > Ligand Expo ID > structures.The KLIFS Swagger API offers no direct structure search by ligand KLIFS ID.However, one Ligand Expo ID can be represented by multiple ligand KLIFS IDs. Thus, in rare cases, this method will return also structure that are not connected to the input ligand KLIFS ID but to a mutual Ligand Expo ID.\n" - ] - }, { "data": { "text/html": [ @@ -5251,11 +5281,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -5266,6 +5299,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -5285,23 +5319,27 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", " \n", " 0\n", - " 8733\n", + " 8735\n", " 5m5a\n", - " B\n", + " A\n", " A\n", " Human\n", " 128\n", " MELK\n", " <NA>\n", " <NA>\n", + " <NA>\n", " 
ETIGTGGFAKVKLVAIKIMRIKTEIEALKNLRQHICQLYHVFMVLE...\n", " KSA\n", " -\n", + " 100\n", + " 0\n", " <NA>\n", " <NA>\n", " in\n", @@ -5312,6 +5350,7 @@ " 0\n", " 0.775\n", " 2.083\n", + " <NA>\n", " True\n", " False\n", " False\n", @@ -5331,21 +5370,25 @@ " 51.023800\n", " 60.378601\n", " <NA>\n", + " False\n", " \n", " \n", " 1\n", - " 8735\n", + " 8733\n", " 5m5a\n", - " A\n", + " B\n", " A\n", " Human\n", " 128\n", " MELK\n", " <NA>\n", " <NA>\n", + " <NA>\n", " ETIGTGGFAKVKLVAIKIMRIKTEIEALKNLRQHICQLYHVFMVLE...\n", " KSA\n", " -\n", + " 100\n", + " 0\n", " <NA>\n", " <NA>\n", " in\n", @@ -5356,6 +5399,7 @@ " 0\n", " 0.775\n", " 2.083\n", + " <NA>\n", " True\n", " False\n", " False\n", @@ -5375,6 +5419,7 @@ " 51.023800\n", " 60.378601\n", " <NA>\n", + " False\n", " \n", " \n", " ...\n", @@ -5419,6 +5464,11 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", " 12\n", @@ -5431,9 +5481,12 @@ " MAP2K1\n", " <NA>\n", " <NA>\n", + " <NA>\n", " SELGAGNGGVVFKMARKLIQIIRELQVLHECNPYIVGFYGASICME...\n", " KSA\n", " -\n", + " 100\n", + " 0\n", " <NA>\n", " <NA>\n", " in\n", @@ -5444,6 +5497,7 @@ " 0\n", " 0.816\n", " 2.199\n", + " <NA>\n", " True\n", " False\n", " False\n", @@ -5463,6 +5517,7 @@ " 51.288502\n", " 69.366898\n", " <NA>\n", + " False\n", " \n", " \n", " 13\n", @@ -5475,9 +5530,12 @@ " MET\n", " <NA>\n", " <NA>\n", + " <NA>\n", " EVIGRGHFGCVYHCAVKSLQFLTEGIIMKDFSPNVLSLLGILVVLP...\n", " KSA\n", " -\n", + " 100\n", + " 0\n", " <NA>\n", " <NA>\n", " in\n", @@ -5488,6 +5546,7 @@ " 0\n", " 0.848\n", " 2.189\n", + " <NA>\n", " True\n", " False\n", " False\n", @@ -5507,16 +5566,17 @@ " 66.349403\n", " 48.160198\n", " <NA>\n", + " False\n", " \n", " \n", "\n", - "

14 rows × 41 columns

\n", + "

14 rows × 46 columns

\n", "" ], "text/plain": [ " structure.klifs_id structure.pdb_id structure.alternate_model \\\n", - "0 8733 5m5a B \n", - "1 8735 5m5a A \n", + "0 8735 5m5a A \n", + "1 8733 5m5a B \n", ".. ... ... ... \n", "12 3317 3eqf - \n", "13 2991 1r0p - \n", @@ -5528,12 +5588,12 @@ "12 A Human 383 MAP2K1 \n", "13 A Human 446 MET \n", "\n", - " kinase.family kinase.group \\\n", - "0 \n", - "1 \n", - ".. ... ... \n", - "12 \n", - "13 \n", + " kinase.names kinase.family kinase.group \\\n", + "0 \n", + "1 \n", + ".. ... ... ... \n", + "12 \n", + "13 \n", "\n", " structure.pocket ligand.expo_id \\\n", "0 ETIGTGGFAKVKLVAIKIMRIKTEIEALKNLRQHICQLYHVFMVLE... KSA \n", @@ -5542,33 +5602,40 @@ "12 SELGAGNGGVVFKMARKLIQIIRELQVLHECNPYIVGFYGASICME... KSA \n", "13 EVIGRGHFGCVYHCAVKSLQFLTEGIIMKDFSPNVLSLLGILVVLP... KSA \n", "\n", - " ligand_allosteric.expo_id ligand.name ligand_allosteric.name structure.dfg \\\n", - "0 - in \n", - "1 - in \n", - ".. ... ... ... ... \n", - "12 - in \n", - "13 - in \n", - "\n", - " structure.ac_helix structure.resolution structure.qualityscore \\\n", - "0 in 1.9 8.0 \n", - "1 in 1.9 8.0 \n", - ".. ... ... ... \n", - "12 out 2.7 8.0 \n", - "13 out 1.8 8.0 \n", - "\n", - " structure.missing_residues structure.missing_atoms structure.rmsd1 \\\n", - "0 0 0 0.775 \n", - "1 0 0 0.775 \n", - ".. ... ... ... \n", - "12 0 0 0.816 \n", - "13 0 0 0.848 \n", - "\n", - " structure.rmsd2 structure.front structure.gate structure.back \\\n", - "0 2.083 True False False \n", - "1 2.083 True False False \n", - ".. ... ... ... ... \n", - "12 2.199 True False False \n", - "13 2.189 True False False \n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - 100 0 \n", + "1 - 100 0 \n", + ".. ... ... ... \n", + "12 - 100 0 \n", + "13 - 100 0 \n", + "\n", + " ligand.name ligand_allosteric.name structure.dfg structure.ac_helix \\\n", + "0 in in \n", + "1 in in \n", + ".. ... ... ... ... 
\n", + "12 in out \n", + "13 in out \n", + "\n", + " structure.resolution structure.qualityscore structure.missing_residues \\\n", + "0 1.9 8.0 0 \n", + "1 1.9 8.0 0 \n", + ".. ... ... ... \n", + "12 2.7 8.0 0 \n", + "13 1.8 8.0 0 \n", + "\n", + " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", + "0 0 0.775 2.083 \n", + "1 0 0.775 2.083 \n", + ".. ... ... ... \n", + "12 0 0.816 2.199 \n", + "13 0 0.848 2.189 \n", + "\n", + " interaction.fingerprint structure.front structure.gate structure.back \\\n", + "0 True False False \n", + "1 True False False \n", + ".. ... ... ... ... \n", + "12 True False False \n", + "13 True False False \n", "\n", " structure.fp_i structure.fp_ii structure.bp_i_a structure.bp_i_b \\\n", "0 False False False False \n", @@ -5598,14 +5665,14 @@ "12 False 15.1080 51.288502 \n", "13 False 20.0054 66.349403 \n", "\n", - " structure.grich_rotation structure.filepath \n", - "0 60.378601 \n", - "1 60.378601 \n", - ".. ... ... \n", - "12 69.366898 \n", - "13 48.160198 \n", + " structure.grich_rotation structure.filepath structure.curation_flag \n", + "0 60.378601 False \n", + "1 60.378601 False \n", + ".. ... ... ... \n", + "12 69.366898 False \n", + "13 48.160198 False \n", "\n", - "[14 rows x 41 columns]" + "[14 rows x 46 columns]" ] }, "execution_count": 61, @@ -5622,13 +5689,6 @@ "execution_count": 62, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:opencadd.databases.klifs.remote:This method uses this lookup: ligand KLIFS ID > Ligand Expo ID > structures.The KLIFS Swagger API offers no direct structure search by ligand KLIFS ID.However, one Ligand Expo ID can be represented by multiple ligand KLIFS IDs. 
Thus, in rare cases, this method will return also structure that are not connected to the input ligand KLIFS ID but to a mutual Ligand Expo ID.\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -5700,11 +5760,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -5715,6 +5778,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -5734,13 +5798,14 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", " \n", " 0\n", - " 3482\n", - " 3sxr\n", + " 3483\n", + " 3sxs\n", " -\n", " A\n", " Human\n", @@ -5748,25 +5813,29 @@ " BMX\n", " <NA>\n", " <NA>\n", - " KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", - " 1N1\n", + " <NA>\n", + " KELGSGQFGVVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", + " PP2\n", " -\n", + " 632\n", + " 0\n", " <NA>\n", " <NA>\n", " out-like\n", " in\n", - " 2.40\n", - " 6.4\n", - " 4\n", + " 1.89\n", + " 8.0\n", " 0\n", - " 0.839\n", - " 1.967\n", + " 0\n", + " 0.834\n", + " 2.106\n", + " <NA>\n", " True\n", " True\n", " False\n", " False\n", " False\n", - " True\n", + " False\n", " True\n", " False\n", " False\n", @@ -5776,41 +5845,46 @@ " False\n", " False\n", " False\n", - " 0.0000\n", - " 0.000000\n", - " 0.000000\n", + " 15.3623\n", + " 50.710098\n", + " 62.744400\n", " <NA>\n", + " False\n", " \n", " \n", " 1\n", - " 3483\n", - " 3sxs\n", + " 3481\n", + " 3sxr\n", " -\n", - " A\n", + " B\n", " Human\n", " 472\n", " BMX\n", " <NA>\n", " <NA>\n", - " KELGSGQFGVVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", - " PP2\n", + " <NA>\n", + " 
KEL______VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", + " 1N1\n", " -\n", + " 281\n", + " 0\n", " <NA>\n", " <NA>\n", " out-like\n", " in\n", - " 1.89\n", - " 8.0\n", - " 0\n", + " 2.40\n", + " 5.6\n", + " 6\n", " 0\n", - " 0.834\n", - " 2.106\n", + " 0.839\n", + " 1.946\n", + " <NA>\n", " True\n", " True\n", " False\n", " False\n", " False\n", - " False\n", + " True\n", " True\n", " False\n", " False\n", @@ -5820,10 +5894,11 @@ " False\n", " False\n", " False\n", - " 15.3623\n", - " 50.710098\n", - " 62.744400\n", + " 0.0000\n", + " 0.000000\n", + " 0.000000\n", " <NA>\n", + " False\n", " \n", " \n", " ...\n", @@ -5868,9 +5943,14 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 209\n", + " 225\n", " 3126\n", " 4pp7\n", " -\n", @@ -5880,9 +5960,12 @@ " BRAF\n", " <NA>\n", " <NA>\n", + " <NA>\n", " QRIGSGSFGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ...\n", " 2VX\n", " -\n", + " 945\n", + " 0\n", " <NA>\n", " <NA>\n", " in\n", @@ -5893,6 +5976,7 @@ " 0\n", " 0.813\n", " 2.142\n", + " <NA>\n", " True\n", " True\n", " True\n", @@ -5912,9 +5996,10 @@ " 55.548302\n", " 31.851000\n", " <NA>\n", + " False\n", " \n", " \n", - " 210\n", + " 226\n", " 3095\n", " 3d4q\n", " -\n", @@ -5924,9 +6009,12 @@ " BRAF\n", " <NA>\n", " <NA>\n", + " <NA>\n", " QRIGSGSFGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ...\n", " SM5\n", " -\n", + " 249\n", + " 0\n", " <NA>\n", " <NA>\n", " in\n", @@ -5937,6 +6025,7 @@ " 0\n", " 0.779\n", " 2.120\n", + " <NA>\n", " True\n", " True\n", " False\n", @@ -5956,112 +6045,120 @@ " 49.465599\n", " 72.604401\n", " <NA>\n", + " False\n", " \n", " \n", "\n", - "

211 rows × 41 columns

\n", + "

227 rows × 46 columns

\n", "" ], "text/plain": [ " structure.klifs_id structure.pdb_id structure.alternate_model \\\n", - "0 3482 3sxr - \n", - "1 3483 3sxs - \n", + "0 3483 3sxs - \n", + "1 3481 3sxr - \n", ".. ... ... ... \n", - "209 3126 4pp7 - \n", - "210 3095 3d4q - \n", + "225 3126 4pp7 - \n", + "226 3095 3d4q - \n", "\n", " structure.chain species.klifs kinase.klifs_id kinase.klifs_name \\\n", "0 A Human 472 BMX \n", - "1 A Human 472 BMX \n", + "1 B Human 472 BMX \n", ".. ... ... ... ... \n", - "209 B Human 509 BRAF \n", - "210 A Human 509 BRAF \n", + "225 B Human 509 BRAF \n", + "226 A Human 509 BRAF \n", "\n", - " kinase.family kinase.group \\\n", - "0 \n", - "1 \n", - ".. ... ... \n", - "209 \n", - "210 \n", + " kinase.names kinase.family kinase.group \\\n", + "0 \n", + "1 \n", + ".. ... ... ... \n", + "225 \n", + "226 \n", "\n", " structure.pocket ligand.expo_id \\\n", - "0 KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 1N1 \n", - "1 KELGSGQFGVVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... PP2 \n", + "0 KELGSGQFGVVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... PP2 \n", + "1 KEL______VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 1N1 \n", ".. ... ... \n", - "209 QRIGSGSFGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... 2VX \n", - "210 QRIGSGSFGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... SM5 \n", - "\n", - " ligand_allosteric.expo_id ligand.name ligand_allosteric.name \\\n", - "0 - \n", - "1 - \n", - ".. ... ... ... \n", - "209 - \n", - "210 - \n", - "\n", - " structure.dfg structure.ac_helix structure.resolution \\\n", - "0 out-like in 2.40 \n", - "1 out-like in 1.89 \n", - ".. ... ... ... \n", - "209 in out 3.40 \n", - "210 in in 2.80 \n", - "\n", - " structure.qualityscore structure.missing_residues \\\n", - "0 6.4 4 \n", - "1 8.0 0 \n", - ".. ... ... \n", - "209 8.0 0 \n", - "210 8.0 0 \n", + "225 QRIGSGSFGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... 2VX \n", + "226 QRIGSGSFGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... 
SM5 \n", + "\n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - 632 0 \n", + "1 - 281 0 \n", + ".. ... ... ... \n", + "225 - 945 0 \n", + "226 - 249 0 \n", + "\n", + " ligand.name ligand_allosteric.name structure.dfg structure.ac_helix \\\n", + "0 out-like in \n", + "1 out-like in \n", + ".. ... ... ... ... \n", + "225 in out \n", + "226 in in \n", + "\n", + " structure.resolution structure.qualityscore structure.missing_residues \\\n", + "0 1.89 8.0 0 \n", + "1 2.40 5.6 6 \n", + ".. ... ... ... \n", + "225 3.40 8.0 0 \n", + "226 2.80 8.0 0 \n", "\n", " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", - "0 0 0.839 1.967 \n", - "1 0 0.834 2.106 \n", + "0 0 0.834 2.106 \n", + "1 0 0.839 1.946 \n", ".. ... ... ... \n", - "209 0 0.813 2.142 \n", - "210 0 0.779 2.120 \n", - "\n", - " structure.front structure.gate structure.back structure.fp_i \\\n", - "0 True True False False \n", - "1 True True False False \n", - ".. ... ... ... ... \n", - "209 True True True True \n", - "210 True True False False \n", - "\n", - " structure.fp_ii structure.bp_i_a structure.bp_i_b structure.bp_ii_in \\\n", - "0 False True True False \n", - "1 False False True False \n", - ".. ... ... ... ... \n", - "209 False True True True \n", - "210 False False True False \n", - "\n", - " structure.bp_ii_a_in structure.bp_ii_b_in structure.bp_ii_out \\\n", - "0 False False False \n", - "1 False False False \n", - ".. ... ... ... \n", - "209 False False False \n", - "210 False False False \n", - "\n", - " structure.bp_ii_b structure.bp_iii structure.bp_iv structure.bp_v \\\n", - "0 False False False False \n", - "1 False False False False \n", - ".. ... ... ... ... \n", - "209 False False False False \n", - "210 False False False False \n", - "\n", - " structure.grich_distance structure.grich_angle \\\n", - "0 0.0000 0.000000 \n", - "1 15.3623 50.710098 \n", - ".. ... ... 
\n", - "209 17.1234 55.548302 \n", - "210 14.7287 49.465599 \n", - "\n", - " structure.grich_rotation structure.filepath \n", - "0 0.000000 \n", - "1 62.744400 \n", - ".. ... ... \n", - "209 31.851000 \n", - "210 72.604401 \n", - "\n", - "[211 rows x 41 columns]" + "225 0 0.813 2.142 \n", + "226 0 0.779 2.120 \n", + "\n", + " interaction.fingerprint structure.front structure.gate structure.back \\\n", + "0 True True False \n", + "1 True True False \n", + ".. ... ... ... ... \n", + "225 True True True \n", + "226 True True False \n", + "\n", + " structure.fp_i structure.fp_ii structure.bp_i_a structure.bp_i_b \\\n", + "0 False False False True \n", + "1 False False True True \n", + ".. ... ... ... ... \n", + "225 True False True True \n", + "226 False False False True \n", + "\n", + " structure.bp_ii_in structure.bp_ii_a_in structure.bp_ii_b_in \\\n", + "0 False False False \n", + "1 False False False \n", + ".. ... ... ... \n", + "225 True False False \n", + "226 False False False \n", + "\n", + " structure.bp_ii_out structure.bp_ii_b structure.bp_iii \\\n", + "0 False False False \n", + "1 False False False \n", + ".. ... ... ... \n", + "225 False False False \n", + "226 False False False \n", + "\n", + " structure.bp_iv structure.bp_v structure.grich_distance \\\n", + "0 False False 15.3623 \n", + "1 False False 0.0000 \n", + ".. ... ... ... \n", + "225 False False 17.1234 \n", + "226 False False 14.7287 \n", + "\n", + " structure.grich_angle structure.grich_rotation structure.filepath \\\n", + "0 50.710098 62.744400 \n", + "1 0.000000 0.000000 \n", + ".. ... ... ... \n", + "225 55.548302 31.851000 \n", + "226 49.465599 72.604401 \n", + "\n", + " structure.curation_flag \n", + "0 False \n", + "1 False \n", + ".. ... 
\n", + "225 False \n", + "226 False \n", + "\n", + "[227 rows x 46 columns]" ] }, "execution_count": 63, @@ -6133,11 +6230,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -6148,6 +6248,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -6167,6 +6268,7 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", @@ -6179,11 +6281,14 @@ " Human\n", " 472\n", " BMX\n", + " ['BMX']\n", " Tec\n", " TK\n", " KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", " 1N1\n", " -\n", + " <NA>\n", + " <NA>\n", " N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX...\n", " -\n", " out-like\n", @@ -6194,6 +6299,7 @@ " 0\n", " 0.839\n", " 1.967\n", + " 0000000000000010000001000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -6213,6 +6319,7 @@ " NaN\n", " NaN\n", " HUMAN/BMX/3sxr_chainA\n", + " <NA>\n", " \n", " \n", " 1\n", @@ -6223,11 +6330,14 @@ " Human\n", " 509\n", " BRAF\n", + " ['BRAF']\n", " RAF\n", " TKL\n", " QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ...\n", " QH1\n", " -\n", + " <NA>\n", + " <NA>\n", " 1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulf...\n", " -\n", " in\n", @@ -6238,6 +6348,7 @@ " 61\n", " 0.806\n", " 2.028\n", + " 0000000000000000000000000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -6257,6 +6368,7 @@ " NaN\n", " NaN\n", " HUMAN/BRAF/6uuo_chainA\n", + " <NA>\n", " \n", " \n", "\n", @@ -6271,17 +6383,17 @@ "0 A Human 472 BMX \n", "1 A Human 509 BRAF \n", "\n", - " kinase.family kinase.group \\\n", - "0 Tec TK \n", - "1 RAF TKL \n", + 
" kinase.names kinase.family kinase.group \\\n", + "0 ['BMX'] Tec TK \n", + "1 ['BRAF'] RAF TKL \n", "\n", " structure.pocket ligand.expo_id \\\n", "0 KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 1N1 \n", "1 QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQ... QH1 \n", "\n", - " ligand_allosteric.expo_id \\\n", - "0 - \n", - "1 - \n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - \n", + "1 - \n", "\n", " ligand.name ligand_allosteric.name \\\n", "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... - \n", @@ -6295,9 +6407,13 @@ "0 6.4 4 \n", "1 5.2 7 \n", "\n", - " structure.missing_atoms structure.rmsd1 structure.rmsd2 structure.front \\\n", - "0 0 0.839 1.967 \n", - "1 61 0.806 2.028 \n", + " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", + "0 0 0.839 1.967 \n", + "1 61 0.806 2.028 \n", + "\n", + " interaction.fingerprint structure.front \\\n", + "0 0000000000000010000001000000000000000000000000... \n", + "1 0000000000000000000000000000000000000000000000... 
\n", "\n", " structure.gate structure.back structure.fp_i structure.fp_ii \\\n", "0 False False \n", @@ -6319,9 +6435,9 @@ "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "\n", - " structure.filepath \n", - "0 HUMAN/BMX/3sxr_chainA \n", - "1 HUMAN/BRAF/6uuo_chainA " + " structure.filepath structure.curation_flag \n", + "0 HUMAN/BMX/3sxr_chainA \n", + "1 HUMAN/BRAF/6uuo_chainA " ] }, "execution_count": 65, @@ -6400,11 +6516,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -6415,6 +6534,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -6434,6 +6554,7 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", @@ -6448,19 +6569,23 @@ " ABL1\n", " <NA>\n", " <NA>\n", + " <NA>\n", " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", " PRC\n", " -\n", + " 1793\n", + " 0\n", " <NA>\n", " <NA>\n", " out\n", - " out\n", + " in\n", " 2.4\n", " 9.2\n", " 0\n", " 4\n", " 0.925\n", " 2.329\n", + " <NA>\n", " True\n", " True\n", " True\n", @@ -6480,6 +6605,7 @@ " 51.093899\n", " 49.228901\n", " <NA>\n", + " False\n", " \n", " \n", " 1\n", @@ -6492,19 +6618,23 @@ " ABL1\n", " <NA>\n", " <NA>\n", + " <NA>\n", " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", " PRC\n", " -\n", + " 1793\n", + " 0\n", " <NA>\n", " <NA>\n", " out\n", - " out\n", + " in\n", " 2.4\n", " 8.8\n", " 0\n", " 8\n", " 0.925\n", " 2.319\n", + " <NA>\n", " True\n", " True\n", " True\n", @@ -6524,6 +6654,7 @@ " 54.140202\n", " 48.741402\n", " <NA>\n", + " False\n", " \n", " \n", " 2\n", @@ -6536,9 +6667,12 @@ " BMX\n", " 
<NA>\n", " <NA>\n", + " <NA>\n", " KEL______VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", " 1N1\n", " -\n", + " 281\n", + " 0\n", " <NA>\n", " <NA>\n", " out-like\n", @@ -6549,6 +6683,7 @@ " 0\n", " 0.839\n", " 1.946\n", + " <NA>\n", " True\n", " True\n", " False\n", @@ -6568,6 +6703,7 @@ " 0.000000\n", " 0.000000\n", " <NA>\n", + " False\n", " \n", " \n", " 3\n", @@ -6580,9 +6716,12 @@ " BMX\n", " <NA>\n", " <NA>\n", + " <NA>\n", " KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", " 1N1\n", " -\n", + " 281\n", + " 0\n", " <NA>\n", " <NA>\n", " out-like\n", @@ -6593,6 +6732,7 @@ " 0\n", " 0.839\n", " 1.967\n", + " <NA>\n", " True\n", " True\n", " False\n", @@ -6612,6 +6752,7 @@ " 0.000000\n", " 0.000000\n", " <NA>\n", + " False\n", " \n", " \n", "\n", @@ -6630,11 +6771,11 @@ "2 B Human 472 BMX \n", "3 A Human 472 BMX \n", "\n", - " kinase.family kinase.group \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", + " kinase.names kinase.family kinase.group \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", "\n", " structure.pocket ligand.expo_id \\\n", "0 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... PRC \n", @@ -6642,29 +6783,35 @@ "2 KEL______VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 1N1 \n", "3 KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 
1N1 \n", "\n", - " ligand_allosteric.expo_id ligand.name ligand_allosteric.name structure.dfg \\\n", - "0 - out \n", - "1 - out \n", - "2 - out-like \n", - "3 - out-like \n", - "\n", - " structure.ac_helix structure.resolution structure.qualityscore \\\n", - "0 out 2.4 9.2 \n", - "1 out 2.4 8.8 \n", - "2 in 2.4 5.6 \n", - "3 in 2.4 6.4 \n", - "\n", - " structure.missing_residues structure.missing_atoms structure.rmsd1 \\\n", - "0 0 4 0.925 \n", - "1 0 8 0.925 \n", - "2 6 0 0.839 \n", - "3 4 0 0.839 \n", - "\n", - " structure.rmsd2 structure.front structure.gate structure.back \\\n", - "0 2.329 True True True \n", - "1 2.319 True True True \n", - "2 1.946 True True False \n", - "3 1.967 True True False \n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - 1793 0 \n", + "1 - 1793 0 \n", + "2 - 281 0 \n", + "3 - 281 0 \n", + "\n", + " ligand.name ligand_allosteric.name structure.dfg structure.ac_helix \\\n", + "0 out in \n", + "1 out in \n", + "2 out-like in \n", + "3 out-like in \n", + "\n", + " structure.resolution structure.qualityscore structure.missing_residues \\\n", + "0 2.4 9.2 0 \n", + "1 2.4 8.8 0 \n", + "2 2.4 5.6 6 \n", + "3 2.4 6.4 4 \n", + "\n", + " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", + "0 4 0.925 2.329 \n", + "1 8 0.925 2.319 \n", + "2 0 0.839 1.946 \n", + "3 0 0.839 1.967 \n", + "\n", + " interaction.fingerprint structure.front structure.gate structure.back \\\n", + "0 True True True \n", + "1 True True True \n", + "2 True True False \n", + "3 True True False \n", "\n", " structure.fp_i structure.fp_ii structure.bp_i_a structure.bp_i_b \\\n", "0 False False False True \n", @@ -6690,11 +6837,11 @@ "2 False 0.000000 0.000000 \n", "3 False 0.000000 0.000000 \n", "\n", - " structure.grich_rotation structure.filepath \n", - "0 49.228901 \n", - "1 48.741402 \n", - "2 0.000000 \n", - "3 0.000000 " + " structure.grich_rotation structure.filepath structure.curation_flag \n", + "0 49.228901 False 
\n", + "1 48.741402 False \n", + "2 0.000000 False \n", + "3 0.000000 False " ] }, "execution_count": 67, @@ -6766,11 +6913,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -6781,6 +6931,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -6800,6 +6951,7 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", @@ -6812,11 +6964,14 @@ " Human\n", " 472\n", " BMX\n", + " ['BMX']\n", " Tec\n", " TK\n", " KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", " 1N1\n", " -\n", + " <NA>\n", + " <NA>\n", " N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX...\n", " -\n", " out-like\n", @@ -6827,6 +6982,7 @@ " 0\n", " 0.839\n", " 1.967\n", + " 0000000000000010000001000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -6846,6 +7002,7 @@ " NaN\n", " NaN\n", " HUMAN/BMX/3sxr_chainA\n", + " <NA>\n", " \n", " \n", " 1\n", @@ -6856,11 +7013,14 @@ " Mouse\n", " 532\n", " ABL1\n", + " ['Abl1', 'ABL1']\n", " Abl\n", " TK\n", " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", " PRC\n", " -\n", + " <NA>\n", + " <NA>\n", " N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]...\n", " -\n", " out\n", @@ -6871,6 +7031,7 @@ " 8\n", " 0.925\n", " 2.319\n", + " 0000000000000010000000000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -6890,6 +7051,7 @@ " NaN\n", " NaN\n", " MOUSE/ABL1/1fpu_chainA\n", + " <NA>\n", " \n", " \n", " 2\n", @@ -6900,11 +7062,14 @@ " Mouse\n", " 532\n", " ABL1\n", + " ['Abl1', 'ABL1']\n", " Abl\n", " TK\n", " 
HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", " PRC\n", " -\n", + " <NA>\n", + " <NA>\n", " N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]...\n", " -\n", " out\n", @@ -6915,6 +7080,7 @@ " 4\n", " 0.925\n", " 2.329\n", + " 0000000000000010000000000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -6934,6 +7100,7 @@ " NaN\n", " NaN\n", " MOUSE/ABL1/1fpu_chainB\n", + " <NA>\n", " \n", " \n", "\n", @@ -6950,20 +7117,20 @@ "1 A Mouse 532 ABL1 \n", "2 B Mouse 532 ABL1 \n", "\n", - " kinase.family kinase.group \\\n", - "0 Tec TK \n", - "1 Abl TK \n", - "2 Abl TK \n", + " kinase.names kinase.family kinase.group \\\n", + "0 ['BMX'] Tec TK \n", + "1 ['Abl1', 'ABL1'] Abl TK \n", + "2 ['Abl1', 'ABL1'] Abl TK \n", "\n", " structure.pocket ligand.expo_id \\\n", "0 KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 1N1 \n", "1 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... PRC \n", "2 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... PRC \n", "\n", - " ligand_allosteric.expo_id \\\n", - "0 - \n", - "1 - \n", - "2 - \n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - \n", + "1 - \n", + "2 - \n", "\n", " ligand.name ligand_allosteric.name \\\n", "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... - \n", @@ -6980,10 +7147,15 @@ "1 8.8 0 \n", "2 9.2 0 \n", "\n", - " structure.missing_atoms structure.rmsd1 structure.rmsd2 structure.front \\\n", - "0 0 0.839 1.967 \n", - "1 8 0.925 2.319 \n", - "2 4 0.925 2.329 \n", + " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", + "0 0 0.839 1.967 \n", + "1 8 0.925 2.319 \n", + "2 4 0.925 2.329 \n", + "\n", + " interaction.fingerprint structure.front \\\n", + "0 0000000000000010000001000000000000000000000000... \n", + "1 0000000000000010000000000000000000000000000000... \n", + "2 0000000000000010000000000000000000000000000000... 
\n", "\n", " structure.gate structure.back structure.fp_i structure.fp_ii \\\n", "0 False False \n", @@ -7010,10 +7182,10 @@ "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "\n", - " structure.filepath \n", - "0 HUMAN/BMX/3sxr_chainA \n", - "1 MOUSE/ABL1/1fpu_chainA \n", - "2 MOUSE/ABL1/1fpu_chainB " + " structure.filepath structure.curation_flag \n", + "0 HUMAN/BMX/3sxr_chainA \n", + "1 MOUSE/ABL1/1fpu_chainA \n", + "2 MOUSE/ABL1/1fpu_chainB " ] }, "execution_count": 69, @@ -7092,11 +7264,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -7107,6 +7282,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -7126,6 +7302,7 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", @@ -7140,19 +7317,23 @@ " p38a\n", " <NA>\n", " <NA>\n", + " <NA>\n", " SPVGS__YGSVCAVAVKKLRTYRELRLLKHMKENVIGLLDVYLVTH...\n", " 1N1\n", " -\n", + " 281\n", + " 0\n", " <NA>\n", " <NA>\n", " na\n", - " na\n", + " in\n", " 2.10\n", " 6.1\n", " 5\n", " 15\n", " 0.765\n", " 2.173\n", + " <NA>\n", " True\n", " True\n", " True\n", @@ -7172,6 +7353,7 @@ " 0.000000\n", " 0.000000\n", " <NA>\n", + " False\n", " \n", " \n", " 1\n", @@ -7184,9 +7366,12 @@ " MYT1\n", " <NA>\n", " <NA>\n", + " <NA>\n", " SRLGHGSYGEVFKYAVKRSRKLAEVGSHEKVGPCCVRLEQAYLQTE...\n", " 1N1\n", " -\n", + " 281\n", + " 0\n", " <NA>\n", " <NA>\n", " in\n", @@ -7197,6 +7382,7 @@ " 0\n", " 0.775\n", " 2.104\n", + " <NA>\n", " True\n", " True\n", " False\n", @@ -7216,6 +7402,7 @@ " 59.874401\n", " 54.618500\n", " <NA>\n", + " False\n", " \n", " \n", " ...\n", 
@@ -7260,21 +7447,29 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", " 37\n", - " 5811\n", + " 5814\n", " 2y6o\n", - " B\n", + " A\n", " A\n", " Mouse\n", " 668\n", " EphA4\n", " <NA>\n", " <NA>\n", + " <NA>\n", " KVIGVGEFGEVCSVAIKTLDFLSEASIMGQFDPNIIHLEGVMIITE...\n", " 1N1\n", " -\n", + " 281\n", + " 0\n", " <NA>\n", " <NA>\n", " in\n", @@ -7283,8 +7478,9 @@ " 8.7\n", " 3\n", " 1\n", - " 0.782\n", - " 2.117\n", + " 0.783\n", + " 2.116\n", + " <NA>\n", " True\n", " True\n", " True\n", @@ -7301,9 +7497,10 @@ " False\n", " False\n", " 19.312901\n", - " 61.760201\n", - " 52.242699\n", + " 61.760502\n", + " 52.242401\n", " <NA>\n", + " False\n", " \n", " \n", " 38\n", @@ -7316,9 +7513,12 @@ " LYN\n", " <NA>\n", " <NA>\n", + " <NA>\n", " KKLGAGQFGEVWMVAVKTLAFLEEANLMKTLQDKLVRLYAVYIITE...\n", " 1N1\n", " -\n", + " 281\n", + " 0\n", " <NA>\n", " <NA>\n", " in\n", @@ -7329,6 +7529,7 @@ " 0\n", " 0.780\n", " 2.099\n", + " <NA>\n", " True\n", " True\n", " True\n", @@ -7348,10 +7549,11 @@ " 62.362701\n", " 53.872799\n", " <NA>\n", + " False\n", " \n", " \n", "\n", - "

39 rows × 41 columns

\n", + "

39 rows × 46 columns

\n", "" ], "text/plain": [ @@ -7359,7 +7561,7 @@ "0 5018 3lfa - \n", "1 8339 5vcv - \n", ".. ... ... ... \n", - "37 5811 2y6o B \n", + "37 5814 2y6o A \n", "38 5774 2zva - \n", "\n", " structure.chain species.klifs kinase.klifs_id kinase.klifs_name \\\n", @@ -7369,12 +7571,12 @@ "37 A Mouse 668 EphA4 \n", "38 A Mouse 767 LYN \n", "\n", - " kinase.family kinase.group \\\n", - "0 \n", - "1 \n", - ".. ... ... \n", - "37 \n", - "38 \n", + " kinase.names kinase.family kinase.group \\\n", + "0 \n", + "1 \n", + ".. ... ... ... \n", + "37 \n", + "38 \n", "\n", " structure.pocket ligand.expo_id \\\n", "0 SPVGS__YGSVCAVAVKKLRTYRELRLLKHMKENVIGLLDVYLVTH... 1N1 \n", @@ -7383,33 +7585,40 @@ "37 KVIGVGEFGEVCSVAIKTLDFLSEASIMGQFDPNIIHLEGVMIITE... 1N1 \n", "38 KKLGAGQFGEVWMVAVKTLAFLEEANLMKTLQDKLVRLYAVYIITE... 1N1 \n", "\n", - " ligand_allosteric.expo_id ligand.name ligand_allosteric.name structure.dfg \\\n", - "0 - na \n", - "1 - in \n", - ".. ... ... ... ... \n", - "37 - in \n", - "38 - in \n", - "\n", - " structure.ac_helix structure.resolution structure.qualityscore \\\n", - "0 na 2.10 6.1 \n", - "1 in 1.92 8.0 \n", - ".. ... ... ... \n", - "37 in 1.54 8.7 \n", - "38 in 2.60 8.0 \n", - "\n", - " structure.missing_residues structure.missing_atoms structure.rmsd1 \\\n", - "0 5 15 0.765 \n", - "1 0 0 0.775 \n", - ".. ... ... ... \n", - "37 3 1 0.782 \n", - "38 0 0 0.780 \n", - "\n", - " structure.rmsd2 structure.front structure.gate structure.back \\\n", - "0 2.173 True True True \n", - "1 2.104 True True False \n", - ".. ... ... ... ... \n", - "37 2.117 True True True \n", - "38 2.099 True True True \n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - 281 0 \n", + "1 - 281 0 \n", + ".. ... ... ... \n", + "37 - 281 0 \n", + "38 - 281 0 \n", + "\n", + " ligand.name ligand_allosteric.name structure.dfg structure.ac_helix \\\n", + "0 na in \n", + "1 in in \n", + ".. ... ... ... ... 
\n", + "37 in in \n", + "38 in in \n", + "\n", + " structure.resolution structure.qualityscore structure.missing_residues \\\n", + "0 2.10 6.1 5 \n", + "1 1.92 8.0 0 \n", + ".. ... ... ... \n", + "37 1.54 8.7 3 \n", + "38 2.60 8.0 0 \n", + "\n", + " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", + "0 15 0.765 2.173 \n", + "1 0 0.775 2.104 \n", + ".. ... ... ... \n", + "37 1 0.783 2.116 \n", + "38 0 0.780 2.099 \n", + "\n", + " interaction.fingerprint structure.front structure.gate structure.back \\\n", + "0 True True True \n", + "1 True True False \n", + ".. ... ... ... ... \n", + "37 True True True \n", + "38 True True True \n", "\n", " structure.fp_i structure.fp_ii structure.bp_i_a structure.bp_i_b \\\n", "0 False False True True \n", @@ -7436,17 +7645,17 @@ "0 False 0.000000 0.000000 \n", "1 False 18.551800 59.874401 \n", ".. ... ... ... \n", - "37 False 19.312901 61.760201 \n", + "37 False 19.312901 61.760502 \n", "38 False 19.297100 62.362701 \n", "\n", - " structure.grich_rotation structure.filepath \n", - "0 0.000000 \n", - "1 54.618500 \n", - ".. ... ... \n", - "37 52.242699 \n", - "38 53.872799 \n", + " structure.grich_rotation structure.filepath structure.curation_flag \n", + "0 0.000000 False \n", + "1 54.618500 False \n", + ".. ... ... ... 
\n", + "37 52.242401 False \n", + "38 53.872799 False \n", "\n", - "[39 rows x 41 columns]" + "[39 rows x 46 columns]" ] }, "execution_count": 71, @@ -7518,11 +7727,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -7533,6 +7745,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -7552,6 +7765,7 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", @@ -7564,11 +7778,14 @@ " Human\n", " 472\n", " BMX\n", + " ['BMX']\n", " Tec\n", " TK\n", " KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", " 1N1\n", " -\n", + " <NA>\n", + " <NA>\n", " N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX...\n", " -\n", " out-like\n", @@ -7579,6 +7796,7 @@ " 0\n", " 0.839\n", " 1.967\n", + " 0000000000000010000001000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -7598,6 +7816,7 @@ " NaN\n", " NaN\n", " HUMAN/BMX/3sxr_chainA\n", + " <NA>\n", " \n", " \n", " 1\n", @@ -7608,11 +7827,14 @@ " Mouse\n", " 532\n", " ABL1\n", + " ['Abl1', 'ABL1']\n", " Abl\n", " TK\n", " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", " PRC\n", " -\n", + " <NA>\n", + " <NA>\n", " N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]...\n", " -\n", " out\n", @@ -7623,6 +7845,7 @@ " 8\n", " 0.925\n", " 2.319\n", + " 0000000000000010000000000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -7642,6 +7865,7 @@ " NaN\n", " NaN\n", " MOUSE/ABL1/1fpu_chainA\n", + " <NA>\n", " \n", " \n", " 2\n", @@ -7652,11 +7876,14 @@ " Mouse\n", " 532\n", " ABL1\n", + " ['Abl1', 'ABL1']\n", " Abl\n", " TK\n", " 
HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", " PRC\n", " -\n", + " <NA>\n", + " <NA>\n", " N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]...\n", " -\n", " out\n", @@ -7667,6 +7894,7 @@ " 4\n", " 0.925\n", " 2.329\n", + " 0000000000000010000000000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -7686,6 +7914,7 @@ " NaN\n", " NaN\n", " MOUSE/ABL1/1fpu_chainB\n", + " <NA>\n", " \n", " \n", "\n", @@ -7702,20 +7931,20 @@ "1 A Mouse 532 ABL1 \n", "2 B Mouse 532 ABL1 \n", "\n", - " kinase.family kinase.group \\\n", - "0 Tec TK \n", - "1 Abl TK \n", - "2 Abl TK \n", + " kinase.names kinase.family kinase.group \\\n", + "0 ['BMX'] Tec TK \n", + "1 ['Abl1', 'ABL1'] Abl TK \n", + "2 ['Abl1', 'ABL1'] Abl TK \n", "\n", " structure.pocket ligand.expo_id \\\n", "0 KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 1N1 \n", "1 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... PRC \n", "2 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... PRC \n", "\n", - " ligand_allosteric.expo_id \\\n", - "0 - \n", - "1 - \n", - "2 - \n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - \n", + "1 - \n", + "2 - \n", "\n", " ligand.name ligand_allosteric.name \\\n", "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... - \n", @@ -7732,10 +7961,15 @@ "1 8.8 0 \n", "2 9.2 0 \n", "\n", - " structure.missing_atoms structure.rmsd1 structure.rmsd2 structure.front \\\n", - "0 0 0.839 1.967 \n", - "1 8 0.925 2.319 \n", - "2 4 0.925 2.329 \n", + " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", + "0 0 0.839 1.967 \n", + "1 8 0.925 2.319 \n", + "2 4 0.925 2.329 \n", + "\n", + " interaction.fingerprint structure.front \\\n", + "0 0000000000000010000001000000000000000000000000... \n", + "1 0000000000000010000000000000000000000000000000... \n", + "2 0000000000000010000000000000000000000000000000... 
\n", "\n", " structure.gate structure.back structure.fp_i structure.fp_ii \\\n", "0 False False \n", @@ -7762,10 +7996,10 @@ "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "\n", - " structure.filepath \n", - "0 HUMAN/BMX/3sxr_chainA \n", - "1 MOUSE/ABL1/1fpu_chainA \n", - "2 MOUSE/ABL1/1fpu_chainB " + " structure.filepath structure.curation_flag \n", + "0 HUMAN/BMX/3sxr_chainA \n", + "1 MOUSE/ABL1/1fpu_chainA \n", + "2 MOUSE/ABL1/1fpu_chainB " ] }, "execution_count": 73, @@ -7844,11 +8078,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -7859,6 +8096,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -7878,40 +8116,42 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", " \n", " 0\n", - " 1089\n", - " 2g2h\n", - " -\n", - " B\n", + " 12887\n", + " 6xrg\n", + " 16\n", + " A\n", " Human\n", " 392\n", " ABL1\n", " <NA>\n", " <NA>\n", - " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", - " P16\n", + " <NA>\n", + " HKLGEGQYGEVYEVAVKTLEFLKEAAVLKEIKPNLVQLLGVYIITE...\n", " -\n", + " -\n", + " 0\n", + " 0\n", " <NA>\n", " <NA>\n", - " out-like\n", + " out\n", " in\n", - " 2.00\n", - " 8.0\n", + " NaN\n", + " 7.6\n", " 0\n", " 0\n", - " 0.863\n", - " 2.140\n", - " True\n", - " True\n", + " 0.949\n", + " 2.476\n", + " <NA>\n", + " False\n", " False\n", " False\n", " False\n", - " True\n", - " True\n", " False\n", " False\n", " False\n", @@ -7920,35 +8160,43 @@ " False\n", " False\n", " False\n", - " 16.691601\n", - " 55.077801\n", - " 1.634190\n", + " False\n", + " False\n", + " 
False\n", + " 17.855801\n", + " 64.973000\n", + " 72.903801\n", " <NA>\n", + " False\n", " \n", " \n", " 1\n", - " 10944\n", - " 6npu\n", - " -\n", + " 1112\n", + " 3cs9\n", + " B\n", " B\n", " Human\n", " 392\n", " ABL1\n", " <NA>\n", " <NA>\n", + " <NA>\n", " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", - " STI\n", - " KWV\n", + " NIL\n", + " -\n", + " 277\n", + " 0\n", " <NA>\n", " <NA>\n", " out\n", - " out\n", - " 2.33\n", - " 8.5\n", + " in\n", + " 2.21\n", + " 7.2\n", " 2\n", - " 3\n", - " 0.936\n", - " 2.305\n", + " 0\n", + " 0.937\n", + " 2.245\n", + " <NA>\n", " True\n", " True\n", " True\n", @@ -7961,13 +8209,14 @@ " False\n", " True\n", " False\n", - " False\n", " True\n", " False\n", - " 18.220200\n", - " 61.682899\n", - " 20.631399\n", + " True\n", + " 18.352800\n", + " 61.498600\n", + " 11.324800\n", " <NA>\n", + " False\n", " \n", " \n", " ...\n", @@ -8012,31 +8261,40 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 203\n", - " 5708\n", - " 3ms9\n", - " B\n", + " 205\n", + " 5717\n", + " 3kf4\n", + " -\n", " A\n", " Mouse\n", " 532\n", " ABL1\n", " <NA>\n", " <NA>\n", + " <NA>\n", " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", - " STI\n", - " MS9\n", + " B90\n", + " -\n", + " 1799\n", + " 0\n", " <NA>\n", " <NA>\n", - " out\n", - " out\n", - " 1.80\n", - " 7.6\n", + " in\n", + " in\n", + " 1.90\n", + " 8.0\n", " 0\n", " 0\n", - " 0.923\n", - " 2.307\n", + " 0.781\n", + " 2.148\n", + " <NA>\n", " True\n", " True\n", " True\n", @@ -8047,40 +8305,45 @@ " False\n", " False\n", " False\n", - " True\n", " False\n", " False\n", - " True\n", " False\n", - " 18.652599\n", - " 63.107601\n", - " 15.873000\n", + " False\n", + " False\n", + " 18.121901\n", + " 59.654598\n", + " 8.660560\n", " <NA>\n", + " False\n", " \n", " \n", - " 204\n", - " 5703\n", - " 1iep\n", - " -\n", + " 206\n", + " 5723\n", + " 3k5v\n", + " B\n", " A\n", " Mouse\n", " 532\n", " ABL1\n", " <NA>\n", " 
<NA>\n", + " <NA>\n", " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", " STI\n", - " -\n", + " STJ\n", + " 48\n", + " 1796\n", " <NA>\n", " <NA>\n", " out\n", - " out\n", - " 2.10\n", + " in\n", + " 1.74\n", " 7.6\n", " 0\n", " 0\n", - " 0.922\n", - " 2.306\n", + " 0.924\n", + " 2.310\n", + " <NA>\n", " True\n", " True\n", " True\n", @@ -8096,116 +8359,124 @@ " False\n", " True\n", " False\n", - " 18.631001\n", - " 63.209400\n", - " 14.280100\n", + " 18.701099\n", + " 62.870201\n", + " 12.680900\n", " <NA>\n", + " False\n", " \n", " \n", "\n", - "

205 rows × 41 columns

\n", + "

207 rows × 46 columns

\n", "" ], "text/plain": [ " structure.klifs_id structure.pdb_id structure.alternate_model \\\n", - "0 1089 2g2h - \n", - "1 10944 6npu - \n", + "0 12887 6xrg 16 \n", + "1 1112 3cs9 B \n", ".. ... ... ... \n", - "203 5708 3ms9 B \n", - "204 5703 1iep - \n", + "205 5717 3kf4 - \n", + "206 5723 3k5v B \n", "\n", " structure.chain species.klifs kinase.klifs_id kinase.klifs_name \\\n", - "0 B Human 392 ABL1 \n", + "0 A Human 392 ABL1 \n", "1 B Human 392 ABL1 \n", ".. ... ... ... ... \n", - "203 A Mouse 532 ABL1 \n", - "204 A Mouse 532 ABL1 \n", + "205 A Mouse 532 ABL1 \n", + "206 A Mouse 532 ABL1 \n", "\n", - " kinase.family kinase.group \\\n", - "0 \n", - "1 \n", - ".. ... ... \n", - "203 \n", - "204 \n", + " kinase.names kinase.family kinase.group \\\n", + "0 \n", + "1 \n", + ".. ... ... ... \n", + "205 \n", + "206 \n", "\n", " structure.pocket ligand.expo_id \\\n", - "0 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... P16 \n", - "1 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... STI \n", + "0 HKLGEGQYGEVYEVAVKTLEFLKEAAVLKEIKPNLVQLLGVYIITE... - \n", + "1 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... NIL \n", ".. ... ... \n", - "203 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... STI \n", - "204 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... STI \n", - "\n", - " ligand_allosteric.expo_id ligand.name ligand_allosteric.name \\\n", - "0 - \n", - "1 KWV \n", - ".. ... ... ... \n", - "203 MS9 \n", - "204 - \n", - "\n", - " structure.dfg structure.ac_helix structure.resolution \\\n", - "0 out-like in 2.00 \n", - "1 out out 2.33 \n", - ".. ... ... ... \n", - "203 out out 1.80 \n", - "204 out out 2.10 \n", - "\n", - " structure.qualityscore structure.missing_residues \\\n", - "0 8.0 0 \n", - "1 8.5 2 \n", - ".. ... ... \n", - "203 7.6 0 \n", - "204 7.6 0 \n", + "205 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... B90 \n", + "206 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... 
STI \n", + "\n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - 0 0 \n", + "1 - 277 0 \n", + ".. ... ... ... \n", + "205 - 1799 0 \n", + "206 STJ 48 1796 \n", + "\n", + " ligand.name ligand_allosteric.name structure.dfg structure.ac_helix \\\n", + "0 out in \n", + "1 out in \n", + ".. ... ... ... ... \n", + "205 in in \n", + "206 out in \n", + "\n", + " structure.resolution structure.qualityscore structure.missing_residues \\\n", + "0 NaN 7.6 0 \n", + "1 2.21 7.2 2 \n", + ".. ... ... ... \n", + "205 1.90 8.0 0 \n", + "206 1.74 7.6 0 \n", "\n", " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", - "0 0 0.863 2.140 \n", - "1 3 0.936 2.305 \n", + "0 0 0.949 2.476 \n", + "1 0 0.937 2.245 \n", ".. ... ... ... \n", - "203 0 0.923 2.307 \n", - "204 0 0.922 2.306 \n", - "\n", - " structure.front structure.gate structure.back structure.fp_i \\\n", - "0 True True False False \n", - "1 True True True False \n", - ".. ... ... ... ... \n", - "203 True True True False \n", - "204 True True True False \n", - "\n", - " structure.fp_ii structure.bp_i_a structure.bp_i_b structure.bp_ii_in \\\n", - "0 False True True False \n", - "1 False True True False \n", - ".. ... ... ... ... \n", - "203 False True True False \n", - "204 False True True False \n", - "\n", - " structure.bp_ii_a_in structure.bp_ii_b_in structure.bp_ii_out \\\n", - "0 False False False \n", - "1 False False True \n", - ".. ... ... ... \n", - "203 False False True \n", - "204 False False True \n", - "\n", - " structure.bp_ii_b structure.bp_iii structure.bp_iv structure.bp_v \\\n", - "0 False False False False \n", - "1 False False True False \n", - ".. ... ... ... ... \n", - "203 False False True False \n", - "204 False False True False \n", - "\n", - " structure.grich_distance structure.grich_angle \\\n", - "0 16.691601 55.077801 \n", - "1 18.220200 61.682899 \n", - ".. ... ... 
\n", - "203 18.652599 63.107601 \n", - "204 18.631001 63.209400 \n", - "\n", - " structure.grich_rotation structure.filepath \n", - "0 1.634190 \n", - "1 20.631399 \n", - ".. ... ... \n", - "203 15.873000 \n", - "204 14.280100 \n", - "\n", - "[205 rows x 41 columns]" + "205 0 0.781 2.148 \n", + "206 0 0.924 2.310 \n", + "\n", + " interaction.fingerprint structure.front structure.gate structure.back \\\n", + "0 False False False \n", + "1 True True True \n", + ".. ... ... ... ... \n", + "205 True True True \n", + "206 True True True \n", + "\n", + " structure.fp_i structure.fp_ii structure.bp_i_a structure.bp_i_b \\\n", + "0 False False False False \n", + "1 False False True True \n", + ".. ... ... ... ... \n", + "205 False False True True \n", + "206 False False True True \n", + "\n", + " structure.bp_ii_in structure.bp_ii_a_in structure.bp_ii_b_in \\\n", + "0 False False False \n", + "1 False False False \n", + ".. ... ... ... \n", + "205 False False False \n", + "206 False False False \n", + "\n", + " structure.bp_ii_out structure.bp_ii_b structure.bp_iii \\\n", + "0 False False False \n", + "1 True False True \n", + ".. ... ... ... \n", + "205 False False False \n", + "206 True False False \n", + "\n", + " structure.bp_iv structure.bp_v structure.grich_distance \\\n", + "0 False False 17.855801 \n", + "1 False True 18.352800 \n", + ".. ... ... ... \n", + "205 False False 18.121901 \n", + "206 True False 18.701099 \n", + "\n", + " structure.grich_angle structure.grich_rotation structure.filepath \\\n", + "0 64.973000 72.903801 \n", + "1 61.498600 11.324800 \n", + ".. ... ... ... \n", + "205 59.654598 8.660560 \n", + "206 62.870201 12.680900 \n", + "\n", + " structure.curation_flag \n", + "0 False \n", + "1 False \n", + ".. ... 
\n", + "205 False \n", + "206 False \n", + "\n", + "[207 rows x 46 columns]" ] }, "execution_count": 75, @@ -8277,11 +8548,14 @@ " species.klifs\n", " kinase.klifs_id\n", " kinase.klifs_name\n", + " kinase.names\n", " kinase.family\n", " kinase.group\n", " structure.pocket\n", " ligand.expo_id\n", " ligand_allosteric.expo_id\n", + " ligand.klifs_id\n", + " ligand_allosteric.klifs_id\n", " ligand.name\n", " ligand_allosteric.name\n", " structure.dfg\n", @@ -8292,6 +8566,7 @@ " structure.missing_atoms\n", " structure.rmsd1\n", " structure.rmsd2\n", + " interaction.fingerprint\n", " structure.front\n", " structure.gate\n", " structure.back\n", @@ -8311,6 +8586,7 @@ " structure.grich_angle\n", " structure.grich_rotation\n", " structure.filepath\n", + " structure.curation_flag\n", " \n", " \n", " \n", @@ -8323,11 +8599,14 @@ " Human\n", " 472\n", " BMX\n", + " ['BMX']\n", " Tec\n", " TK\n", " KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE...\n", " 1N1\n", " -\n", + " <NA>\n", + " <NA>\n", " N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX...\n", " -\n", " out-like\n", @@ -8338,6 +8617,7 @@ " 0\n", " 0.839\n", " 1.967\n", + " 0000000000000010000001000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -8357,6 +8637,7 @@ " NaN\n", " NaN\n", " HUMAN/BMX/3sxr_chainA\n", + " <NA>\n", " \n", " \n", " 1\n", @@ -8367,11 +8648,14 @@ " Mouse\n", " 532\n", " ABL1\n", + " ['Abl1', 'ABL1']\n", " Abl\n", " TK\n", " HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", " PRC\n", " -\n", + " <NA>\n", + " <NA>\n", " N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]...\n", " -\n", " out\n", @@ -8382,6 +8666,7 @@ " 8\n", " 0.925\n", " 2.319\n", + " 0000000000000010000000000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -8401,6 +8686,7 @@ " NaN\n", " NaN\n", " MOUSE/ABL1/1fpu_chainA\n", + " <NA>\n", " \n", " \n", " 2\n", @@ -8411,11 +8697,14 @@ " Mouse\n", " 532\n", " ABL1\n", + " ['Abl1', 'ABL1']\n", " Abl\n", " TK\n", " 
HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...\n", " PRC\n", " -\n", + " <NA>\n", + " <NA>\n", " N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]...\n", " -\n", " out\n", @@ -8426,6 +8715,7 @@ " 4\n", " 0.925\n", " 2.329\n", + " 0000000000000010000000000000000000000000000000...\n", " <NA>\n", " <NA>\n", " <NA>\n", @@ -8445,6 +8735,7 @@ " NaN\n", " NaN\n", " MOUSE/ABL1/1fpu_chainB\n", + " <NA>\n", " \n", " \n", "\n", @@ -8461,20 +8752,20 @@ "1 A Mouse 532 ABL1 \n", "2 B Mouse 532 ABL1 \n", "\n", - " kinase.family kinase.group \\\n", - "0 Tec TK \n", - "1 Abl TK \n", - "2 Abl TK \n", + " kinase.names kinase.family kinase.group \\\n", + "0 ['BMX'] Tec TK \n", + "1 ['Abl1', 'ABL1'] Abl TK \n", + "2 ['Abl1', 'ABL1'] Abl TK \n", "\n", " structure.pocket ligand.expo_id \\\n", "0 KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTE... 1N1 \n", "1 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... PRC \n", "2 HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE... PRC \n", "\n", - " ligand_allosteric.expo_id \\\n", - "0 - \n", - "1 - \n", - "2 - \n", + " ligand_allosteric.expo_id ligand.klifs_id ligand_allosteric.klifs_id \\\n", + "0 - \n", + "1 - \n", + "2 - \n", "\n", " ligand.name ligand_allosteric.name \\\n", "0 N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROX... - \n", @@ -8491,10 +8782,15 @@ "1 8.8 0 \n", "2 9.2 0 \n", "\n", - " structure.missing_atoms structure.rmsd1 structure.rmsd2 structure.front \\\n", - "0 0 0.839 1.967 \n", - "1 8 0.925 2.319 \n", - "2 4 0.925 2.329 \n", + " structure.missing_atoms structure.rmsd1 structure.rmsd2 \\\n", + "0 0 0.839 1.967 \n", + "1 8 0.925 2.319 \n", + "2 4 0.925 2.329 \n", + "\n", + " interaction.fingerprint structure.front \\\n", + "0 0000000000000010000001000000000000000000000000... \n", + "1 0000000000000010000000000000000000000000000000... \n", + "2 0000000000000010000000000000000000000000000000... 
\n", "\n", " structure.gate structure.back structure.fp_i structure.fp_ii \\\n", "0 False False \n", @@ -8521,10 +8817,10 @@ "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "\n", - " structure.filepath \n", - "0 HUMAN/BMX/3sxr_chainA \n", - "1 MOUSE/ABL1/1fpu_chainA \n", - "2 MOUSE/ABL1/1fpu_chainB " + " structure.filepath structure.curation_flag \n", + "0 HUMAN/BMX/3sxr_chainA \n", + "1 MOUSE/ABL1/1fpu_chainA \n", + "2 MOUSE/ABL1/1fpu_chainB " ] }, "execution_count": 77, @@ -8587,24 +8883,17 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2e7bcc40967744de9e3c1cfe773b1e4a", + "model_id": "0681f9b0e434422491c2f694353a6f9e", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=10.0), HTML(value='')))" + "Processing...: 0%| | 0/10 [00:00\n", " kinase.pref_name\n", " kinase.uniprot\n", + " kinase.chembl_id\n", + " ligand.chembl_id\n", " ligand.bioactivity_standard_type\n", " ligand.bioactivity_standard_relation\n", " ligand.bioactivity_standard_value\n", @@ -8639,24 +8930,28 @@ " \n", " \n", " 0\n", - " Ephrin type-B receptor 2\n", - " P29323\n", - " EC50\n", + " Discoidin domain-containing receptor 2\n", + " Q16832\n", + " CHEMBL5122\n", + " CHEMBL552425\n", + " IC50\n", " =\n", - " 40.00\n", + " 18.60\n", " nM\n", - " 7.40\n", + " 7.73\n", " Homo sapiens\n", " \n", " \n", " 1\n", " Ephrin type-B receptor 2\n", " P29323\n", + " CHEMBL3290\n", + " CHEMBL552425\n", " EC50\n", " =\n", - " 100.00\n", + " 40.00\n", " nM\n", - " 7.00\n", + " 7.40\n", " Homo sapiens\n", " \n", " \n", @@ -8669,11 +8964,15 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 521\n", + " 540\n", " Tyrosine-protein kinase LCK\n", " P06239\n", + " CHEMBL258\n", + " CHEMBL3086536\n", " Ki\n", " =\n", " 55.00\n", @@ -8682,9 +8981,11 @@ " Homo sapiens\n", " \n", " \n", - " 522\n", + " 541\n", " Tyrosine-protein kinase ITK/TSK\n", " Q08881\n", + " CHEMBL2959\n", + " 
CHEMBL3263052\n", " Ki\n", " =\n", " 0.93\n", @@ -8694,39 +8995,46 @@ " \n", " \n", "\n", - "

523 rows × 8 columns

\n", + "

542 rows × 10 columns

\n", "" ], "text/plain": [ - " kinase.pref_name kinase.uniprot \\\n", - "0 Ephrin type-B receptor 2 P29323 \n", - "1 Ephrin type-B receptor 2 P29323 \n", - ".. ... ... \n", - "521 Tyrosine-protein kinase LCK P06239 \n", - "522 Tyrosine-protein kinase ITK/TSK Q08881 \n", - "\n", - " ligand.bioactivity_standard_type ligand.bioactivity_standard_relation \\\n", - "0 EC50 = \n", - "1 EC50 = \n", - ".. ... ... \n", - "521 Ki = \n", - "522 Ki = \n", - "\n", - " ligand.bioactivity_standard_value ligand.bioactivity_standard_units \\\n", - "0 40.00 nM \n", - "1 100.00 nM \n", - ".. ... ... \n", - "521 55.00 nM \n", - "522 0.93 nM \n", - "\n", - " ligand.bioactivity_pchembl_value species.chembl \n", - "0 7.40 Homo sapiens \n", - "1 7.00 Homo sapiens \n", - ".. ... ... \n", - "521 7.26 Homo sapiens \n", - "522 9.03 Homo sapiens \n", - "\n", - "[523 rows x 8 columns]" + " kinase.pref_name kinase.uniprot kinase.chembl_id \\\n", + "0 Discoidin domain-containing receptor 2 Q16832 CHEMBL5122 \n", + "1 Ephrin type-B receptor 2 P29323 CHEMBL3290 \n", + ".. ... ... ... \n", + "540 Tyrosine-protein kinase LCK P06239 CHEMBL258 \n", + "541 Tyrosine-protein kinase ITK/TSK Q08881 CHEMBL2959 \n", + "\n", + " ligand.chembl_id ligand.bioactivity_standard_type \\\n", + "0 CHEMBL552425 IC50 \n", + "1 CHEMBL552425 EC50 \n", + ".. ... ... \n", + "540 CHEMBL3086536 Ki \n", + "541 CHEMBL3263052 Ki \n", + "\n", + " ligand.bioactivity_standard_relation ligand.bioactivity_standard_value \\\n", + "0 = 18.60 \n", + "1 = 40.00 \n", + ".. ... ... \n", + "540 = 55.00 \n", + "541 = 0.93 \n", + "\n", + " ligand.bioactivity_standard_units ligand.bioactivity_pchembl_value \\\n", + "0 nM 7.73 \n", + "1 nM 7.40 \n", + ".. ... ... \n", + "540 nM 7.26 \n", + "541 nM 9.03 \n", + "\n", + " species.chembl \n", + "0 Homo sapiens \n", + "1 Homo sapiens \n", + ".. ... 
\n", + "540 Homo sapiens \n", + "541 Homo sapiens \n", + "\n", + "[542 rows x 10 columns]" ] }, "execution_count": 79, @@ -8771,12 +9079,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1fd679b6234e4f0bb6a403a0d003fc7e", + "model_id": "f70759a55dca4a8a9cc0b35eb21c8023", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=2.0), HTML(value='')))" + "Processing...: 0%| | 0/2 [00:00.Was instead.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" + "Error for 10000: Expected type to be dict for value [400, 'KLIFS error: The provided kinase ID(s) is/are invalid'] to unmarshal to a .Was instead.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4c55a36d9a014d1ab7bfc92894194a1e", + "model_id": "6d31d6f4f31d46beaa02317f38e7df00", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=3.0), HTML(value='')))" + "Processing...: 0%| | 0/3 [00:00.Was instead.\n", + "Error for 632: Expected type to be dict for value [400, 'KLIFS error: This ligand is not available in ChEMBL.'] to unmarshal to a .Was instead.\n", "Error for 3716: Expected type to be dict for value [400, 'KLIFS error: This ligand is not available in ChEMBL.'] to unmarshal to a .Was instead.\n" ] }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, { "data": { "text/html": [ @@ -8852,6 +9146,8 @@ " \n", " kinase.pref_name\n", " kinase.uniprot\n", + " kinase.chembl_id\n", + " ligand.chembl_id\n", " ligand.bioactivity_standard_type\n", " ligand.bioactivity_standard_relation\n", " ligand.bioactivity_standard_value\n", @@ -8865,6 +9161,8 @@ " 0\n", " Activin receptor type-1\n", " Q04771\n", + " CHEMBL5903\n", + " CHEMBL1421\n", " Kd\n", " =\n", " 620.00\n", @@ -8876,6 +9174,8 @@ " 1\n", " Activin receptor type-1\n", " Q04771\n", + " 
CHEMBL5903\n", + " CHEMBL1421\n", " Ki\n", " =\n", " 79.43\n", @@ -8893,64 +9193,77 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 279\n", + " 289\n", " Wee1-like protein kinase 2\n", " P0C1S8\n", + " CHEMBL5095\n", + " CHEMBL1421\n", " Kd\n", " =\n", - " 481.00\n", + " 200.00\n", " nM\n", - " 6.32\n", + " 6.70\n", " Homo sapiens\n", " \n", " \n", - " 280\n", + " 290\n", " Wee1-like protein kinase 2\n", " P0C1S8\n", + " CHEMBL5095\n", + " CHEMBL1421\n", " Kd\n", " =\n", - " 200.00\n", + " 481.00\n", " nM\n", - " 6.70\n", + " 6.32\n", " Homo sapiens\n", " \n", " \n", "\n", - "

281 rows × 8 columns

\n", + "

291 rows × 10 columns

\n", "" ], "text/plain": [ - " kinase.pref_name kinase.uniprot \\\n", - "0 Activin receptor type-1 Q04771 \n", - "1 Activin receptor type-1 Q04771 \n", - ".. ... ... \n", - "279 Wee1-like protein kinase 2 P0C1S8 \n", - "280 Wee1-like protein kinase 2 P0C1S8 \n", - "\n", - " ligand.bioactivity_standard_type ligand.bioactivity_standard_relation \\\n", - "0 Kd = \n", - "1 Ki = \n", - ".. ... ... \n", - "279 Kd = \n", - "280 Kd = \n", - "\n", - " ligand.bioactivity_standard_value ligand.bioactivity_standard_units \\\n", - "0 620.00 nM \n", - "1 79.43 nM \n", - ".. ... ... \n", - "279 481.00 nM \n", - "280 200.00 nM \n", - "\n", - " ligand.bioactivity_pchembl_value species.chembl \n", - "0 6.21 Homo sapiens \n", - "1 7.10 Homo sapiens \n", - ".. ... ... \n", - "279 6.32 Homo sapiens \n", - "280 6.70 Homo sapiens \n", - "\n", - "[281 rows x 8 columns]" + " kinase.pref_name kinase.uniprot kinase.chembl_id \\\n", + "0 Activin receptor type-1 Q04771 CHEMBL5903 \n", + "1 Activin receptor type-1 Q04771 CHEMBL5903 \n", + ".. ... ... ... \n", + "289 Wee1-like protein kinase 2 P0C1S8 CHEMBL5095 \n", + "290 Wee1-like protein kinase 2 P0C1S8 CHEMBL5095 \n", + "\n", + " ligand.chembl_id ligand.bioactivity_standard_type \\\n", + "0 CHEMBL1421 Kd \n", + "1 CHEMBL1421 Ki \n", + ".. ... ... \n", + "289 CHEMBL1421 Kd \n", + "290 CHEMBL1421 Kd \n", + "\n", + " ligand.bioactivity_standard_relation ligand.bioactivity_standard_value \\\n", + "0 = 620.00 \n", + "1 = 79.43 \n", + ".. ... ... \n", + "289 = 200.00 \n", + "290 = 481.00 \n", + "\n", + " ligand.bioactivity_standard_units ligand.bioactivity_pchembl_value \\\n", + "0 nM 6.21 \n", + "1 nM 7.10 \n", + ".. ... ... \n", + "289 nM 6.70 \n", + "290 nM 6.32 \n", + "\n", + " species.chembl \n", + "0 Homo sapiens \n", + "1 Homo sapiens \n", + ".. ... 
\n", + "289 Homo sapiens \n", + "290 Homo sapiens \n", + "\n", + "[291 rows x 10 columns]" ] }, "execution_count": 80, @@ -8970,12 +9283,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0ecc5ae8145d44178d4ca600835af96a", + "model_id": "ca494d28fb914a1891543af72992db28", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" + "Processing...: 0%| | 0/1 [00:00.Was instead.\n" + "Error for 10000000: Expected type to be dict for value [400, 'KLIFS error: The provided kinase ID(s) is/are invalid'] to unmarshal to a .Was instead.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", "Input values yield no results.\n" ] } @@ -9022,13 +9334,6 @@ "### Bioactivities from ligand KLIFS ID(s)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "__Remote__" - ] - }, { "cell_type": "code", "execution_count": 82, @@ -9037,12 +9342,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "df2be59c034c40bda54c0653de5caa3b", + "model_id": "d278b183a0b04c4cb878a7bff64fd793", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=3.0), HTML(value='')))" + "Processing...: 0%| | 0/3 [00:00.Was instead.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" + "Error for 10000: Expected type to be dict for value [400, 'KLIFS error: Could not find a ligand with the provided ID.'] to unmarshal to a .Was instead.\n" ] }, { @@ -9087,6 +9385,8 @@ " \n", " kinase.pref_name\n", " kinase.uniprot\n", + " kinase.chembl_id\n", + " ligand.chembl_id\n", " ligand.bioactivity_standard_type\n", " ligand.bioactivity_standard_relation\n", " ligand.bioactivity_standard_value\n", @@ -9101,6 +9401,8 @@ " 0\n", " cAMP-dependent protein kinase alpha-catalytic ...\n", " Q95J97\n", + " CHEMBL4885\n", + " CHEMBL281948\n", " 
IC50\n", " =\n", " 0.016\n", @@ -9113,6 +9415,8 @@ " 1\n", " Myosin light chain kinase, smooth muscle\n", " P11799\n", + " CHEMBL3062\n", + " CHEMBL281948\n", " IC50\n", " =\n", " 0.020\n", @@ -9132,11 +9436,15 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", " \n", " \n", " 7\n", " Vascular endothelial growth factor receptor 2\n", " P35968\n", + " CHEMBL279\n", + " CHEMBL281948\n", " IC50\n", " =\n", " 43.000\n", @@ -9149,322 +9457,83 @@ " 8\n", " Glycogen synthase kinase-3 beta\n", " P49841\n", + " CHEMBL262\n", + " CHEMBL523435\n", " IC50\n", " =\n", " 247.000\n", " nM\n", " 6.61\n", " Homo sapiens\n", - " 200\n", - " \n", - " \n", - "\n", - "

9 rows × 9 columns

\n", - "" - ], - "text/plain": [ - " kinase.pref_name kinase.uniprot \\\n", - "0 cAMP-dependent protein kinase alpha-catalytic ... Q95J97 \n", - "1 Myosin light chain kinase, smooth muscle P11799 \n", - ".. ... ... \n", - "7 Vascular endothelial growth factor receptor 2 P35968 \n", - "8 Glycogen synthase kinase-3 beta P49841 \n", - "\n", - " ligand.bioactivity_standard_type ligand.bioactivity_standard_relation \\\n", - "0 IC50 = \n", - "1 IC50 = \n", - ".. ... ... \n", - "7 IC50 = \n", - "8 IC50 = \n", - "\n", - " ligand.bioactivity_standard_value ligand.bioactivity_standard_units \\\n", - "0 0.016 nM \n", - "1 0.020 nM \n", - ".. ... ... \n", - "7 43.000 nM \n", - "8 247.000 nM \n", - "\n", - " ligand.bioactivity_pchembl_value species.chembl \\\n", - "0 10.80 Oryctolagus cuniculus \n", - "1 10.70 Gallus gallus \n", - ".. ... ... \n", - "7 7.37 Homo sapiens \n", - "8 6.61 Homo sapiens \n", - "\n", - " ligand.klifs_id (query) \n", - "0 100 \n", - "1 100 \n", - ".. ... \n", - "7 100 \n", - "8 200 \n", - "\n", - "[9 rows x 9 columns]" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote.bioactivities.by_ligand_klifs_id([100, 200, 10000])" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "06bdeb0340fe48c2b24ab021e425236d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:opencadd.databases.klifs.core:There was (were) 1/1 failed request(s).\n", - "Show error messages (up to 5 messages only):\n", - "ERROR:opencadd.databases.klifs.core:Error for 10000: Expected type to be dict for value [400, 'KLIFS error: Could not find a ligand with the 
provided ID.'] to unmarshal to a .Was instead.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Input values yield no results.\n" - ] - } - ], - "source": [ - "try:\n", - " remote.bioactivities.by_ligand_klifs_id(10000)\n", - "except SwaggerMappingError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "__Local__\n", - "\n", - "This information is not available locally." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Bioactivities from ligand Expo ID(s)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "__Remote__" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "12bb5c29c045425c90408ee542891942", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=2.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:opencadd.databases.klifs.core:There was (were) 1/2 failed request(s).\n", - "Show error messages (up to 5 messages only):\n", - "ERROR:opencadd.databases.klifs.core:Error for PRC: Expected type to be dict for value [400, 'KLIFS error: This ligand is not available in ChEMBL.'] to unmarshal to a .Was instead.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", "
kinase.pref_namekinase.uniprotligand.bioactivity_standard_typeligand.bioactivity_standard_relationligand.bioactivity_standard_valueligand.bioactivity_standard_unitsligand.bioactivity_pchembl_valuespecies.chemblligand.expo_id (query)
0Activin receptor type-1Q04771Kd=620.00nM6.21Homo sapiens1N1
1Activin receptor type-1Q04771Ki=79.43nM7.10Homo sapiens1N1
..............................
279Wee1-like protein kinase 2P0C1S8Kd=481.00nM6.32Homo sapiens1N1
280Wee1-like protein kinase 2P0C1S8Kd=200.00nM6.70Homo sapiens1N1200
\n", - "

281 rows × 9 columns

\n", + "

9 rows × 11 columns

\n", "
" ], "text/plain": [ - " kinase.pref_name kinase.uniprot \\\n", - "0 Activin receptor type-1 Q04771 \n", - "1 Activin receptor type-1 Q04771 \n", - ".. ... ... \n", - "279 Wee1-like protein kinase 2 P0C1S8 \n", - "280 Wee1-like protein kinase 2 P0C1S8 \n", - "\n", - " ligand.bioactivity_standard_type ligand.bioactivity_standard_relation \\\n", - "0 Kd = \n", - "1 Ki = \n", - ".. ... ... \n", - "279 Kd = \n", - "280 Kd = \n", - "\n", - " ligand.bioactivity_standard_value ligand.bioactivity_standard_units \\\n", - "0 620.00 nM \n", - "1 79.43 nM \n", - ".. ... ... \n", - "279 481.00 nM \n", - "280 200.00 nM \n", - "\n", - " ligand.bioactivity_pchembl_value species.chembl ligand.expo_id (query) \n", - "0 6.21 Homo sapiens 1N1 \n", - "1 7.10 Homo sapiens 1N1 \n", - ".. ... ... ... \n", - "279 6.32 Homo sapiens 1N1 \n", - "280 6.70 Homo sapiens 1N1 \n", - "\n", - "[281 rows x 9 columns]" + " kinase.pref_name kinase.uniprot \\\n", + "0 cAMP-dependent protein kinase alpha-catalytic ... Q95J97 \n", + "1 Myosin light chain kinase, smooth muscle P11799 \n", + ".. ... ... \n", + "7 Vascular endothelial growth factor receptor 2 P35968 \n", + "8 Glycogen synthase kinase-3 beta P49841 \n", + "\n", + " kinase.chembl_id ligand.chembl_id ligand.bioactivity_standard_type \\\n", + "0 CHEMBL4885 CHEMBL281948 IC50 \n", + "1 CHEMBL3062 CHEMBL281948 IC50 \n", + ".. ... ... ... \n", + "7 CHEMBL279 CHEMBL281948 IC50 \n", + "8 CHEMBL262 CHEMBL523435 IC50 \n", + "\n", + " ligand.bioactivity_standard_relation ligand.bioactivity_standard_value \\\n", + "0 = 0.016 \n", + "1 = 0.020 \n", + ".. ... ... \n", + "7 = 43.000 \n", + "8 = 247.000 \n", + "\n", + " ligand.bioactivity_standard_units ligand.bioactivity_pchembl_value \\\n", + "0 nM 10.80 \n", + "1 nM 10.70 \n", + ".. ... ... \n", + "7 nM 7.37 \n", + "8 nM 6.61 \n", + "\n", + " species.chembl ligand.klifs_id (query) \n", + "0 Oryctolagus cuniculus 100 \n", + "1 Gallus gallus 100 \n", + ".. ... ... 
\n", + "7 Homo sapiens 100 \n", + "8 Homo sapiens 200 \n", + "\n", + "[9 rows x 11 columns]" ] }, - "execution_count": 84, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "remote.bioactivities.by_ligand_expo_id([\"PRC\", \"1N1\"])" + "remote.bioactivities.by_ligand_klifs_id([100, 200, 10000])" ] }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2f078beafd064777b86af6f69a7489c0", + "model_id": "12660bc60d424e3486a178c04d6c0a14", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(HTML(value='Processing...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))" + "Processing...: 0%| | 0/1 [00:00.Was instead.\n" + "Error for 10000: Expected type to be dict for value [400, 'KLIFS error: Could not find a ligand with the provided ID.'] to unmarshal to a .Was instead.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", "Input values yield no results.\n" ] } ], "source": [ "try:\n", - " remote.bioactivities.by_ligand_expo_id(\"XXX\")\n", + " remote.bioactivities.by_ligand_klifs_id(10000)\n", "except SwaggerMappingError as e:\n", " print(e)" ] @@ -9529,7 +9597,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 84, "metadata": {}, "outputs": [ { @@ -9589,17 +9657,17 @@ "" ], "text/plain": [ - " interaction.id interaction.name\n", - "0 1 Apolar contact\n", - "1 2 Aromatic face-to-face\n", - ".. ... ...\n", - "5 6 Protein cation - ligand anion\n", - "6 7 Protein anion - ligand cation\n", + " interaction.id interaction.name\n", + "0 1 Apolar contact\n", + "1 2 Aromatic face-to-face\n", + ".. ... 
...\n", + "5 6 Protein cation - ligand anion\n", + "6 7 Protein anion - ligand cation\n", "\n", "[7 rows x 2 columns]" ] }, - "execution_count": 86, + "execution_count": 84, "metadata": {}, "output_type": "execute_result" } @@ -9633,9 +9701,23 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 85, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " structure.klifs_id interaction.fingerprint\n", + "0 1 0000000000000010000000000000000000000000000000...\n", + "1 3 0000000000000010000000000000000000000000000000...\n", + "... ... ...\n", + "10518 13850 0000000000000010000001000000100000000000000000...\n", + "10519 13853 0000000000000010000001000000100000010000000000...\n", + "\n", + "[10520 rows x 2 columns]\n" + ] + }, { "data": { "text/html": [ @@ -9678,32 +9760,32 @@ " ...\n", " \n", " \n", - " 9845\n", - " 13109\n", - " 0000000000000010000001000000100000010000000000...\n", + " 10518\n", + " 13850\n", + " 0000000000000010000001000000100000000000000000...\n", " \n", " \n", - " 9846\n", - " 13110\n", + " 10519\n", + " 13853\n", " 0000000000000010000001000000100000010000000000...\n", " \n", " \n", "\n", - "

9847 rows × 2 columns

\n", + "

10520 rows × 2 columns

\n", "" ], "text/plain": [ - " structure.klifs_id interaction.fingerprint\n", - "0 1 0000000000000010000000000000000000000000000000...\n", - "1 3 0000000000000010000000000000000000000000000000...\n", - "... ... ...\n", - "9845 13109 0000000000000010000001000000100000010000000000...\n", - "9846 13110 0000000000000010000001000000100000010000000000...\n", - "\n", - "[9847 rows x 2 columns]" + " structure.klifs_id interaction.fingerprint\n", + "0 1 0000000000000010000000000000000000000000000000...\n", + "1 3 0000000000000010000000000000000000000000000000...\n", + "... ... ...\n", + "10518 13850 0000000000000010000001000000100000000000000000...\n", + "10519 13853 0000000000000010000001000000100000010000000000...\n", + "\n", + "[10520 rows x 2 columns]" ] }, - "execution_count": 87, + "execution_count": 85, "metadata": {}, "output_type": "execute_result" } @@ -9721,7 +9803,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 86, "metadata": {}, "outputs": [ { @@ -9761,28 +9843,37 @@ " 0000000000000000000000000000000000000000000000...\n", " \n", " \n", - " 2\n", - " 5728\n", - " 0000000000000010000000000000000000000000000000...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 3\n", - " 5705\n", - " 0000000000000010000000000000000000000000000000...\n", + " 4\n", + " 13623\n", + " <NA>\n", + " \n", + " \n", + " 5\n", + " 1243\n", + " <NA>\n", " \n", " \n", "\n", + "

6 rows × 2 columns

\n", "" ], "text/plain": [ - " structure.klifs_id interaction.fingerprint\n", - "0 3482 0000000000000010000001000000000000000000000000...\n", - "1 12347 0000000000000000000000000000000000000000000000...\n", - "2 5728 0000000000000010000000000000000000000000000000...\n", - "3 5705 0000000000000010000000000000000000000000000000..." + " structure.klifs_id interaction.fingerprint\n", + "0 3482 0000000000000010000001000000000000000000000000...\n", + "1 12347 0000000000000000000000000000000000000000000000...\n", + ".. ... ...\n", + "4 13623 \n", + "5 1243 \n", + "\n", + "[6 rows x 2 columns]" ] }, - "execution_count": 88, + "execution_count": 86, "metadata": {}, "output_type": "execute_result" } @@ -9807,7 +9898,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 87, "metadata": {}, "outputs": [ { @@ -9850,7 +9941,7 @@ "0 12347 0000000000000000000000000000000000000000000000..." ] }, - "execution_count": 89, + "execution_count": 87, "metadata": {}, "output_type": "execute_result" } @@ -9861,7 +9952,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 88, "metadata": {}, "outputs": [ { @@ -9890,7 +9981,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 89, "metadata": {}, "outputs": [ { @@ -9933,7 +10024,7 @@ "0 12347 0000000000000000000000000000000000000000000000..." 
] }, - "execution_count": 91, + "execution_count": 89, "metadata": {}, "output_type": "execute_result" } @@ -9944,7 +10035,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 90, "metadata": {}, "outputs": [ { @@ -9978,16 +10069,9 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 91, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:opencadd.databases.klifs.remote:This method uses this lookup: ligand KLIFS ID > Ligand Expo ID > structures.The KLIFS Swagger API offers no direct structure search by ligand KLIFS ID.However, one Ligand Expo ID can be represented by multiple ligand KLIFS IDs. Thus, in rare cases, this method will return also structure that are not connected to the input ligand KLIFS ID but to a mutual Ligand Expo ID.\n" - ] - }, { "data": { "text/html": [ @@ -10055,7 +10139,7 @@ "[14 rows x 2 columns]" ] }, - "execution_count": 93, + "execution_count": 91, "metadata": {}, "output_type": "execute_result" } @@ -10066,16 +10150,9 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 92, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:opencadd.databases.klifs.remote:This method uses this lookup: ligand KLIFS ID > Ligand Expo ID > structures.The KLIFS Swagger API offers no direct structure search by ligand KLIFS ID.However, one Ligand Expo ID can be represented by multiple ligand KLIFS IDs. 
Thus, in rare cases, this method will return also structure that are not connected to the input ligand KLIFS ID but to a mutual Ligand Expo ID.\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -10116,7 +10193,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 93, "metadata": {}, "outputs": [ { @@ -10161,18 +10238,18 @@ " ...\n", " \n", " \n", - " 172\n", - " 13034\n", - " 0000000000000010000001000000100000010000000000...\n", + " 188\n", + " 13617\n", + " 0000000000000010000000000000000000000000000000...\n", " \n", " \n", - " 173\n", - " 13035\n", - " 0000000000000010000001000000100000000000000001...\n", + " 189\n", + " 13618\n", + " 0000000000000000000000000000000000000000000000...\n", " \n", " \n", "\n", - "

174 rows × 2 columns

\n", + "

190 rows × 2 columns

\n", "" ], "text/plain": [ @@ -10180,13 +10257,13 @@ "0 3050 0000000000000000000000000000000000000000000000...\n", "1 3051 0000000000000000000000000000000000000000000000...\n", ".. ... ...\n", - "172 13034 0000000000000010000001000000100000010000000000...\n", - "173 13035 0000000000000010000001000000100000000000000001...\n", + "188 13617 0000000000000010000000000000000000000000000000...\n", + "189 13618 0000000000000000000000000000000000000000000000...\n", "\n", - "[174 rows x 2 columns]" + "[190 rows x 2 columns]" ] }, - "execution_count": 95, + "execution_count": 93, "metadata": {}, "output_type": "execute_result" } @@ -10197,7 +10274,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 94, "metadata": {}, "outputs": [ { @@ -10224,7 +10301,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 95, "metadata": {}, "outputs": [ { @@ -10280,7 +10357,7 @@ "1 509 " ] }, - "execution_count": 97, + "execution_count": 95, "metadata": {}, "output_type": "execute_result" } @@ -10291,7 +10368,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 96, "metadata": {}, "outputs": [ { @@ -10334,7 +10411,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 97, "metadata": {}, "outputs": [ { @@ -10429,7 +10506,7 @@ "[85 rows x 5 columns]" ] }, - "execution_count": 99, + "execution_count": 97, "metadata": {}, "output_type": "execute_result" } @@ -10440,7 +10517,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 98, "metadata": {}, "outputs": [ { @@ -10467,115 +10544,7 @@ }, { "cell_type": "code", - "execution_count": 101, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
residue.klifs_idresidue.idresidue.klifs_region_idresidue.klifs_regionresidue.klifs_color
01461I.1Ikhaki
12462I.2Ikhaki
..................
8384_a.l.84a.lcornflowerblue
8485_a.l.85a.lcornflowerblue
\n", - "

85 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " residue.klifs_id residue.id residue.klifs_region_id residue.klifs_region \\\n", - "0 1 461 I.1 I \n", - "1 2 462 I.2 I \n", - ".. ... ... ... ... \n", - "83 84 _ a.l.84 a.l \n", - "84 85 _ a.l.85 a.l \n", - "\n", - " residue.klifs_color \n", - "0 khaki \n", - "1 khaki \n", - ".. ... \n", - "83 cornflowerblue \n", - "84 cornflowerblue \n", - "\n", - "[85 rows x 5 columns]" - ] - }, - "execution_count": 101, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "local.pockets.by_structure_klifs_id(12347)\n", - "# Equals\n", - "local.pockets.by_structure_klifs_id(12347, extension=\"mol2\")" - ] - }, - { - "cell_type": "code", - "execution_count": 102, + "execution_count": 99, "metadata": {}, "outputs": [ { @@ -10670,19 +10639,18 @@ "[85 rows x 5 columns]" ] }, - "execution_count": 102, + "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# It is also possible to get pocket details from pdb files\n", - "local.pockets.by_structure_klifs_id(12347, extension=\"pdb\")" + "local.pockets.by_structure_klifs_id(12347)" ] }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 100, "metadata": {}, "outputs": [ { @@ -10722,7 +10690,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 101, "metadata": {}, "outputs": [], "source": [ @@ -10745,15 +10713,15 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 102, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:opencadd.io.dataframe:Suspicious residue ID: _0 (from QH1_0)\n", - "WARNING:opencadd.io.dataframe:Suspicious residue ID: _0 (from QH1_0)\n" + "Suspicious residue ID: _0 (from QH1_0)\n", + "Suspicious residue ID: _0 (from QH1_0)\n" ] }, { @@ -10891,7 +10859,7 @@ "[3604 rows x 11 columns]" ] }, - "execution_count": 105, + "execution_count": 102, "metadata": {}, "output_type": "execute_result" } @@ -10903,14 +10871,14 @@ }, { 
"cell_type": "code", - "execution_count": 106, + "execution_count": 103, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:opencadd.io.dataframe:Suspicious residue ID: _0 (from QH1_0)\n" + "Suspicious residue ID: _0 (from QH1_0)\n" ] }, { @@ -10926,7 +10894,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:opencadd.io.dataframe:Suspicious residue ID: (from QH1)\n" + "Suspicious residue ID: (from QH1)\n" ] }, { @@ -10954,7 +10922,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 104, "metadata": {}, "outputs": [ { @@ -10993,14 +10961,14 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 105, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:opencadd.io.dataframe:Suspicious residue ID: _0 (from QH1_0)\n" + "Suspicious residue ID: _0 (from QH1_0)\n" ] }, { @@ -11017,7 +10985,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:opencadd.io.dataframe:Suspicious residue ID: (from QH1)\n" + "Suspicious residue ID: (from QH1)\n" ] }, { @@ -11032,7 +11000,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:opencadd.io.dataframe:Suspicious residue ID: (from HOH)\n" + "Suspicious residue ID: (from HOH)\n" ] }, { @@ -11066,7 +11034,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 106, "metadata": {}, "outputs": [ { @@ -11093,14 +11061,14 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 107, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:opencadd.io.dataframe:Suspicious residue ID: _0 (from QH1_0)\n" + "Suspicious residue ID: _0 (from QH1_0)\n" ] }, { @@ -11116,28 +11084,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:opencadd.io.dataframe:Suspicious residue ID: (from QH1)\n" + "Suspicious residue ID: (from QH1)\n" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ - "Pocket (pdb): Number of atoms: 1156\n", - "Ligand (mol2): Number of atoms: 49\n" + "Pocket (pdb): Number of atoms: 1156\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:opencadd.io.dataframe:Suspicious residue ID: (from HOH)\n" + "Suspicious residue ID: (from HOH)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "Ligand (mol2): Number of atoms: 49\n", "Ligand (pdb): Number of atoms: 31\n", "Water (mol2): Number of atoms: 3\n" ] @@ -11180,17 +11148,17 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 108, "metadata": {}, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO3deVxU5f4H8M8sbAPDvomK+y5uIKJkWmJp4laiP/NaSYleLaybNm6Ja3fQFnJJx9stqW4ZZiqaaWSmGGoQKiCmSYgoKjIMw7DNMDPP74+DI+4IszDM9/3qxWvmcHieZ0w+fs95znkOjzEGQgghjcW39AAIIcS6UYwSQkiTUIwSQkiTUIwSQkiTUIwSQkiTUIwSctv58ygrq3t94QI0GouOhlgJilFCbps5E6+9Vvf6n/9EcbFFR0OshNDSAyCkedFq8cMPGDMGAGprUVyMigpotVCpoNOhvBx6PZRKAFCrc6uqjpWVlTHGuK9KpVKv15eXl+t0unnz5vXt29fFxcWyH4eYAcUoIXdYswYvvoinngKAAwcwZ84D9xw+vOTXX2c96LuZmZmDBg363//+Z4IxkuaFYpSQO/j7Izoaa9YAgFgMHx+IxRAI4OoKPh9ubuDx4O4OAN27u3btGuPm5sbn87mvrq6uAoFALBaXlZUtWLDg66+/njZt2nPPPWfZT0RMjUc3gxIAaWlpS5cujYiIWLx4saXHYklPPoldu+DmhqFDUVqKQ4fQpk0jm1q7dq1EIuncuXN2drajo6NRh0maF5piIgBw6dKlw4cP5+TkWHogzYJQiI8/xsWLTWrkX//6V79+/S5evPjee+8ZaVykmaIYJQBQWloKwMvLy9IDsaTDhzF5MlxdASA0FMeOwcenMe0cOHBAr9cLhUKZTMbn8+Pj48+dO2fcoZJmhWKUAIBcLgfg6elp6YFYjE6HuXPxxhvYu7duy+DBcHB47HZmzpw5evRomUwGIDQ09LXXXtNoNLNnz6azZy0YxSgBblWjthyjn3+Oc+fQoQMiI5vUDjehtHDhwqKiIgBr165t1arV0aNHv/jiC6OMkzRDFKMEuFWN2uxBfU0NVq4EgPfeg719k5qaOHHiuHHjysvL3377bQBubm7r1q0DsHnzcbncCEMlzRDFKAFsvhpdvx6FhejbF5MnA4Be36TWNmzY4OLisn379h9++AHAtGnTYmJO/vHHlnfeMcZYSfNDMUoA265Gy8oQHw8Aa9eCz4dOh9BQSCSorGxkg4GBgcuXLwcwb9686mo1gAULQoVCfP45Dh821qhJM0IxSgDbnqmXSlFaimHD8MwzAJCYiD/+wI4dsLNrfJvz5s0bMeJlb+/UNWscAHTujEWLwBj++U+o1UYaN2k26PJ7AgCenp4KhUIul9vacX1REbp0QXU10tIQFoaaGnTrhsuX8fXXmDq1SS2
npyMsDEIhTp1Cz57QaNCvH86dw5o1sO1bHFogqkYJdDqdUqnk8/nu3E2OtmTLlhNCIXvhBYSFAcCGDbh8GX36YMqUprY8cCBiYqDRYPZsMAZ7e2zZAh4Pq1cjL6/pAyfNCFWjBHK53Nvb28vLq6SkxNJjMasLFy706tXLw6PzsWMZXbs6l5Whc2fI5ThwAM8+a4T2y8vRoweKivDZZ5gxAwBeeQWJiXjmGRw8aIT2STNB1Six3WvvFy9erNVqJ058smtXZwBbt/5dVobhw42ToQBcXfH++wAwfz5u3gSADz6Ajw9++gnffmucLkhzQDFKbHSaPj09/fvvv3dycnr33XcBXLt2bcWKoI4dJ65bV23EXqZOxbPPwsGh7kDeywtSKZydUV5uxE6IhVGMEhu9aHTRokWMsXnz5rVp0wbAihUrqqqq+vQRhIQ4Gbejzz7DuXN1514BzJiBtDT88gtKSwFAocDy5cbtkJgbxSixxWr04MGDhw4dcnd3X7BgAYC//vrrs88+EwgEq1atMnpfAQFwc7v9lseDmxtSUrBwIQBUV+PIEaP3ScyKYpTY3EWjjDFuWdXFixdzNfiSJUtqa2ujo6N79OhhnjGEhqKoCGlp5umNmBbFKLG5g/rt27dnZmYGBATMnTsXQEZGxnfffefo6MidJDWbjz7CvHnQas3ZJzEJeogIsbmZ+n379gGIi4sTiUQAqquru3XrNnbs2LZt25pzGF26YPRobN5szj6JSVCMEps7N+rj4wOgoqKCezt06NDs7GyNJZ5Jv2gRQkLg62v+nokx0UE9sbmD+lGjRgFYvnz5lStXuC1CoZCrTM2Dx0PnzgDg5IT330fXrmbrmZgExSixuWp01KhRkyZNUqlU8+bNM3/v1dUYMQIODqipAYDRoyGTmX8UxJgoRm2aWq3eunVrXl6el5eX7VSjADZs2ODu7v79998nJyebuev163HxIlJSmro+NGk+KEZtlEqlio+PDwwMnDVrllKp3LRpU4cOHSw9KPPx9/dfuXIlgDfeeMNwktQMysqwdi0ArFsHPv3ytRiM2BilUimVSg21Z79+/ZKSkvR6vaXHZW46nS4sLAzA/PnzzdbpO+8wgA0bZrYOiTnQCk82pKSkZOPGjR9//HFZWRmA8PBwiUQyduxYS4/LYrKysoKDgwH8/vvv/fv3N3V3d61tSloOS+c4MYcbN25IJBLDZHR4ePjPP/9s6UE1C9ws08CBA3U6nan7eu01BrCoKFP3Q8yNqtEWrqCg4MMPP/zPf/5TXV3N4/HGjBmzdOnSQYMGWXpczYVKperZs+eVK1c2b948e/Zs03V04QJ69QJjyM6Gue44JeZi6RwnppKXlxcbG+vg4ACAz+dHRkZmZGRYelDN0c6dOwG4urpevXrVdL08/zwD2KxZpuuBWAzFaAuUk5Mzffp0oVDIBWhUVFRubq6lB9WsjRs3DsDUqVNN1P7Jkyd79ZoxaJDclEFNLIZitEU5ffr09OnTBQIBAHt7++nTp1+4cMHSg7ICBQUFLi4uAH744QdTtP/000/j1gqnpOWhGG0hTpw4MXr0aO5EjZOTU2xsbGFhoaUHZU3Wrl0LoHPnzlVVVcZt+ccffwTg4eEhl8uN2zJpJmiKqSUoLi5euXLlpk2bXFxcoqOjJRJJQECApQdlZbRa7cCBA0+fPr106dK7Fm/ev3//lStXdDpdeXm5Xq9XKpWMMe6iMYVCAUCpVOr1+vLycp1Op1KpMjMznZ2duZ9ljIWEhGRmZq5bt27+/Pnm/1zEHCwc48QYEhMTAfTr14/qnaY4efIkn8+3t7c/e/Zs/e2Pe0lpSUmJ4We//vprAK1bt66srDT7ByJmQgvltQTc2iLDhw+3qfvijS40NHTmzJkymWz27NlHjhzh8Xjc9rFjxw4cOJDP57u5ufF4PHd3dwAeHh4A3Nzc+Hy+q6urQCAQi8VCodDFxYXbAUBtbe2yZcsArFy50pwrSBEzoxhtCWx
tpTvTiY+P37t3b2pqamJi4iuvvMJtXLFiReNa27p168WLF7t16/bSSy8ZbYik+aEYbQlsbaU703Fzc1u3bt20adPmz58/ZswYboHn+zKcCdVqtRUVFbW1tZWVlRqNpqqqSq1W19TUlJWVLV++HIBUKuUuPiMtFf3fbQmoGjWiF198cdu2bSkpKSEhIZ07d37QhFJDmurfv/+AAQPGjx9v2hETS6MYbQmoGjWuSZMmpaWlXb58+fLlyw/ax3Am1M7OztnZ2d7eXiQSOTg4ODo6Ojk5OTg4iESisLCwGTNmmHPkxCIoRlsCW3tCsknpdLr169dXVlbOnTt34sSJ9SeU3N3deTweN6Fk6WGSZoRitCWwtUd7mtSXX3559uzZ9u3bf/DBB9yKBIQ8HC3A3RJQNWosGo2GWxV/1apVlKGkgagatXqM1f76q79W25W7K5w0xcaNG/Pz84OCgl588UVLj4VYDYpRq6fVyhn7y9HRz3C5OGkc7vlUAKRSKZ+elEQajP6uWD2tVg5AIKAj+qaKj48vLi4eOnToc889Z+mxEGtCMWr1dLpSAEIhzS81SXFx8fr16wFIpVJLj4VYGYpRq8dVo0IhVaNNsnz5cpVKNWHChCFDhlh6LMTKUIxaPa22FIBAQNVo4+Xn5//3v/8VCASrV6+29FiI9aEYtXo6HVWjTbVo0SKNRvPyyy/36tXL0mMh1odi1Opx1SidG220M2fO7Nixw9HRMS4uztJjIVaJLniyHK0W+/ahoAC9eiEiognNcDFK1WgjvfPOO3q9/vXXXw8MDLT0WIhVooeIWIhOh2efxRNPICQE+/ZBp4NEgv374eaGl19+rJZKSj5VqQ55e88Wi4eZaLAt2JEjR4YPH+7m5paXl0e3gZHGoWrUQvbuRbt2WL4cACIjMXgw1GrMmIHHvOq7puZcefnPWu3NiopfKUYfF2Ns4cKFACQSCWUoaTSKUQs5exbBwbffhoQgJwdDh8LLC56ed3z19KwYE6D3dhYIvIRCT6HQWyBwM/xcQUFMmzbrRKJgjeaBS7o9Wm0t8vPh7w9X1yZ8JOuzc+fOEydOtGrVKjY21tJjIVaMYtRCxGJUVNx+W1EBxqBQQKG4d9+rg/tUKLIMb3k8ARep3t4xPJ6guvqss3Oog0MntfpiVdUfQqH3rcD15PMbcJf9Dz8gLg5hYfjzT/TqhQ8/xN9/A0CHDmjRa7brdDruQUlxcXGGB3kS0gh0btRCcnLw2ms4ehT29lAoMHgwTpyAXg+5HKWldV9vvbgyV1styNfpSrXaEq22VKdTcm0EBKzy9JxSVLSsqiqzVatlOp3q8uV/1u+Ex3MQCj0dBO27zbG7o8jl/vP0RI8eGDoUJ07AwwMAoqLwj39AowGAyEg4Od137Evy86+p1Y58PoAVHTr42NmZ8k/KVLZu3Tpr1qwuXbqcPXvWzjo/AmkmWnK50az17o0ZM/Dkk+jYEXl5+OgjcI+TvN+aoW3ufMuYVqcr1WrlAoGnnZ1fhw7faLU3c3P7tmv3iYdHlFYr576r1Zbq9ZW1tdcEOicc/fv+w/j2W/ToUZehAMaPx/HjeNTdkBq9fmG7dl0fELJWobq6mnsY/Zo1ayhDSRNRjFrOrFmIiYFCcd/ofAgeTygU+gqFvgCqqjKcnPozpuHx7Fxdn3Nzm1B/T8bUWq1cX6nAryW3y9uSeq9dXFB/IXc7u7pS9FFyKioUtbUigSDIeg6HlUrlxYsX8/Ly8vLykpOTr1y5MmDAgEmTJll6XMTqUYxaFI/3uBl6F6Xyh6KiZYCgffvPeDz7e5p3sLMLgHsAHjSHX1aGt95CbS24iuzYMYSFNaTfv2tqlDqdl1DYPGNUoVD8fT/19/Hy8ho0aBCtLkiajs6NWrfKyvQrV+aLxU8GBKxqZBMJCUhJQVQUzp7FmTPYv/+RM0sL8vJmBgQ0h4N6rVZbWFiYl5dnKDM5lZWV9+4sEok63VJdXf3JJ5/4+/v//fffTs3
ggxCrRtWoddNoCioqjtrZPfBx6o/25pt49lmcOoUxYyCVwhoe1paRkfHuu+/m5eVdunSptrb23h28vLw63alz586tWrWqv096enp6evpnn302d+5ccw2ctExUjVq3mzdlly/P9vaOaddOZrZOfykr6+vi4mWhy6FOnDixbNmylJQU7q2Hh0fHenr27Nm7d2/ucZ4Pt2vXrueff75t27YXL160t7/7fAghDUfVqHWzyJrNA1xcRp4509bR8XtLrIf022+/paSkPPfcc2vXru3UqZOjo+PD91coFPWP91944QVucfsJEyYEBQVlZ2d/+eWXr776qlnGTlomilHrZpF1SZRaLQMsNTWTnZ0NYNy4cfcuateQmSV/f38uRnk83qJFi1588cV///vfL7/8srBF32tATIr+6li3Ww9iMms1qtRqAbhZ6CwqF6NBQUHc24yMjDVr1nCVZlVV1b37Ozs71z9DOnjwYMO3Jk+evGLFivPnz3/77bfTpk0zz/hJy0Mxat1uHdSbtxrV6QC4WaJ802q1ubm5PB6vd+/e3Ba1Wr17927u9V3nSTnt27d/0GM+BQKBRCKJjo5es2bN1KlT6WmgpHEoRq1baWk3YLBO1+bRuxoPV426WiJGL1y4UFNT07FjR9dbq6j06dMnKSmJKzbd3Nwe/uP3+sc//rFq1apz587t2rXrhRdeMPZ4iU2gf36t2yuvxIeGpt24EfzoXY2n7qDeEjGalZUFoE+fPoYtYrE4KipqwIABjchQAHZ2dgsWLACwevVqumqFNA7FqHWTy4H734hvQnUH9ZY4N8qdGK0fo00XHR3dunXr06dP79+/34jNEttBMWrduHX1zByj5ZauRg3zS0bh4ODw9ttvA+AWKyHkcVGMWjGVChoNxGKY+eJxC54bFQrn9O8/Pyion3GbnTVrlq+v78mTJw8dOmTcloktoBi1YhY5ogegPXq0S26ua1mZmftVKrFnz+g//1zXuXNn47YsEonefPNNAGvWrDFuy8QWUIxasdJSADD/M4QOrV+//aWX7O+3UL9JZWWBMfTubZL7/ufOnevh4XH48OFjx44Zv3XSolGMWjFLVaNyuRyA+Z8Bl5UFAEadXrrN1dWVeyLTe++9Z5IOSMtFMWodLl6EXl/3urAQlZX44w8EB+PIEaxejbNnzTqY0tJSAJ5mz+/sbAAw6vTSHWJjY8Vi8Y8//pienm6qPkhLRDFqHSZNgkpV93rBAmRmIiQEO3bgyScREoKoKPONRKPRVFRU2NnZubg04Hl5RsVVo6aLUU9Pzzlz5oAKUvKYKEatVe/e+M9/cOOGufs1HNGbed14xuqKbtPFKIB//etfIpFoz5493JVVhDQExajV2LQJH36IDz/E+fMA4OCApUsxf765h8Ed0Zv/xGh+PsrLERAAnyYsUX0vvV4fGxt78OBB7q2vr+/MmTMZY8uWLdPpdMbsibRcFKNWo3t39OyJnj1huOlxwgQoFDhyxKzD4KpR858YNcX8kk6ni46O3rBhw7Rp01S3TpqMHz/ezs4uLS3Nz89v8uTJW7duvXbtmjF7JS0OxajVGDECo0Zh1Cj4+9/e+PHHWLwYAE6exKefwgw3hVuqGpXLIRbj1rpORqDVal955ZXExERnZ+ft27eLxWIAGRkZkyZNqq2tra2tlcvlO3bsmDVrVtu2bYcMGbJ69erMzEy6757cByPWoG9fVlZW93rKFHb0KAsOrnu7fDnr3p1168YANno0u3LFhMPIzMwMDw93d3fv0KHD1atXTdhTPbt3s6NHGWNMr2c7drCiIiO0qVarJ06cCMDNze23337jNh47doxb3yQyMrK6uvrChQsfffTRyJEjHRwcDL8v/v7+b7+9eceO2/87CKEYtQ7p6UyrrXt99iwrK2MnT9a9ra5maWksKYl5ezOAubkxmcz4Azh8+HBERAQXJc7OzgA8PDy++uor4/d0j8mTWceOTKFgjLFp025/8EarrKx85plnuI9
w8lZzv/76K1eQTpkyRaPR1N+/qqoqJSUlNjY2MDAQwLBhhwEmELDgYBYXxzIymF5/e+erV1lhYd3roiImlzd1tKT5oxhtOa5fZxMmMIAB7LnnmLGKxdTU1BEjRnABKhaLY2Njs7Kyxo8fz20ZM2aMqcvSyZPZkiVszhzGjBGjFRUV3Mfx8/M7c+YMt3H//v3cY5anTZtWW1v7kB/PysrasEE+bBgTCuv+qAHWrh2bPZslJ7PKSrZ0KQsMZCoVY4wtX87M8g8NsTCK0ZYmKYl5ejKA+fiw775rfDt6vT45OXnQoEFcXHp5ecXFxZWWltbrKMnDwwOAj4/Pd03p6VEmT2bZ2eypp9jJk02NUYVCwT1EpFWrVjk5OdzG5ORk7rB91qxZOp2uwU2xpCT2yivMz+92njo5sbffZhMnsvnzGaMYtRkUoy1QQQGLiKj7xZ49u1z+mAeWOp0uOTk5OLhuKWgfH5+4uLiy+50LLCgoMBzpR0VFlZSUGOkTMMaYXs927WLfflsXozk5bMgQNnUqO3mSffMNu/Owu0FKS0tDQ0MBtGvX7uLFi9zGb775xs7ODsD8+fP19Q/OH0dODpNKWUQECw5mS5eynTtZeDg7c4Zi1FZQjLZMej2TyZirK+vbN9bPz2/37t0N+SmdTpeUlNS9e3cuGdu2bZuQkFBVVWXY4fTp0zdv3ryzI71MJuPuaPL399+zZ0/TB6/TseRkNmAAA1irVuyFF1h2NmOMLVjAPDxYfDwDWGAgk8luny9+pOvXr3OrlHbt2rXw1snLL7/8UiAQAJBIJE0fNmNMrWZLl7Lvv2d//MGefJJi1FZQjLZkeXmlTzzxBJeJr776qlKpfNCearU6MTGxS5cu3M7t27dPSEiorq427HDq1KmoqCgej7dw4cJ7f/zvv/8eNmyYoSytf+z/WDQa9vnnrGvXulI6MJBt3Mhefpnl5jLGmErFundnmzezHj3qdujfnx048OhmL1++zH20Hj16GM7kbt68mXuG3YoVKxo32vviYpQx9vrrLCiIYtQmUIy2cFy1KBKJAAQGBv7888937VBTUyOTydq0qXsoXqdOnWQyWf1plqNHj3Lz2twc/bvvvvvIjtq1a3fo0KHHGqdazRITWefOdfnYoQNLSGD1YvwOOh1LSmIdOtTtPGQIS019YMv5+fmdOnUC0L9/f0MpvXbtWgA8Hu+jjz56rHE+kiFGlUoWEEAxahMoRm1Cbm7uwIEDueCIiYlRqVSMMZVKlZCQ0KpVKy4ig4KCEhMTtfWOk1NTUyMjI7nvuri4xMbGFj3qos2zZ8/e29HDVVZWbt2aGhBQl4k9e7KvvmrQ0bpazWQy5utb94MRESwr6z67paamikSiwYMHK7hrphiTSqXcCDdu3Pjobh7T1at112YVFrJPP6ULnmwCxaitqK2tXbVqlb29PYCOHTu++uqrhhs6Bw0alJycXH+CJSUlJSwsjPuuq6urRCJp+DxVbW2tVCrlOurQocORI0cetKchxwUC+/btNX36sMTExzjdeasRJpUysZgB7KmnDkZFReXn59+1z/Hjxw2BvnTpUgACgWDbtm2P19PjqK5mDg6Mz2eNPb1BrAnFqG3Jzs4eMGCA4RL68PDw5ORkw3e5OfqQkJBHztE/UlZWVv/+/QHw+fzY2Niampr635XL5XFxcdz1UgDCwsIOHsxp7Dw5Y4xdvcreeEMjFvsBEIlECxcuNNSeBnq9ft68eQDs7e137NjR+M4a5oknGMD27jV1P8TyKEZtjlqt5u6IT0pKMmzk5uh79OjB5Zqfn59UKq2srGxKRxqNRiqVcpcT9ezZMz09nTFWXFwcFxfn7u7OdXRXjjfR+fPnuXkw7g4lqVRquMxAq9VGR0cDcHBw2LVrl7F6fIglSxhQdwEpadkoRm0RV4oajnMVCkXHjh25XOvQocOWLVvuKh6b4vjx4926dQNgZ2cXHh7OzUEBGDVq1LFjx4zVS32///674bar1q1
by2Sympqal156iStUf/rpJ1N0yklPZ//+d939Yz/9xAAWEmK63khzQTFqc9RqNXdgW3/js88+27Fjx7vm6I2lurpaIpHweLzWrVvzeLzIyMiTTb8x/lEOHjzInb4AwC044urqmvqQGX1jGDuWASwxkTHGKiuZvT0TCGgRk5aPYtTmFBUVcXdD1t9YXFzc8PsgGyc8PByAKSbHH0Sv1yclJXXq1KlNmzZisfj48eOm7vH99xnAoqPr3g4ezAC2f7+puyUWRuuN2pz7rrvs4+PDXYtuOlqtFoDhHlMz4PF4UVFRp0+fLikpqaioMPrT7e81fDiA2wtpc3ckmHldbWJ+FKM2x2zrLufm5u7cufM898wTyz1P1MXFJSwsjDFmhgfQ9+sHd3fk5aGwEKAYtRkUozbHbE8B2bNnz6RJkxITE+v3a/5l8wFwN6oeMX2eCQQYMgQAUlMBIDwc9vZQKmsqKqpN3TWxIIpRm2O2OKtffur1+rKyMj6fb7jUyZzMFqMAhg0Dn4/MTBUAsRhDhow/d84pLS3VDF0TS6EYtTlmO7iuX/aWlZXp9Xp3d3duRSUzGzx4sKOj45kzZ8rKykzd19NP54nF7fftG8i9DQnpCuDo0aOm7pdYEMWozTHbudH6Za+lnifKcXR0DAkJ0ev1Zjk92k6vLz1//jx3RQRXCP/666+m7pdYEMWozTFbotUvey14YpQzfPhwmOW4XigUcgvsc5E9dOhQgUCQnp5eVVVl6q6JpVCM2hyLVKOWmqY3MGdVWP9UrJubW9++fTUazYkTJ8zQNbEIilGbY+ZqtP5BvQWr0SFDhtjb2586dUqpVJq6r7tmtMw5wUUsgmLU5pgt0RQKBQBuGSeLV6MikSg4OFin06WlpZm6r4EDBzo7O+fm5hYXF4Ni1AZQjNoc8yRaeXm5RqMRi8XcwqNmO5PwEGaLM3t7+/fee2/79u3cEjBDhw7l8/knTpyoqakxddfEIihGbY55YvSu3LTsTD3HnFVhbGzs5MmTuRj19PQMCAjQ6XTjxo3btm3bjRs3zDAAYk5CSw+AmFVlZWVNTY1IJHJycjJpR3flZnOoRp944gmhUJiRkaFSqcRisXk6ZYy9+eabV65ccXV1TUlJSUlJAdCzZ8+xY8dGREQMHz5cKKTfQatH1ahtMfO1982qGnVxcenfv79WqzXbpDlj7I033li/fr29vX18fPymTZvGjBkjEolyc3Pj4+NHjhwZEBAwffr0b77ZXlpqnhERk6AYtS0WuRPUnP0+nDmP63U63YwZMzZt2iQSifbu3Tt79uw5c+bs27dPLpenpKRIJJIePXrcvHnzq6++Wr78e19fhIRg4UIcOwbGzDA6YkwUo7blvgfX3ISycd2VmxafqeeYLUY1Gs3//d//JSYmOjs779271/CEagCOjo4RERFSqTQ3N/fPP//88MMPhw+fKxTijz8QH4+hQ9GmDWbOxPffQ6UCgF9+QUhI3eujR7FiBd58E2fO1LW2fj127TL1pyGPQDFqW+49uL506VKXLl1mzZpVUVFhxI6aZzXK3VP0+++/m/SeIrVaPWXKlO+++87d3T0lJeXpp59+0J7dunV76623ZLJhJSXYtQszZ6JNGxQV4dNP8cIL8PbGiBHIzoZKhbg4AKiqglyO68dkHSIAAAcBSURBVNdhmPMvLUV5uek+CmkQilHbwqUb99A3TlpaWnV19datWwcMGGDEayqHDh26ZMkSrvrTarUqlUooFLq6uhqr/ca57z1FhYWFYWFhK1euzMjI0Ov1Teyiqqpq7Nixu3fv9vT0/Omnn7gbQx/JxQUTJmDrVhQWIi8PCQmIiABj+OUXiESYMAGnTuHUqdv7X7uG/Hzk58P0a62QBrDw6vvEvM6fPz9q1CiBQCCRSDQaDbcxOzvb8DDkmJiYJj4Q9F7cJT6+vr7GbbZx3nrrLQDLli0zbNmyZYvh18Hb2zsqKioxMbG0UQ+YV6lUTz31FAA/P7+srKw
mDrW0lCUns+RkJpGw06dZeDjbv5+98QabMoVNmMBiYlhMDAsOZtu2NbEf0lQUozYnNjaWq0ZDQ0Nzc3O5jfd9GLKx5ObmAujevbsR22y03bt3Axg2bJhhS1VVFTfnwz3BlCMQCIKDg+Pi4rgStSEtKxSKsLAwAG3btr1w4YKxBszFKGPszTfZ1Kl1MXriRN134+IoRi2PYtQWpaamdurUCYCjo6NUKtVqtdz2kydPdu/eHYBQKJRIJGq12ijdffDBBwC6du1q6qfmNYRcLufz+XZ2dkePHr13PHl5eQkJCREREdzNVxw/P7/p06cnJSUplcoHNXvjxo2+ffsCaN++/cWLF404YEOMKpWsdev7x+i337ING5jxnopNHg/FqI1SKpUxMTFcWTp48ODz589z27mHIXOLKwcFBWVmZja6C71en5ycHBoailt31g8ZMsSIZVqjLVmyJDAwEICXl1dUVJRMJrt27dpd+yiVyu+++y46OrpVq1aGPHVwcBg5cuSVK1fu2vnatWu9e/cG0K1bt8LCQuOONjf39rNFf/6Z7dnDtm1jly7VbfnxR3bwIDt+nO3dy/77X+P2TBqKYtSmHThwoE2bNgCcnJykUqmhOvvtt9+6dOkCwM7OLi4uzlCuNpBWq/3mm2+CgoK49PH394+Ojvbz8wPg4uKyefPmBh4mm4hOp3v99dc7duxY/xA+PDx8zZo1mZmZ944tJydHKpVGRETY2dmJxeK7ivSCggLuz6pnz55FRUVm/Bx3+PRT9ssvlurc1lGM2jqFQhETE8OlSUREREFBAbe9srJSIpFwT10eNGjQuXPnGtKaRqNJTEzkzgwACAwMTEhIqKqququjkSNHXr582YSfqmHy8vJkMllkZKSDg4MhUn19fblDeIVCcdf+JSUlR44cqb8lPz+fi+MBAwbcvHnTjGO/w4ED7JNPLNU5oRgljDHG9u3bxx29urq6ymQyw/aDBw9y5apIJNq3b99DWlCr1YmJiVxdxp0iTEhIqK6uvmu3HTt2+Pj43NuRZVVWVqakpMTGxrZt2/auElUqlT5oluncuXOtW7cGEB4eXlZWZv5hc86cYc88wyQS9uOPlhqCraMYJXVu3Ljx/PPPcwkyatQowxlA7iyqt7f39evX7/uDNTU1MpmMS1sAnTp1kslktbW1D+ro+vXr48eP53YeO3bstQc0ayn3nWVq3759TExMUlJSeXk5t1tOTg73D8+wYcMMG4ltohgld0hKSuLuNXJ3d69fLd43Q1UqVUJCgmESJigoKDExsYEnUpOSkjw9PT0CAp4/fXqn5Q6HH6K0tHT79u0vvfSSr6+vIU+dnJxGjRr1zjvvcDdojR49mjtlQWwZxSi527Vr18aNG8elxqRJk+57yk+pVEqlUsO9nv37909KSnrciaPCwsJ3U1ODMzKCMzIW5uWVPbiAtTjDLBO3rh2fz7e3t3/++eeNdU0YsWo8RuvJkPv54osvXn/9dZVK5evru2XLlokTJ3Lbb968uWnTpo8//ph75nt4eLhEIhk7dmyjO/pBLo8vLKzS6TyFwkXt2j3l7m6cD2AaxcXFBw4c+OuvvyIjI4ODg2m1UAKAYpQ8UEFBQXR09C+//AIgKipq1apVn3/++YYNG7h1PcLDw1esWDFixIimd3RNo1lx6VKGSgUgwsNjcWCgK8UTsR4Uo+Rh9Hr9xo0bFy1aVFVV5ejoWFNTw+Pxxo0bt3Tp0pCQECN2xIBdJSUfFRZW6/XednZL2rXrIRJdVqu577ZzcPCyszNid4QYEcUoebQLFy7MmzevR48e165dW7x4seG6eqMrqKlZfulSdmUlDxjv41OsVge5uAAY6ubWXSQyUaeENBHFKGle9MC3xcV75fJILy8w9qKfn6VHRMgjUIyS5kjH2M6Skp8VCq4InRMQ4MintXFJM0Un8klzJODxAPQSiUZ7egKwr7fONCHNDcUoab587Oy60ilR0uzRgRIhhDQJnRslzZRSq2W
AO11ASpo9ilFCCGkSOqgnhJAmoRglhJAmoRglhJAmoRglhJAmoRglhJAm+X8U1V9jMIYuMgAAAjB6VFh0cmRraXRQS0wgcmRraXQgMjAyMC4wOS4xAAB4nHu/b+09BiDgAWJGBgiQB2IlIG5gZEvQANLMLGwJFiCakQUhAKUVDJBoJgSNrg7TIFwmYlGBoRRdhlNBAeR+KMWuABFmZodoQNAQCSYMCQ4FkH//M8JoAQWQOCsLNwNHAoNgBhODuAIjUwYTo3gCE3MCE68CM2cGE7NEAgsrAysbA5scAztHBhO7cAK7mAKHCAMnVwKneAIXdwYTN08Cj0QGE4+kAi9fBhOvQAKfVAK/VAYTv3SCgHQGk6BQgqAog5BwBpOYHIOMHIOsHIMICysjEzOnOBu7sJAgAwcbFzePBDMnG58Uv7QArzgsghjkG6S793eYX9gP4nhwstvf/c1gD2Jz32FwaM81BbNzt2s5RKqpgNXYzvI/MLt0ogOI/bYn7MAqbWcw+3PFtAMeAZJgNkNq9oEfqjvA6tkZRA+k3/MAs6N+nLIXFbsOZvO++2Hfd8DjAIituDbCYW5JMpht+KXPQfOzApi9RU3PIYyBCWI+T7tDjyQrmM0c1+KgsHQqmB3255u9WE8wmP2QYeb+wNvuYL2ZT5UObN5WCGZvCC860HHJGswO5+U7cFreDOyvzaU2ttX7a8Hii7PnHnDbLQF226GdbXYdjJVg9pGyZgfv6nYw245/r4PFcog5HpvmOjTsiwTbe+WEhkOrxBQwm0s15IBmhRfYfMFLJw9cZ2sEs70/zTzwxikNzBYDAFHjlN45N0kPAAACMnpUWHRNT0wgcmRraXQgMjAyMC4wOS4xAAB4nJ1WS27cMAzd+xS6wAgkRVHissj0AxRJgBToHbrqprl/qc/IHiQBYg4MDJ9pPvMvy+vr3/hlC+33cv35519YP7puW0gYEocA716qGn4TADT7C0aoUockJKVxQDQthIfwEcXx2oZFppKnBKn6WCgCNr+bbYHq9IWiSGksF4jCSC6WS4rEIw6OmbN4WVKmHlGKwJp9LGzZoDRiS5qdLClKqTAqbaH5snsxD6jiyG6p7MtLwJhrLrPrKrCXRZnriA2TOLvOalR7fY0lZwYfC0eWkkaGMrJ7AgohzllImLy+EMHsFyjqnCNuOc1DkiL+GpHKzDNkH4t1CWpOs9JEvuy2yvCNpRRnvzTbKjL4NLF7jgh1Tk9BPObl25m9CywwvFK567rPs9h+UdS5MbPKcaafzvhCkuZm0ELFx2JdB5VGzcGWhI8lR4WxpVqNCL2+KPWNab3LcwOfZ7HzKPXN0OZo8p1maSfJ2ipFQQ8sz29Z8AMWsRkUHXMJcOfLCRaO1Yo0bKvenWq/3rK88yGydW4db+gAjY5uoJ2YU2P30tLQUZM68wK4Aw4Iy4YD5aUxSRYwqSxQ2jfTDdTuG90AwUHTumACDbRs7I14BLujdrt9gww2k3ARmNR8owWWbxYM7hoDy1G7TTuQDmiBujSlg6mpAVeuDRAtoD248ZhFtueA9AAS7OAxhIcf37uNPWNTGpra/pvm69N1+w9/eaUC90aVpwAAAXh6VFh0U01JTEVTIHJka2l0IDIwMjAuMDkuMQAAeJwlkjtuIzEQRK+yoQRQRP8/cCjAoTfY0HA0uU7gw281hQGImQJfdbE4z+fz3+377+Pn/l6/Lr5un/fr9njJhfXS64VH73iBcN2+7i98zJaL//zeHrrFNHvZdvPg9QFFXdSXbrJ2g2I7SSSWbG13OUoxlS7a1W3eb4wtA1I2VQek2EpRvXgTFb255tZeD9reEVQHjCyy9eBtWjEgBkmxzrYsC7OjCbcff2YThQTfAjtkSARS0JZQ1hiyU6JGckkrB+ikZQBlE6tN+qRKOkoKM+OEpCy5PmyLkOoI2WOD5FLEaMXQhY+AOVYoLjKMxyMUI0eQbIRhIO0FRMnfcyPSfMKBEJ4tXp7nAEWIPYx
Ni2gF9ok5oMtnj253HV/bFqm8UImzFcrxjcr5KK1CJy7uBzGnIy/MnLbQn9qxFsEMVEMWxHTMG4fh0zN+hCF1Z067c0M593LcLWXdf/8DMjWCK33TS9MAAAAASUVORK5CYII=\n", + "image/png": "\n", "text/plain": [ - "" + "" ] }, - "execution_count": 111, + "execution_count": 108, "metadata": {}, "output_type": "execute_result" } @@ -11201,17 +11169,17 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 109, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ - "" + "" ] }, - "execution_count": 112, + "execution_count": 109, "metadata": {}, "output_type": "execute_result" } @@ -11222,7 +11190,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 110, "metadata": {}, "outputs": [ { @@ -11256,17 +11224,17 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 111, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ - "" + "" ] }, - "execution_count": 114, + "execution_count": 111, "metadata": {}, "output_type": "execute_result" } @@ -11277,17 +11245,17 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 112, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ - "" + "" ] }, - "execution_count": 115, + "execution_count": 112, "metadata": {}, "output_type": "execute_result" } @@ -11305,17 +11273,17 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 113, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ - "" + "" ] }, - "execution_count": 116, + "execution_count": 113, "metadata": {}, "output_type": "execute_result" } @@ -11334,7 +11302,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 114, "metadata": {}, "outputs": [ { @@ -11354,7 +11322,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 115, "metadata": {}, "outputs": [ { @@ -11374,7 +11342,7 @@ }, { 
"cell_type": "code", - "execution_count": 119, + "execution_count": 116, "metadata": {}, "outputs": [ { @@ -11394,7 +11362,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 117, "metadata": {}, "outputs": [ { @@ -11421,7 +11389,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 118, "metadata": {}, "outputs": [ { @@ -11441,7 +11409,7 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 119, "metadata": {}, "outputs": [ { @@ -11462,7 +11430,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -11476,7 +11444,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.6" + "version": "3.9.5" } }, "nbformat": 4, diff --git a/opencadd/data/klifs_fields.csv b/opencadd/data/klifs_fields.csv new file mode 100644 index 00000000..918edd7a --- /dev/null +++ b/opencadd/data/klifs_fields.csv @@ -0,0 +1,101 @@ +field_type,opencadd.df_name,opencadd.df_type,klifs.remote,klifs.local_export,klifs.local_overview +kinase_groups,kinase.group,string,,, +kinase_families,kinase.family,string,,, +kinases_all,kinase.klifs_id,Int32,kinase_ID,, +kinases_all,kinase.klifs_name,string,name,, +kinases_all,kinase.full_name,string,full_name,, +kinases_all,kinase.gene_name,string,gene_name,, +kinases_all,kinase.uniprot,string,accession,, +kinases_all,species.klifs,string,species,, +kinases,kinase.klifs_id,int32,kinase_ID,, +kinases,kinase.klifs_name,string,name,, +kinases,kinase.full_name,string,full_name,, +kinases,kinase.gene_name,string,gene_name,, +kinases,kinase.family,string,family,, +kinases,kinase.group,string,group,, +kinases,kinase.subfamily,string,subfamily,, +kinases,species.klifs,string,species,, +kinases,kinase.uniprot,string,uniprot,, +kinases,kinase.iuphar,string,iuphar,, +kinases,kinase.pocket,string,pocket,, +ligands,ligand.klifs_id,Int32,ligand_ID,, 
+ligands,ligand.expo_id,string,PDB-code,, +ligands,ligand.name,string,Name,, +ligands,ligand.smiles,string,SMILES,, +ligands,ligand.inchikey,string,InChIKey,, +structures,structure.klifs_id,Int32,structure_ID,, +structures,structure.pdb_id,string,pdb,PDB,pdb +structures,structure.alternate_model,string,alt,ALTERNATE_MODEL,alt +structures,structure.chain,string,chain,CHAIN,chain +structures,species.klifs,string,species,SPECIES,species +structures,kinase.klifs_id,Int32,kinase_ID,, +structures,kinase.klifs_name,string,kinase,,kinase +structures,kinase.names,string,,NAME, +structures,kinase.family,string,,FAMILY, +structures,kinase.group,string,,GROUPS, +structures,structure.pocket,string,pocket,,pocket +structures,ligand.expo_id,string,ligand,PDB_IDENTIFIER,orthosteric_PDB +structures,ligand_allosteric.expo_id,string,allosteric_ligand,ALLOSTERIC_PDB,allosteric_PDB +structures,ligand.klifs_id,Int32,ligand_ID,, +structures,ligand_allosteric.klifs_id,Int32,allosteric_ligand_ID,, +structures,ligand.name,string,,LIGAND, +structures,ligand_allosteric.name,string,,ALLOSTERIC_NAME, +structures,structure.dfg,string,DFG,DFG, +structures,structure.ac_helix,string,aC_helix,AC_HELIX, +structures,structure.resolution,float32,resolution,,resolution +structures,structure.qualityscore,float32,quality_score,,qualityscore +structures,structure.missing_residues,int32,missing_residues,,missing_residues +structures,structure.missing_atoms,int32,missing_atoms,,missing_atoms +structures,structure.rmsd1,float32,rmsd1,,rmsd1 +structures,structure.rmsd2,float32,rmsd2,,rmsd2 +structures,interaction.fingerprint,string,,,full_ifp +structures,structure.front,boolean,front,, +structures,structure.gate,boolean,gate,, +structures,structure.back,boolean,back,, +structures,structure.fp_i,boolean,fp_I,,fp_i +structures,structure.fp_ii,boolean,fp_II,,fp_ii +structures,structure.bp_i_a,boolean,bp_I_A,,bp_i_a +structures,structure.bp_i_b,boolean,bp_I_B,,bp_i_b 
+structures,structure.bp_ii_in,boolean,bp_II_in,,bp_ii_in +structures,structure.bp_ii_a_in,boolean,bp_II_A_in,,bp_ii_a_in +structures,structure.bp_ii_b_in,boolean,bp_II_B_in,,bp_ii_b_in +structures,structure.bp_ii_out,boolean,bp_II_out,,bp_ii_out +structures,structure.bp_ii_b,boolean,bp_II_B,,bp_ii_b +structures,structure.bp_iii,boolean,bp_III,,bp_iii +structures,structure.bp_iv,boolean,bp_IV,,bp_iv +structures,structure.bp_v,boolean,bp_V,,bp_v +structures,structure.grich_distance,float32,Grich_distance,, +structures,structure.grich_angle,float32,Grich_angle,, +structures,structure.grich_rotation,float32,Grich_rotation,, +structures,structure.filepath,string,,, +structures,structure.curation_flag,boolean,curation_flag,, +bioactivities,kinase.pref_name,string,pref_name,, +bioactivities,kinase.uniprot,string,accession,, +bioactivities,kinase.chembl_id,string,target_chembl_id,, +bioactivities,ligand.chembl_id,string,chembl_id,, +bioactivities,ligand.bioactivity_standard_type,string,standard_type,, +bioactivities,ligand.bioactivity_standard_relation,string,standard_relation,, +bioactivities,ligand.bioactivity_standard_value,float32,standard_value,, +bioactivities,ligand.bioactivity_standard_units,string,standard_units,, +bioactivities,ligand.bioactivity_pchembl_value,float32,pchembl_value,, +bioactivities,species.chembl,string,organism,, +interactions,structure.klifs_id,Int32,structure_ID,, +interactions,interaction.fingerprint,string,IFP,, +interaction_types,interaction.id,string,position,, +interaction_types,interaction.name,string,name,, +pockets,residue.klifs_id,Int32,index,, +pockets,residue.id,string,Xray_position,, +pockets,residue.klifs_region_id,string,KLIFS_position,, +pockets,residue.klifs_region,string,,, +pockets,residue.klifs_color,string,,, +coordinates,atom.id,int32,,, +coordinates,atom.name,string,,, +coordinates,atom.x,float32,,, +coordinates,atom.y,float32,,, +coordinates,atom.z,float32,,, +coordinates,residue.id,string,,, 
+coordinates,residue.name,string,,, +coordinates,residue.klifs_id,Int32,,, +coordinates,residue.klifs_region_id,string,,, +coordinates,residue.klifs_region,string,,, +coordinates,residue.klifs_color,string,,, diff --git a/opencadd/data/klifs_ids.20201121.csv.zip b/opencadd/data/klifs_ids.20201121.csv.zip new file mode 100644 index 00000000..cb063d7e Binary files /dev/null and b/opencadd/data/klifs_ids.20201121.csv.zip differ diff --git a/opencadd/data/klifs_ids.20210913.csv.zip b/opencadd/data/klifs_ids.20210913.csv.zip new file mode 100644 index 00000000..38b339da Binary files /dev/null and b/opencadd/data/klifs_ids.20210913.csv.zip differ diff --git a/opencadd/data/klifs_ids.ipynb b/opencadd/data/klifs_ids.ipynb index e2459574..4b4d67e0 100644 --- a/opencadd/data/klifs_ids.ipynb +++ b/opencadd/data/klifs_ids.ipynb @@ -18,7 +18,15 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:opencadd.databases.klifs.api:If you want to see an non-truncated version of the DataFrames in this module, use `pd.set_option('display.max_columns', 50)` in your notebook.\n" + ] + } + ], "source": [ "from datetime import date\n", "\n", @@ -31,7 +39,16 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:opencadd.databases.klifs.api:Set up remote session...\n", + "INFO:opencadd.databases.klifs.api:Remote session is ready!\n" + ] + } + ], "source": [ "# Work with remote KLIFS data\n", "remote = setup_remote()" @@ -53,7 +70,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "(12430, 7)\n" + "(12521, 7)\n" ] }, { @@ -88,7 +105,7 @@ " \n", " \n", " \n", - " 8104\n", + " 8165\n", " 1\n", " 3dko\n", " A\n", @@ -98,7 +115,7 @@ " IHZ\n", " \n", " \n", - " 8102\n", + " 8167\n", " 2\n", " 2rei\n", " B\n", @@ -108,7 +125,7 @@ " -\n", " \n", " \n", - " 8103\n", + " 
8164\n", " 3\n", " 3dko\n", " B\n", @@ -118,7 +135,7 @@ " IHZ\n", " \n", " \n", - " 8105\n", + " 8169\n", " 4\n", " 2rei\n", " A\n", @@ -128,7 +145,7 @@ " -\n", " \n", " \n", - " 10188\n", + " 10197\n", " 5\n", " 3v8t\n", " B\n", @@ -143,18 +160,18 @@ ], "text/plain": [ " structure.klifs_id structure.pdb_id structure.alternate_model \\\n", - "8104 1 3dko A \n", - "8102 2 2rei B \n", - "8103 3 3dko B \n", - "8105 4 2rei A \n", - "10188 5 3v8t B \n", + "8165 1 3dko A \n", + "8167 2 2rei B \n", + "8164 3 3dko B \n", + "8169 4 2rei A \n", + "10197 5 3v8t B \n", "\n", " structure.chain kinase.klifs_name kinase.klifs_id ligand.expo_id \n", - "8104 A EphA7 415 IHZ \n", - "8102 A EphA7 415 - \n", - "8103 A EphA7 415 IHZ \n", - "8105 A EphA7 415 - \n", - "10188 A ITK 474 477 " + "8165 A EphA7 415 IHZ \n", + "8167 A EphA7 415 - \n", + "8164 A EphA7 415 IHZ \n", + "8169 A EphA7 415 - \n", + "10197 A ITK 474 477 " ] }, "execution_count": 3, @@ -205,8 +222,8 @@ "outputs": [], "source": [ "# Save local copy of KLIFS IDs\n", - "filename = f\"klifs_ids.{date.today().strftime('%Y%m%d')}.csv.gz\"\n", - "structures_all.to_csv(filename, index=None, compression=\"gzip\")" + "filename = f\"klifs_ids.{date.today().strftime('%Y%m%d')}.csv.zip\"\n", + "structures_all.to_csv(filename, index=None, compression=\"zip\")" ] }, { @@ -396,15 +413,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/opencadd/databases/klifs/core.py b/opencadd/databases/klifs/core.py index 2e3aabac..e2f22b58 100644 --- a/opencadd/databases/klifs/core.py +++ b/opencadd/databases/klifs/core.py @@ -5,6 +5,7 @@ """ import logging +import html from bravado_core.exception import SwaggerMappingError import numpy as np @@ -32,7 +33,9 @@ def _ensure_list(value): # 
https://stackoverflow.com/questions/16301253/what-exactly-is-pythons-iterator-protocol # test for behaviour (value.__iter__()) instead of type # can be removed once input lists are unpacked *kinases_ids - if not isinstance(value, list): + if value is None: + return value + elif not isinstance(value, list): return [value] else: return value @@ -64,7 +67,7 @@ def _abc_to_dataframe(abc_object): for key in keys: results_dict[key].append(result[key]) - return pd.DataFrame(results_dict) + return pd.DataFrame(results_dict).apply(html.unescape) @staticmethod def _map_old_to_new_column_names(dataframe, columns_mapping): @@ -167,8 +170,8 @@ def _standardize_dataframe(self, dataframe, columns, columns_mapping=None): ---------- dataframe : pandas.DataFrame Remote query result. - columns : list of (str, str) - Column names and dtypes(in the order of interest for output). + columns : dict + Column names and dtypes (in the order of interest for output). columns_mapping : dict or None Mapping of old to new column names. If None, no changes are made. @@ -183,19 +186,14 @@ def _standardize_dataframe(self, dataframe, columns, columns_mapping=None): dataframe = self._map_old_to_new_column_names(dataframe, columns_mapping) # Add missing columns (None values) - column_names = [column[0] for column in columns] + column_names = list(columns.keys()) dataframe = self._add_missing_columns(dataframe, column_names) # Standardize column values dataframe = self._standardize_column_values(dataframe) # Standardize dtypes - column_dtypes_dict = { - column_name: column_dtype - for (column_name, column_dtype) in columns - if column_name in dataframe.columns - } - dataframe = dataframe.astype(column_dtypes_dict) + dataframe = dataframe.astype(columns) # Select and sort columns dataframe = dataframe[column_names].copy() @@ -293,7 +291,7 @@ class KinasesProvider(BaseProvider): kinase.klifs_name : str Kinase name according to KLIFS. - kinase.class : str + kinase.subfamily : str Kinase class. 
Available remotely only. kinase.full_name : str @@ -314,7 +312,7 @@ class KinasesProvider(BaseProvider): kinase.klifs_id : int Kinase KLIFS ID. - kinase.hgnc_name : str + kinase.gene_name : str Kinase name according to the HUGO Gene Nomenclature Committee. Available remotely only. kinase.family : str diff --git a/opencadd/databases/klifs/fields.py b/opencadd/databases/klifs/fields.py new file mode 100644 index 00000000..850d61a1 --- /dev/null +++ b/opencadd/databases/klifs/fields.py @@ -0,0 +1,169 @@ +""" +opencadd.databases.klifs.fields + +Defines the fields available in KLIFS (remote and local) and their mapping to the names used +in opencadd. +""" + +import pandas as pd + + +class Fields: + """ + Class for KLIFS fields (remote and local) and their mapping to the names used in opencadd. + + Attributes + ---------- + df : pandas.DataFrame + Contains all field names in KLIFS (remote and local) and the names/dtypes used in opencadd. + Column names: + - field_types: KLIFS models as named in opencadd: + - kinase_groups + - kinase_families + - kinases_all + - kinases + - ligands + - structures + - bioactivities + - interactions + - interaction_types + - pockets + - coordinates + - opencadd.df_name: Field name as used in opencadd + - opencadd.df_type: Field dtype as used in opencadd + - klifs.remote: Field names as used in KLIFS remote + - klifs.local_export: Field names as used in the KLIFS download file `KLIFS_export.csv` + - klifs.local_overview: Field names as use din the KLIFS download file `overview.csv` + + Notes + ----- + - "kinase_names": Kinase names: "kinase.gene_name (kinase.klifs_name)" + - "kinase.klifs_name": Depending on availability: Manning name or UniProt gene name + - "kinase.full_name": Depending on availability: HGNC gene name or Manning name or UniProt gene name + - "kinase.gene_name": HGNC or MGI name + - "kinase.uniprot": UniProt accession + """ + + def __init__(self, fields_path): + self.df = pd.read_csv(fields_path) + + def 
_select_field_type(self, field_type): + """ + Subset DataFrame by field type. + + Parameters + ---------- + field_type : str + Field type, check class docstring for possible types. + + Returns + ------- + pandas.DataFrame + Subset DataFrame with selected fields. + """ + df = self.df.copy() + return df.groupby("field_type").get_group(field_type) + + def _to_dict(self, field_type, key_column_name, value_column_name): + """ + Select two DataFrame columns as keys and values to generate a dictionary. + + Parameters + ---------- + field_type : str + Field type, check class docstring for possible types. + key_column_name : str + Column name for the column to be used as keys. + value_column_name : str + Column name for the column to be used as values. + + Returns + ------- + dict + Selected columns formatted as dict. + """ + df = self._select_field_type(field_type) + + # Select 2 columns and drop all rows with any NaN values + df = df[[key_column_name, value_column_name]].dropna(how="any") + + # Cast DataFrame to dict + if len(df) == 1: + # If DataFrame has only one row, use ugly hack + return {df.values[0][0]: df.values[0][1]} + else: + return df.set_index(key_column_name).squeeze().to_dict() + + def remote_to_oc_names(self, field_type): + """ + Get a KLIFS remote to opencadd names mapping as dict. + + Parameters + ---------- + field_type : str + Field type, check class docstring for possible types. + + Returns + ------- + dict + KLIFS remote to opencadd names mapping. + """ + dict_ = self._to_dict(field_type, "klifs.remote", "opencadd.df_name") + return dict_ + + def local_export_to_oc_name(self, field_type="structures"): + """ + Get a KLIFS local (`KLIFS_export.csv` download) to opencadd names mapping as dict. + + Parameters + ---------- + field_type : str + Field type, check class docstring for possible types. + + Returns + ------- + dict + KLIFS local (`KLIFS_export.csv` download) to opencadd names mapping. 
+ """ + dict_ = self._to_dict(field_type, "klifs.local_export", "opencadd.df_name") + return dict_ + + def local_overview_to_oc_name(self, field_type="structures"): + """ + Get a KLIFS local (`overview.csv` download) to opencadd names mapping as dict. + + Parameters + ---------- + field_type : str + Field type, check class docstring for possible types. + + Returns + ------- + dict + KLIFS local (`overview.csv` download) to opencadd names mapping. + """ + dict_ = self._to_dict(field_type, "klifs.local_overview", "opencadd.df_name") + return dict_ + + def oc_name_to_type(self, field_type, additional_dict=None): + """ + Get an opencadd name to dtype mapping as dict. Used to standardize the opencadd output + DataFrames! + + Parameters + ---------- + field_type : str + Field type, check class docstring for possible types. + additional_dict : dict or None. + If set, add this dictionary to the default dictionary. + + Returns + ------- + dict + opencadd name to dtype mapping. + """ + dict_ = self._to_dict(field_type, "opencadd.df_name", "opencadd.df_type") + if additional_dict is not None: + for key, value in additional_dict.items(): + dict_[key] = value + return dict_ diff --git a/opencadd/databases/klifs/local.py b/opencadd/databases/klifs/local.py index 49fe9565..99739bb1 100644 --- a/opencadd/databases/klifs/local.py +++ b/opencadd/databases/klifs/local.py @@ -20,12 +20,10 @@ CoordinatesProvider, ) from .schema import ( - LOCAL_COLUMNS_MAPPING, - DATAFRAME_COLUMNS, + FIELDS, POCKET_KLIFS_REGIONS, ) -from .remote import KLIFS_CLIENT -from .utils import PATH_DATA, metadata_to_filepath, filepath_to_metadata +from .utils import KLIFS_CLIENT, PATH_DATA, metadata_to_filepath, filepath_to_metadata from .exceptions import KlifsPocketIncompleteError, KlifsPocketUnequalSequenceStructure from opencadd.io import DataFrame, Rdkit @@ -139,7 +137,7 @@ def _from_klifs_export_file(self, klifs_export_path): # Unify column names with column names in overview.csv klifs_export.rename( - 
columns=LOCAL_COLUMNS_MAPPING["klifs_export"], + columns=FIELDS.local_export_to_oc_name(), inplace=True, ) @@ -151,7 +149,7 @@ def _from_klifs_export_file(self, klifs_export_path): for i in klifs_export["kinase.names"] # pylint: disable=E1136 ] klifs_export["kinase.names"] = kinase_names - klifs_export.insert(1, "kinase.hgnc_name", [i[0] for i in kinase_names]) + klifs_export.insert(1, "kinase.gene_name", [i[0] for i in kinase_names]) klifs_export.insert(2, "kinase.klifs_name", [i[-1] for i in kinase_names]) return klifs_export @@ -177,7 +175,7 @@ def _from_klifs_overview_file(klifs_overview_path): # Unify column names with column names in KLIFS_export.csv klifs_overview.rename( - columns=LOCAL_COLUMNS_MAPPING["klifs_overview"], + columns=FIELDS.local_overview_to_oc_name(), inplace=True, ) @@ -384,34 +382,37 @@ def all_kinase_groups(self): kinase_groups = self._database.copy() # Standardize DataFrame kinase_groups = self._standardize_dataframe( - kinase_groups, DATAFRAME_COLUMNS["kinase_groups"] + kinase_groups, FIELDS.oc_name_to_type("kinase_groups") ) return kinase_groups - def all_kinase_families(self, group=None): + def all_kinase_families(self, groups=None): # Get local database and select rows kinase_families = self._database.copy() - if group: - kinase_families = kinase_families[kinase_families["kinase.group"] == group] + groups = self._ensure_list(groups) + if groups: + kinase_families = kinase_families[kinase_families["kinase.group"].isin(groups)] # Standardize DataFrame kinase_families = self._standardize_dataframe( - kinase_families, DATAFRAME_COLUMNS["kinase_families"] + kinase_families, FIELDS.oc_name_to_type("kinase_families") ) return kinase_families - def all_kinases(self, group=None, family=None, species=None): + def all_kinases(self, groups=None, families=None, species=None): # Get local database and select rows kinases = self._database.copy() - if group: - kinases = kinases[kinases["kinase.group"] == group] - if family: - kinases = 
kinases[kinases["kinase.family"] == family] + groups = self._ensure_list(groups) + families = self._ensure_list(families) + if groups: + kinases = kinases[kinases["kinase.group"].isin(groups)] + if families: + kinases = kinases[kinases["kinase.family"].isin(families)] if species: kinases = kinases[kinases["species.klifs"] == species.capitalize()] # Standardize DataFrame - kinases = self._standardize_dataframe(kinases, DATAFRAME_COLUMNS["kinases_all"]) + kinases = self._standardize_dataframe(kinases, FIELDS.oc_name_to_type("kinases_all")) return kinases def by_kinase_klifs_id(self, kinase_klifs_ids): @@ -421,7 +422,7 @@ def by_kinase_klifs_id(self, kinase_klifs_ids): kinases = self._database.copy() kinases = kinases[kinases["kinase.klifs_id"].isin(kinase_klifs_ids)] # Standardize DataFrame - kinases = self._standardize_dataframe(kinases, DATAFRAME_COLUMNS["kinases"]) + kinases = self._standardize_dataframe(kinases, FIELDS.oc_name_to_type("kinases")) return kinases def by_kinase_name(self, kinase_names, species=None): @@ -433,13 +434,13 @@ def by_kinase_name(self, kinase_names, species=None): kinase_names = [kinase_name.upper() for kinase_name in kinase_names] kinases = kinases[ kinases["kinase.klifs_name"].str.upper().isin(kinase_names) - | kinases["kinase.hgnc_name"].str.upper().isin(kinase_names) + | kinases["kinase.gene_name"].str.upper().isin(kinase_names) ] # Search for species (case insensitive) if species: kinases = kinases[kinases["species.klifs"].str.upper() == species.upper()] # Standardize DataFrame - kinases = self._standardize_dataframe(kinases, DATAFRAME_COLUMNS["kinases"]) + kinases = self._standardize_dataframe(kinases, FIELDS.oc_name_to_type("kinases")) return kinases @@ -455,7 +456,7 @@ def all_ligands(self): # Get local database ligands = self._database.copy() # Standardize DataFrame - ligands = self._standardize_dataframe(ligands, DATAFRAME_COLUMNS["ligands"]) + ligands = self._standardize_dataframe(ligands, FIELDS.oc_name_to_type("ligands")) 
return ligands def by_kinase_klifs_id(self, kinase_klifs_ids): @@ -466,8 +467,7 @@ def by_kinase_klifs_id(self, kinase_klifs_ids): ligands = ligands[ligands["kinase.klifs_id"].isin(kinase_klifs_ids)] # Standardize DataFrame ligands = self._standardize_dataframe( - ligands, - DATAFRAME_COLUMNS["ligands"] + [("kinase.klifs_id", "int32")], + ligands, FIELDS.oc_name_to_type("ligands", {"kinase.klifs_id": "int32"}) ) # Rename columns to indicate columns involved in query TODO remove (query) stuff # can columns have metadata? @@ -489,23 +489,25 @@ def by_kinase_name(self, kinase_names): kinase_names = [kinase_name.upper() for kinase_name in kinase_names] ligands = ligands[ ligands["kinase.klifs_name"].str.upper().isin(kinase_names) - | ligands["kinase.hgnc_name"].str.upper().isin(kinase_names) + | ligands["kinase.gene_name"].str.upper().isin(kinase_names) ] # Standardize DataFrame ligands = self._standardize_dataframe( ligands, - DATAFRAME_COLUMNS["ligands"] - + [ - ("kinase.klifs_name", "string"), - ("kinase.hgnc_name", "string"), - ("species.klifs", "string"), - ], + FIELDS.oc_name_to_type( + "ligands", + { + "kinase.klifs_name": "string", + "kinase.gene_name": "string", + "species.klifs": "string", + }, + ), ) # Rename columns to indicate columns involved in query ligands.rename( columns={ "kinase.klifs_name": "kinase.klifs_name (query)", - "kinase.hgnc_name": "kinase.hgnc_name (query)", + "kinase.gene_name": "kinase.gene_name (query)", "species.klifs": "species.klifs (query)", }, inplace=True, @@ -521,7 +523,7 @@ def by_ligand_expo_id(self, ligand_expo_ids): # Standardize DataFrame ligands = self._standardize_dataframe( ligands, - DATAFRAME_COLUMNS["ligands"], + FIELDS.oc_name_to_type("ligands"), ) return ligands @@ -540,7 +542,7 @@ def all_structures(self): # Standardize DataFrame structures = self._standardize_dataframe( structures, - DATAFRAME_COLUMNS["structures"], + FIELDS.oc_name_to_type("structures"), ) return structures @@ -553,7 +555,7 @@ def 
by_structure_klifs_id(self, structure_klifs_ids): # Standardize DataFrame structures = self._standardize_dataframe( structures, - DATAFRAME_COLUMNS["structures"], + FIELDS.oc_name_to_type("structures"), ) # Check: If only one structure ID was given, only one result is allowed if len(structure_klifs_ids) == 1: @@ -571,7 +573,7 @@ def by_kinase_klifs_id(self, kinase_klifs_ids): # Standardize DataFrame structures = self._standardize_dataframe( structures, - DATAFRAME_COLUMNS["structures"], + FIELDS.oc_name_to_type("structures"), ) return structures @@ -591,7 +593,7 @@ def by_structure_pdb_id( # Standardize DataFrame structures = self._standardize_dataframe( structures, - DATAFRAME_COLUMNS["structures"], + FIELDS.oc_name_to_type("structures"), ) return structures @@ -604,7 +606,7 @@ def by_ligand_expo_id(self, ligand_expo_ids): # Standardize DataFrame structures = self._standardize_dataframe( structures, - DATAFRAME_COLUMNS["structures"], + FIELDS.oc_name_to_type("structures"), ) return structures @@ -617,12 +619,12 @@ def by_kinase_name(self, kinase_names): kinase_names = [kinase_name.upper() for kinase_name in kinase_names] structures = structures[ structures["kinase.klifs_name"].str.upper().isin(kinase_names) - | structures["kinase.hgnc_name"].str.upper().isin(kinase_names) + | structures["kinase.gene_name"].str.upper().isin(kinase_names) ] # Standardize DataFrame structures = self._standardize_dataframe( structures, - DATAFRAME_COLUMNS["structures"], + FIELDS.oc_name_to_type("structures"), ) return structures @@ -649,7 +651,7 @@ def all_interactions(self): # Standardize DataFrame interactions = self._standardize_dataframe( interactions, - DATAFRAME_COLUMNS["interactions"], + FIELDS.oc_name_to_type("interactions"), ) return interactions @@ -662,7 +664,7 @@ def by_structure_klifs_id(self, structure_klifs_ids): # Standardize DataFrame interactions = self._standardize_dataframe( interactions, - DATAFRAME_COLUMNS["interactions"], + 
FIELDS.oc_name_to_type("interactions"), ) return interactions @@ -674,8 +676,7 @@ def by_kinase_klifs_id(self, kinase_klifs_ids): interactions = interactions[interactions["kinase.klifs_id"].isin(kinase_klifs_ids)] # Standardize DataFrame interactions = self._standardize_dataframe( - interactions, - DATAFRAME_COLUMNS["interactions"] + [("kinase.klifs_id", "int32")], + interactions, FIELDS.oc_name_to_type("interactions", {"kinase.klifs_id": "int32"}) ) # Rename columns to indicate columns involved in query interactions.rename( @@ -749,7 +750,7 @@ def by_structure_klifs_id(self, structure_klifs_id, extension="mol2"): # pylint # Standardize DataFrame dataframe = self._standardize_dataframe( dataframe, - DATAFRAME_COLUMNS["pockets"], + FIELDS.oc_name_to_type("pockets"), ) # Add KLIFS region and color TODO not so nice to have this after standardization dataframe = self._add_klifs_region_details(dataframe) diff --git a/opencadd/databases/klifs/remote.py b/opencadd/databases/klifs/remote.py index b5cbb7d4..13561736 100644 --- a/opencadd/databases/klifs/remote.py +++ b/opencadd/databases/klifs/remote.py @@ -20,7 +20,7 @@ PocketsProvider, CoordinatesProvider, ) -from .schema import REMOTE_COLUMNS_MAPPING, DATAFRAME_COLUMNS +from .schema import FIELDS from .utils import metadata_to_filepath, silence_logging from opencadd.io import DataFrame, Rdkit @@ -71,25 +71,32 @@ def all_kinase_groups(self): # Use KLIFS API result = self._client.Information.get_kinase_groups().response().result # Convert list to DataFrame (1 column) - column = DATAFRAME_COLUMNS["kinase_groups"][0] - kinase_groups = pd.DataFrame({column[0]: pd.Series(result, dtype=column[1])}) + column_name = list(FIELDS.oc_name_to_type("kinase_groups").keys())[0] + column_dtype = list(FIELDS.oc_name_to_type("kinase_groups").values())[0] + kinase_groups = pd.DataFrame({column_name: pd.Series(result, dtype=column_dtype)}) return kinase_groups - def all_kinase_families(self, group=None): + def all_kinase_families(self, 
groups=None): + groups = self._ensure_list(groups) # Use KLIFS API - result = self._client.Information.get_kinase_families(kinase_group=group).response().result + result = ( + self._client.Information.get_kinase_families(kinase_group=groups).response().result + ) # Convert list to DataFrame (1 column) - column = DATAFRAME_COLUMNS["kinase_families"][0] - kinase_families = pd.DataFrame({column[0]: pd.Series(result, dtype=column[1])}) + column_name = list(FIELDS.oc_name_to_type("kinase_families").keys())[0] + column_dtype = list(FIELDS.oc_name_to_type("kinase_families").values())[0] + kinase_families = pd.DataFrame({column_name: pd.Series(result, dtype=column_dtype)}) return kinase_families - def all_kinases(self, group=None, family=None, species=None): + def all_kinases(self, groups=None, families=None, species=None): + groups = self._ensure_list(groups) + families = self._ensure_list(families) # Use KLIFS API result = ( self._client.Information.get_kinase_names( - kinase_group=group, kinase_family=family, species=species + kinase_group=groups, kinase_family=families, species=species ) .response() .result @@ -98,7 +105,9 @@ def all_kinases(self, group=None, family=None, species=None): kinases = self._abc_to_dataframe(result) # Standardize DataFrame kinases = self._standardize_dataframe( - kinases, DATAFRAME_COLUMNS["kinases_all"], REMOTE_COLUMNS_MAPPING["kinases_all"] + kinases, + FIELDS.oc_name_to_type("kinases_all"), + FIELDS.remote_to_oc_names("kinases_all"), ) return kinases @@ -115,17 +124,13 @@ def by_kinase_klifs_id(self, kinase_klifs_ids): kinases = self._abc_to_dataframe(result) # Standardize DataFrame kinases = self._standardize_dataframe( - kinases, DATAFRAME_COLUMNS["kinases"], REMOTE_COLUMNS_MAPPING["kinases"] + kinases, FIELDS.oc_name_to_type("kinases"), FIELDS.remote_to_oc_names("kinases") ) return kinases def by_kinase_name(self, kinase_names, species=None): kinase_names = self._ensure_list(kinase_names) - # FIXME: Unexpected KLIFS Swagger input: 
"ABL1, EGFR" instead ["ABL1", "EGFR"] - # Fixed at https://klifs.net/swagger_v2/ - # Use workaround here and update when v2 becomes default - kinase_names = ", ".join(kinase_names) # Use KLIFS API result = ( self._client.Information.get_kinase_ID(kinase_name=kinase_names, species=species) @@ -136,7 +141,7 @@ def by_kinase_name(self, kinase_names, species=None): kinases = self._abc_to_dataframe(result) # Standardize DataFrame kinases = self._standardize_dataframe( - kinases, DATAFRAME_COLUMNS["kinases"], REMOTE_COLUMNS_MAPPING["kinases"] + kinases, FIELDS.oc_name_to_type("kinases"), FIELDS.remote_to_oc_names("kinases") ) return kinases @@ -162,7 +167,7 @@ def all_ligands(self): ligands = self._abc_to_dataframe(result) # Standardize DataFrame ligands = self._standardize_dataframe( - ligands, DATAFRAME_COLUMNS["ligands"], REMOTE_COLUMNS_MAPPING["ligands"] + ligands, FIELDS.oc_name_to_type("ligands"), FIELDS.remote_to_oc_names("ligands") ) return ligands @@ -173,8 +178,8 @@ def by_kinase_klifs_id(self, kinase_klifs_ids): # Standardize DataFrame ligands = self._standardize_dataframe( ligands, - DATAFRAME_COLUMNS["ligands"] + [("kinase.klifs_id (query)", "int32")], - REMOTE_COLUMNS_MAPPING["ligands"], + FIELDS.oc_name_to_type("ligands", {"kinase.klifs_id (query)": "int32"}), + FIELDS.remote_to_oc_names("ligands"), ) return ligands @@ -201,7 +206,7 @@ def _by_kinase_klifs_id(self, kinase_klifs_id): ligands = self._abc_to_dataframe(result) # Standardize DataFrame ligands = self._standardize_dataframe( - ligands, DATAFRAME_COLUMNS["ligands"], REMOTE_COLUMNS_MAPPING["ligands"] + ligands, FIELDS.oc_name_to_type("ligands"), FIELDS.remote_to_oc_names("ligands") ) # Rename column to indicate query key ligands["kinase.klifs_id (query)"] = kinase_klifs_id @@ -216,14 +221,14 @@ def by_kinase_name(self, kinase_names): kinases_remote = Kinases(self._client) kinases = kinases_remote.by_kinase_name(kinase_names) # Select and rename columns to indicate columns involved in query - 
kinases = kinases[ # pylint: disable=E1136 - ["kinase.klifs_id", "kinase.klifs_name", "kinase.hgnc_name", "species.klifs"] + kinases = kinases[ + ["kinase.klifs_id", "kinase.klifs_name", "kinase.gene_name", "species.klifs"] ] kinases.rename( columns={ "kinase.klifs_id": "kinase.klifs_id (query)", "kinase.klifs_name": "kinase.klifs_name (query)", - "kinase.hgnc_name": "kinase.hgnc_name (query)", + "kinase.gene_name": "kinase.gene_name (query)", "species.klifs": "species.klifs (query)", }, inplace=True, @@ -245,7 +250,7 @@ def by_ligand_klifs_id(self, ligand_klifs_ids): ligands = ligands[ligands["ligand.klifs_id"].isin(ligand_klifs_ids)] # Standardize DataFrame ligands = self._standardize_dataframe( - ligands, DATAFRAME_COLUMNS["ligands"], REMOTE_COLUMNS_MAPPING["ligands"] + ligands, FIELDS.oc_name_to_type("ligands"), FIELDS.remote_to_oc_names("ligands") ) return ligands @@ -258,7 +263,7 @@ def by_ligand_expo_id(self, ligand_expo_ids): ligands = ligands[ligands["ligand.expo_id"].isin(ligand_expo_ids)] # Standardize DataFrame ligands = self._standardize_dataframe( - ligands, DATAFRAME_COLUMNS["ligands"], REMOTE_COLUMNS_MAPPING["ligands"] + ligands, FIELDS.oc_name_to_type("ligands"), FIELDS.remote_to_oc_names("ligands") ) return ligands @@ -280,7 +285,9 @@ def all_structures(self): structures = self.by_kinase_klifs_id(kinase_klifs_ids) # Standardize DataFrame structures = self._standardize_dataframe( - structures, DATAFRAME_COLUMNS["structures"], REMOTE_COLUMNS_MAPPING["structures"] + structures, + FIELDS.oc_name_to_type("structures"), + FIELDS.remote_to_oc_names("structures"), ) return structures @@ -297,32 +304,24 @@ def by_structure_klifs_id(self, structure_klifs_ids): structures = self._abc_to_dataframe(result) # Standardize DataFrame structures = self._standardize_dataframe( - structures, DATAFRAME_COLUMNS["structures"], REMOTE_COLUMNS_MAPPING["structures"] + structures, + FIELDS.oc_name_to_type("structures"), + FIELDS.remote_to_oc_names("structures"), ) return 
structures def by_ligand_klifs_id(self, ligand_klifs_ids): - # TODO in the future: Approach incorrect: One PDB can have multiple KLIFS IDs - - _logger.warning( - f"This method uses this lookup: ligand KLIFS ID > Ligand Expo ID > structures." - f"The KLIFS Swagger API offers no direct structure search by ligand KLIFS ID." - f"However, one Ligand Expo ID can be represented by multiple ligand KLIFS IDs. " - f"Thus, in rare cases, this method will return also structure that are not connected " - f"to the input ligand KLIFS ID but to a mutual Ligand Expo ID." - ) - ligand_klifs_ids = self._ensure_list(ligand_klifs_ids) - # Use KLIFS API: Get Ligand Expo IDs for ligand KLIFS IDs - remote_ligands = Ligands(self._client) - ligands = remote_ligands.by_ligand_klifs_id(ligand_klifs_ids) - # Use KLIFS API: Get structures from Ligand Expo IDs - ligand_expo_ids = ligands["ligand.expo_id"].to_list() - structures = self.by_ligand_expo_id(ligand_expo_ids) + # Use KLIFS API: Get all structures + structures = self.all_structures() + # Select structures by ligand KLIFS IDs + structures = structures[structures["ligand.klifs_id"].isin(ligand_klifs_ids)] # Standardize DataFrame structures = self._standardize_dataframe( - structures, DATAFRAME_COLUMNS["structures"], REMOTE_COLUMNS_MAPPING["structures"] + structures, + FIELDS.oc_name_to_type("structures"), + FIELDS.remote_to_oc_names("structures"), ) return structures @@ -339,7 +338,9 @@ def by_kinase_klifs_id(self, kinase_klifs_ids): structures = self._abc_to_dataframe(result) # Standardize DataFrame structures = self._standardize_dataframe( - structures, DATAFRAME_COLUMNS["structures"], REMOTE_COLUMNS_MAPPING["structures"] + structures, + FIELDS.oc_name_to_type("structures"), + FIELDS.remote_to_oc_names("structures"), ) return structures @@ -358,7 +359,9 @@ def by_structure_pdb_id( structures = self._abc_to_dataframe(result) # Standardize DataFrame structures = self._standardize_dataframe( - structures, DATAFRAME_COLUMNS["structures"], 
REMOTE_COLUMNS_MAPPING["structures"] + structures, + FIELDS.oc_name_to_type("structures"), + FIELDS.remote_to_oc_names("structures"), ) # If only one structure PDB ID is given, check alternate model and chain filters if len(structure_pdb_ids) == 1: @@ -376,7 +379,9 @@ def by_ligand_expo_id(self, ligand_expo_ids): structures = structures[structures["ligand.expo_id"].isin(ligand_expo_ids)] # Standardize DataFrame structures = self._standardize_dataframe( - structures, DATAFRAME_COLUMNS["structures"], REMOTE_COLUMNS_MAPPING["structures"] + structures, + FIELDS.oc_name_to_type("structures"), + FIELDS.remote_to_oc_names("structures"), ) return structures @@ -389,7 +394,9 @@ def by_kinase_name(self, kinase_names): structures = structures[structures["kinase.klifs_name"].isin(kinase_names)] # Standardize DataFrame structures = self._standardize_dataframe( - structures, DATAFRAME_COLUMNS["structures"], REMOTE_COLUMNS_MAPPING["structures"] + structures, + FIELDS.oc_name_to_type("structures"), + FIELDS.remote_to_oc_names("structures"), ) return structures @@ -418,8 +425,8 @@ def all_bioactivities(self, _top_n=None): # Standardize DataFrame bioactivities = self._standardize_dataframe( bioactivities, - DATAFRAME_COLUMNS["bioactivities"], - REMOTE_COLUMNS_MAPPING["bioactivities"], + FIELDS.oc_name_to_type("bioactivities"), + FIELDS.remote_to_oc_names("bioactivities"), ) return bioactivities @@ -435,8 +442,8 @@ def by_kinase_klifs_id(self, kinase_klifs_ids): # Standardize DataFrame bioactivities = self._standardize_dataframe( bioactivities, - DATAFRAME_COLUMNS["bioactivities"], - REMOTE_COLUMNS_MAPPING["bioactivities"], + FIELDS.oc_name_to_type("bioactivities"), + FIELDS.remote_to_oc_names("bioactivities"), ) return bioactivities @@ -447,8 +454,8 @@ def by_ligand_klifs_id(self, ligand_klifs_ids): # Standardize DataFrame bioactivities = self._standardize_dataframe( bioactivities, - DATAFRAME_COLUMNS["bioactivities"] + [("ligand.klifs_id (query)", "int32")], - 
REMOTE_COLUMNS_MAPPING["bioactivities"], + FIELDS.oc_name_to_type("bioactivities", {"ligand.klifs_id (query)": "int32"}), + FIELDS.remote_to_oc_names("bioactivities"), ) return bioactivities @@ -459,8 +466,8 @@ def by_ligand_expo_id(self, ligand_expo_id): # Standardize DataFrame bioactivities = self._standardize_dataframe( bioactivities, - DATAFRAME_COLUMNS["bioactivities"] + [("ligand.expo_id (query)", "string")], - REMOTE_COLUMNS_MAPPING["bioactivities"], + FIELDS.oc_name_to_type("bioactivities", {"ligand.expo_id (query)": "string"}), + FIELDS.remote_to_oc_names("bioactivities"), ) return bioactivities @@ -490,8 +497,8 @@ def _by_ligand_klifs_id(self, ligand_klifs_id): # Standardize DataFrame bioactivities = self._standardize_dataframe( bioactivities, - DATAFRAME_COLUMNS["bioactivities"], - REMOTE_COLUMNS_MAPPING["bioactivities"], + FIELDS.oc_name_to_type("bioactivities"), + FIELDS.remote_to_oc_names("bioactivities"), ) # Rename column to indicate query key bioactivities["ligand.klifs_id (query)"] = ligand_klifs_id @@ -523,8 +530,8 @@ def _by_ligand_expo_id(self, ligand_expo_id): # Standardize DataFrame bioactivities = self._standardize_dataframe( bioactivities, - DATAFRAME_COLUMNS["bioactivities"], - REMOTE_COLUMNS_MAPPING["bioactivities"], + FIELDS.oc_name_to_type("bioactivities"), + FIELDS.remote_to_oc_names("bioactivities"), ) # Rename column to indicate query key bioactivities["ligand.expo_id (query)"] = ligand_expo_id @@ -548,8 +555,8 @@ def interaction_types(self): # Standardize DataFrame interaction_types = self._standardize_dataframe( interaction_types, - DATAFRAME_COLUMNS["interaction_types"], - REMOTE_COLUMNS_MAPPING["interaction_types"], + FIELDS.oc_name_to_type("interaction_types"), + FIELDS.remote_to_oc_names("interaction_types"), ) return interaction_types @@ -561,11 +568,12 @@ def all_interactions(self): # Use KLIFS API: Get all interactions from these structures KLIFS IDs structure_klifs_ids = structures["structure.klifs_id"].to_list() 
interactions = self.by_structure_klifs_id(structure_klifs_ids) + print(interactions) # Standardize DataFrame interactions = self._standardize_dataframe( interactions, - DATAFRAME_COLUMNS["interactions"], - REMOTE_COLUMNS_MAPPING["interaction_types"], + FIELDS.oc_name_to_type("interactions"), + FIELDS.remote_to_oc_names("interactions"), ) return interactions @@ -582,7 +590,9 @@ def by_structure_klifs_id(self, structure_klifs_ids): interactions = self._abc_to_dataframe(result) # Standardize DataFrame interactions = self._standardize_dataframe( - interactions, DATAFRAME_COLUMNS["interactions"], REMOTE_COLUMNS_MAPPING["interactions"] + interactions, + FIELDS.oc_name_to_type("interactions"), + FIELDS.remote_to_oc_names("interactions"), ) return interactions @@ -597,7 +607,9 @@ def by_ligand_klifs_id(self, ligand_klifs_ids): interactions = self.by_structure_klifs_id(structure_klifs_ids) # Standardize DataFrame interactions = self._standardize_dataframe( - interactions, DATAFRAME_COLUMNS["interactions"], REMOTE_COLUMNS_MAPPING["interactions"] + interactions, + FIELDS.oc_name_to_type("interactions"), + FIELDS.remote_to_oc_names("interactions"), ) return interactions @@ -612,7 +624,9 @@ def by_kinase_klifs_id(self, kinase_klifs_ids): interactions = self.by_structure_klifs_id(structure_klifs_ids) # Standardize DataFrame interactions = self._standardize_dataframe( - interactions, DATAFRAME_COLUMNS["interactions"], REMOTE_COLUMNS_MAPPING["interactions"] + interactions, + FIELDS.oc_name_to_type("interactions"), + FIELDS.remote_to_oc_names("interactions"), ) return interactions @@ -638,7 +652,7 @@ def by_structure_klifs_id(self, structure_klifs_id): pocket = pd.DataFrame(result) # Standardize DataFrame pocket = self._standardize_dataframe( - pocket, DATAFRAME_COLUMNS["pockets"], REMOTE_COLUMNS_MAPPING["pockets"] + pocket, FIELDS.oc_name_to_type("pockets"), FIELDS.remote_to_oc_names("pockets") ) # Add KLIFS region and color TODO not so nice to have this after standardization 
pocket = self._add_klifs_region_details(pocket) @@ -849,9 +863,3 @@ def _add_residue_klifs_ids(self, dataframe, structure_klifs_id): dataframe = dataframe.astype({"residue.klifs_id": "Int64"}) return dataframe - - -KLIFS_API_DEFINITIONS = "https://klifs.net/swagger/swagger.json" -KLIFS_CLIENT = SerializableSwaggerClient.from_url( - KLIFS_API_DEFINITIONS, config={"validate_responses": False} -) diff --git a/opencadd/databases/klifs/schema.py b/opencadd/databases/klifs/schema.py index 24d0cd55..883f4c08 100644 --- a/opencadd/databases/klifs/schema.py +++ b/opencadd/databases/klifs/schema.py @@ -4,272 +4,16 @@ Defines schema used across the klifs module. """ -import pandas as pd +from pathlib import Path -DATAFRAME_COLUMNS = { - "kinase_groups": [ - ("kinase.group", "string"), - ], - "kinase_families": [ - ("kinase.family", "string"), - ], - "kinases_all": [ - ("kinase.klifs_id", "int32"), - ("kinase.hgnc_name", "string"), # TODO except for kinase KLIFS IDs: 529, 530 - ("kinase.full_name", "string"), - ("species.klifs", "string"), - ], - "kinases": [ - ("kinase.klifs_id", "int32"), - ("kinase.klifs_name", "string"), # TODO where from? - ("kinase.hgnc_name", "string"), - ("kinase.family", "string"), - ("kinase.group", "string"), - ("kinase.class", "string"), # TODO where from? - ("species.klifs", "string"), - ("kinase.full_name", "string"), - ("kinase.uniprot", "string"), - ("kinase.iuphar", "string"), - ("kinase.pocket", "string"), - ], - "ligands": [ - ("ligand.klifs_id", "Int32"), # TODO use int32 when ligand ID avail. locally - ("ligand.expo_id", "string"), - ("ligand.name", "string"), - ("ligand.smiles", "string"), - ("ligand.inchikey", "string"), - ], - "structures": [ - ("structure.klifs_id", "int32"), - ("structure.pdb_id", "string"), - ("structure.alternate_model", "string"), - ("structure.chain", "string"), - ("species.klifs", "string"), - ("kinase.klifs_id", "int32"), - ("kinase.klifs_name", "string"), # TODO where from? 
- # "kinase.names", # Excluded, otherwise operations like drop_duplicates() do not work - ("kinase.family", "string"), - ("kinase.group", "string"), - ("structure.pocket", "string"), - ("ligand.expo_id", "string"), - ("ligand_allosteric.expo_id", "string"), - ("ligand.name", "string"), - ("ligand_allosteric.name", "string"), - ("structure.dfg", "string"), - ("structure.ac_helix", "string"), - ("structure.resolution", "float32"), - ("structure.qualityscore", "float32"), - ("structure.missing_residues", "int32"), - ("structure.missing_atoms", "int32"), - ("structure.rmsd1", "float32"), - ("structure.rmsd2", "float32"), - ("structure.front", "boolean"), - ("structure.gate", "boolean"), - ("structure.back", "boolean"), - ("structure.fp_i", "boolean"), - ("structure.fp_ii", "boolean"), - ("structure.bp_i_a", "boolean"), - ("structure.bp_i_b", "boolean"), - ("structure.bp_ii_in", "boolean"), - ("structure.bp_ii_a_in", "boolean"), - ("structure.bp_ii_b_in", "boolean"), - ("structure.bp_ii_out", "boolean"), - ("structure.bp_ii_b", "boolean"), - ("structure.bp_iii", "boolean"), - ("structure.bp_iv", "boolean"), - ("structure.bp_v", "boolean"), - ("structure.grich_distance", "float32"), - ("structure.grich_angle", "float32"), - ("structure.grich_rotation", "float32"), - ("structure.filepath", "string"), - ], - "bioactivities": [ - # TODO in the future: "kinase.klifs_id" # Add if added to KLIFS API? - ("kinase.pref_name", "string"), - ("kinase.uniprot", "string"), - # TODO in the future: "ligand.klifs_id" # Add if added to KLIFS API? 
- ("ligand.bioactivity_standard_type", "string"), - ("ligand.bioactivity_standard_relation", "string"), - ("ligand.bioactivity_standard_value", "float32"), - ("ligand.bioactivity_standard_units", "string"), - ("ligand.bioactivity_pchembl_value", "float32"), - ("species.chembl", "string"), - ], - "interactions": [ - ("structure.klifs_id", "int32"), - ("interaction.fingerprint", "string"), - ], - "interaction_types": [ - ("interaction.id", "int32"), - ("interaction.name", "string"), - ], - "pockets": [ - ("residue.klifs_id", "int32"), - ("residue.id", "string"), - ("residue.klifs_region_id", "string"), - ("residue.klifs_region", "string"), - ("residue.klifs_color", "string"), - ], - "coordinates": [ - ("atom.id", "int32"), - ("atom.name", "string"), - ("atom.x", "float32"), - ("atom.y", "float32"), - ("atom.z", "float32"), - ("residue.id", "string"), # TODO: int32? - ("residue.name", "string"), - ("residue.klifs_id", "int32"), - ("residue.klifs_region_id", "string"), - ("residue.klifs_region", "string"), - ("residue.klifs_color", "string"), - ], -} +import pandas as pd +from opencadd.databases.klifs.fields import Fields -LOCAL_COLUMNS_MAPPING = { - "klifs_export": { - "NAME": "kinase.names", # HGNC and KLIFS (?) name TODO where is KLIFS name from? 
- "FAMILY": "kinase.family", - "GROUPS": "kinase.group", - "PDB": "structure.pdb_id", - "CHAIN": "structure.chain", - "ALTERNATE_MODEL": "structure.alternate_model", - "SPECIES": "species.klifs", - "LIGAND": "ligand.name", - "PDB_IDENTIFIER": "ligand.expo_id", - "ALLOSTERIC_NAME": "ligand_allosteric.name", - "ALLOSTERIC_PDB": "ligand_allosteric.expo_id", - "DFG": "structure.dfg", - "AC_HELIX": "structure.ac_helix", - }, - "klifs_overview": { - "species": "species.klifs", - "kinase": "kinase.klifs_name", - "pdb": "structure.pdb_id", - "alt": "structure.alternate_model", - "chain": "structure.chain", - "orthosteric_PDB": "ligand.expo_id", - "allosteric_PDB": "ligand_allosteric.expo_id", - "rmsd1": "structure.rmsd1", - "rmsd2": "structure.rmsd2", - "qualityscore": "structure.qualityscore", - "pocket": "structure.pocket", - "resolution": "structure.resolution", - "missing_residues": "structure.missing_residues", - "missing_atoms": "structure.missing_atoms", - "full_ifp": "interaction.fingerprint", - "fp_i": "structure.fp_i", - "fp_ii": "structure.fp_ii", - "bp_i_a": "structure.bp_i_a", - "bp_i_b": "structure.bp_i_b", - "bp_ii_in": "structure.bp_ii_in", - "bp_ii_a_in": "structure.bp_ii_a_in", - "bp_ii_b_in": "structure.bp_ii_b_in", - "bp_ii_out": "structure.bp_ii_out", - "bp_ii_b": "structure.bp_ii_b", - "bp_iii": "structure.bp_iii", - "bp_iv": "structure.bp_iv", - "bp_v": "structure.bp_v", - }, -} +# PATH_DATA = Path(__name__).parent / "opencadd/data" +PATH_DATA = Path(__file__).parent / "../../data" -REMOTE_COLUMNS_MAPPING = { - # Information.get_kinase_names() - "kinases_all": { - "kinase_ID": "kinase.klifs_id", - "name": "kinase.hgnc_name", - "full_name": "kinase.full_name", - "species": "species.klifs", - }, - # Information.get_kinase_information() - "kinases": { - "kinase_ID": "kinase.klifs_id", - "name": "kinase.klifs_name", - "HGNC": "kinase.hgnc_name", - "family": "kinase.family", - "group": "kinase.group", - "kinase_class": "kinase.class", - "species": 
"species.klifs", - "full_name": "kinase.full_name", - "uniprot": "kinase.uniprot", - "iuphar": "kinase.iuphar", - "pocket": "kinase.pocket", - }, - # Ligands.get_ligands_list - "ligands": { - "ligand_ID": "ligand.klifs_id", - "PDB-code": "ligand.expo_id", - "Name": "ligand.name", - "SMILES": "ligand.smiles", - "InChIKey": "ligand.inchikey", - }, - # Structures.get_structure_list() - # Structures.get_structure_lists() - "structures": { - "structure_ID": "structure.klifs_id", - "kinase": "kinase.klifs_name", - "species": "species.klifs", - "kinase_ID": "kinase.klifs_id", - "pdb": "structure.pdb_id", - "alt": "structure.alternate_model", - "chain": "structure.chain", - "rmsd1": "structure.rmsd1", - "rmsd2": "structure.rmsd2", - "pocket": "structure.pocket", - "resolution": "structure.resolution", - "quality_score": "structure.qualityscore", - "missing_residues": "structure.missing_residues", - "missing_atoms": "structure.missing_atoms", - "ligand": "ligand.expo_id", - "allosteric_ligand": "ligand_allosteric.expo_id", - "DFG": "structure.dfg", - "aC_helix": "structure.ac_helix", - "Grich_distance": "structure.grich_distance", - "Grich_angle": "structure.grich_angle", - "Grich_rotation": "structure.grich_rotation", - "front": "structure.front", - "gate": "structure.gate", - "back": "structure.back", - "fp_I": "structure.fp_i", - "fp_II": "structure.fp_ii", - "bp_I_A": "structure.bp_i_a", - "bp_I_B": "structure.bp_i_b", - "bp_II_in": "structure.bp_ii_in", - "bp_II_A_in": "structure.bp_ii_a_in", - "bp_II_B_in": "structure.bp_ii_b_in", - "bp_II_out": "structure.bp_ii_out", - "bp_II_B": "structure.bp_ii_b", - "bp_III": "structure.bp_iii", - "bp_IV": "structure.bp_iv", - "bp_V": "structure.bp_v", - }, - # Ligands.get_bioactivity_list_id() - "bioactivities": { - "pref_name": "kinase.pref_name", - "accession": "kinase.uniprot", - "organism": "species.chembl", - "standard_type": "ligand.bioactivity_standard_type", - "standard_relation": "ligand.bioactivity_standard_relation", - 
"standard_value": "ligand.bioactivity_standard_value", - "standard_units": "ligand.bioactivity_standard_units", - "pchembl_value": "ligand.bioactivity_pchembl_value", - }, - # Interactions.get_interactions_get_IFP() - "interactions": { - "structure_ID": "structure.klifs_id", - "IFP": "interaction.fingerprint", - }, - # Interactions.get_interactions_get_types() - "interaction_types": { - "position": "interaction.id", - "name": "interaction.name", - }, - # Interactions.get_interactions_match_residues() - "pockets": { - "index": "residue.klifs_id", - "Xray_position": "residue.id", - "KLIFS_position": "residue.klifs_region_id", - }, -} +PATH_FIELDS = PATH_DATA / "klifs_fields.csv" +FIELDS = Fields(PATH_FIELDS) POCKET_KLIFS_REGIONS = [ (1, "I"), diff --git a/opencadd/databases/klifs/session.py b/opencadd/databases/klifs/session.py index 2b1f547f..b37e92c0 100644 --- a/opencadd/databases/klifs/session.py +++ b/opencadd/databases/klifs/session.py @@ -10,7 +10,7 @@ from . import remote from . import local -from .remote import KLIFS_CLIENT +from .utils import KLIFS_CLIENT class Session: diff --git a/opencadd/databases/klifs/utils.py b/opencadd/databases/klifs/utils.py index eb31be8c..7474c36d 100644 --- a/opencadd/databases/klifs/utils.py +++ b/opencadd/databases/klifs/utils.py @@ -9,7 +9,13 @@ from pathlib import Path import re +from bravado.client import SwaggerClient + _logger = logging.getLogger(__name__) + +KLIFS_API_DEFINITIONS = "https://dev.klifs.net/swagger_v2/swagger.json" +KLIFS_CLIENT = SwaggerClient.from_url(KLIFS_API_DEFINITIONS, config={"validate_responses": False}) + PATH_DATA = Path(__file__).parent / ".." / ".." 
/ "data" diff --git a/opencadd/tests/data/klifs/klifs_metadata.csv b/opencadd/tests/data/klifs/klifs_metadata.csv index c15425ff..34991f4d 100644 --- a/opencadd/tests/data/klifs/klifs_metadata.csv +++ b/opencadd/tests/data/klifs/klifs_metadata.csv @@ -1,4 +1,4 @@ -species.klifs,structure.pdb_id,structure.alternate_model,structure.chain,ligand.expo_id,ligand_allosteric.expo_id,structure.rmsd1,structure.rmsd2,structure.qualityscore,structure.pocket,structure.resolution,structure.missing_residues,structure.missing_atoms,interaction.fingerprint,structure.fp_i,structure.fp_ii,structure.bp_i_a,structure.bp_i_b,structure.bp_ii_in,structure.bp_ii_a_in,structure.bp_ii_b_in,structure.bp_ii_out,structure.bp_ii_b,structure.bp_iii,structure.bp_iv,structure.bp_v,kinase.names,kinase.hgnc_name,kinase.klifs_name,kinase.family,kinase.group,ligand.name,ligand_allosteric.name,structure.dfg,structure.ac_helix,structure.filepath,structure.klifs_id,kinase.klifs_id +species.klifs,structure.pdb_id,structure.alternate_model,structure.chain,ligand.expo_id,ligand_allosteric.expo_id,structure.rmsd1,structure.rmsd2,structure.qualityscore,structure.pocket,structure.resolution,structure.missing_residues,structure.missing_atoms,interaction.fingerprint,structure.fp_i,structure.fp_ii,structure.bp_i_a,structure.bp_i_b,structure.bp_ii_in,structure.bp_ii_a_in,structure.bp_ii_b_in,structure.bp_ii_out,structure.bp_ii_b,structure.bp_iii,structure.bp_iv,structure.bp_v,kinase.names,kinase.gene_name,kinase.klifs_name,kinase.family,kinase.group,ligand.name,ligand_allosteric.name,structure.dfg,structure.ac_helix,structure.filepath,structure.klifs_id,kinase.klifs_id 
Human,3sxr,-,A,1N1,-,0.839,1.967,6.4,KELGS____VVKLVAVKMIEFFQEAQTMMKLSPKLVKFYGVYIVTEYISNGCLLNYLRSFLESHQFIHRDLAARNCLVVSDFGMT,2.4,4,0,0000000000000010000001000000000000000000000000000000000000000000000000100000000000000000000000000010000001000000100000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000001000000000000010001000000000110000010011001000000100000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000100000000000001010000000000000000000000000,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,['BMX'],BMX,BMX,Tec,TK,"N-(2-CHLORO-6-METHYLPHENYL)-2-({6-[4-(2-HYDROXYETHYL)PIPERAZIN-1-YL]-2-METHYLPYRIMIDIN-4-YL}AMINO)-1,3-THIAZOLE-5-CARBOXAMIDE",-,out-like,in,HUMAN/BMX/3sxr_chainA,3482,472 Human,6uuo,-,A,QH1,-,0.806,2.028,5.2,QRI____FGTVYKVAVKMLAFKNEVGVLRKTRVNILLFMGYAIVTQWCEGSSLYHHLHAYLHAKSIIHRDLKSNNIFLIGDF___,3.29,7,61,0000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000010000001000000100000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000010000000000000100000000000000000000000000000000001000000000000010000000000000110000010010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000101000000000000000000100000010001001001000000000000000000000000,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,['BRAF'],BRAF,BRAF,RAF,TKL,"1-[4-{[1-(3-{[dihydroxy(propyl)-lambda~4~-sulfanyl]amino}-2,6-difluorophenyl)-3-(pyrimidin-5-yl)-1H-pyrrolo[3,2-b]pyridin-5-yl](methyl)amino}pyridin-1(4H)-yl]ethan-1-one",-,in,out-like,HUMAN/BRAF/6uuo_chainA,12347,509 
Mouse,1fpu,-,A,PRC,-,0.925,2.319,8.8,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITEFMTYGNLLDYLREYLEKKNFIHRDLAARNCLVVADFGLS,2.4,0,8,0000000000000010000000000000000000000000000000000101000000000000000000100000000000000000000000000010000001000000100000000000000000000000000000000000000000000000010001000000000000000010000001000000000000000000001000000000000000000000000000000000010000000000000000000000000000000000000000000000001000000000000010000000000000110000010010000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000100000010010001010000000000000000000000000,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,"['Abl1', 'ABL1']",Abl1,ABL1,Abl,TK,N-[4-METHYL-3-[[4-(3-PYRIDINYL)-2-PYRIMIDINYL]AMINO]PHENYL]-3-PYRIDINECARBOXAMIDE,-,out,out,MOUSE/ABL1/1fpu_chainA,5728,532 diff --git a/opencadd/tests/databases/test_klifs_local_remote.py b/opencadd/tests/databases/test_klifs_local_remote.py index 2c9ee305..0b94e6b2 100644 --- a/opencadd/tests/databases/test_klifs_local_remote.py +++ b/opencadd/tests/databases/test_klifs_local_remote.py @@ -10,7 +10,7 @@ from rdkit import Chem from opencadd.databases.klifs.api import setup_local, setup_remote -from opencadd.databases.klifs.schema import DATAFRAME_COLUMNS +from opencadd.databases.klifs.schema import FIELDS from opencadd.utils import enter_temp_directory PATH_TEST_DATA = Path(__name__).parent / "opencadd/tests/data/klifs" @@ -28,7 +28,7 @@ def check_dataframe(dataframe, columns): assert isinstance(dataframe, pd.DataFrame) # Are DataFrame column names and their order correct? - assert dataframe.columns.to_list() == [column[0] for column in columns] + assert dataframe.columns.to_list() == list(columns.keys()) # Are DataFrame indices enumerated starting from 0 to length of DataFrame - 1? 
assert dataframe.index.to_list() == list(range(0, len(dataframe))) @@ -48,8 +48,8 @@ def test_all_kinase_groups(self): result_remote = REMOTE.kinases.all_kinase_groups() result_local = LOCAL.kinases.all_kinase_groups() - check_dataframe(result_remote, DATAFRAME_COLUMNS["kinase_groups"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["kinase_groups"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("kinase_groups")) + check_dataframe(result_local, FIELDS.oc_name_to_type("kinase_groups")) assert sorted(result_remote["kinase.group"].to_list()) == [ "AGC", @@ -75,8 +75,8 @@ def test_all_kinase_families(self, group, local_families): result_remote = REMOTE.kinases.all_kinase_families(group) result_local = LOCAL.kinases.all_kinase_families(group) - check_dataframe(result_remote, DATAFRAME_COLUMNS["kinase_families"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["kinase_families"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("kinase_families")) + check_dataframe(result_local, FIELDS.oc_name_to_type("kinase_families")) assert result_local["kinase.family"].to_list() == local_families # Do not test remote, @@ -130,10 +130,10 @@ def test_all_kinases(self, group, family, species, local_kinases): result_remote = REMOTE.kinases.all_kinases(group, family, species) result_local = LOCAL.kinases.all_kinases(group, family, species) - check_dataframe(result_remote, DATAFRAME_COLUMNS["kinases_all"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["kinases_all"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("kinases_all")) + check_dataframe(result_local, FIELDS.oc_name_to_type("kinases_all")) - assert result_local["kinase.hgnc_name"].to_list() == local_kinases[0] + assert result_local["kinase.gene_name"].to_list() == local_kinases[0] assert result_local["species.klifs"].to_list() == local_kinases[1] # Do not test remote, # since too many and may vary if structures are added to KLIFS. 
@@ -160,8 +160,8 @@ def test_all_ligands(self): result_remote = REMOTE.ligands.all_ligands() result_local = LOCAL.ligands.all_ligands() - check_dataframe(result_remote, DATAFRAME_COLUMNS["ligands"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["ligands"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("ligands")) + check_dataframe(result_local, FIELDS.oc_name_to_type("ligands")) assert result_local["ligand.expo_id"].to_list() == ["1N1", "QH1", "PRC", "-"] # Do not test remote, @@ -175,8 +175,8 @@ def test_all_structures(self): result_remote = REMOTE.structures.all_structures() result_local = LOCAL.structures.all_structures() - check_dataframe(result_remote, DATAFRAME_COLUMNS["structures"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["structures"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("structures")) + check_dataframe(result_local, FIELDS.oc_name_to_type("structures")) assert result_local["structure.klifs_id"].to_list() == [ 3482, @@ -195,7 +195,7 @@ def test_interaction_types(self): """ result_remote = REMOTE.interactions.interaction_types - check_dataframe(result_remote, DATAFRAME_COLUMNS["interaction_types"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("interaction_types")) with pytest.raises(NotImplementedError): LOCAL.interactions.interaction_types() @@ -208,8 +208,8 @@ def test_all_interactions(self): result_remote = REMOTE.interactions.all_interactions() result_local = LOCAL.interactions.all_interactions() - check_dataframe(result_remote, DATAFRAME_COLUMNS["interactions"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["interactions"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("interactions")) + check_dataframe(result_local, FIELDS.oc_name_to_type("interactions")) def test_all_bioactivities(self): @@ -223,7 +223,7 @@ def test_all_bioactivities(self): with pytest.raises(NotImplementedError): LOCAL.bioactivities.all_bioactivities() - check_dataframe(result_remote, 
DATAFRAME_COLUMNS["bioactivities"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("bioactivities")) class TestsFromKinaseIds: @@ -241,41 +241,43 @@ def test_by_kinase_klifs_id(self, kinase_klifs_ids): result_remote = REMOTE.kinases.by_kinase_klifs_id(kinase_klifs_ids) result_local = LOCAL.kinases.by_kinase_klifs_id(kinase_klifs_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["kinases"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["kinases"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("kinases")) + check_dataframe(result_local, FIELDS.oc_name_to_type("kinases")) # Ligands result_remote = REMOTE.ligands.by_kinase_klifs_id(kinase_klifs_ids) result_local = LOCAL.ligands.by_kinase_klifs_id(kinase_klifs_ids) check_dataframe( - result_remote, DATAFRAME_COLUMNS["ligands"] + [("kinase.klifs_id (query)", "int32")] + result_remote, + FIELDS.oc_name_to_type("ligands", {"kinase.klifs_id (query)": "int32"}), ) check_dataframe( - result_local, DATAFRAME_COLUMNS["ligands"] + [("kinase.klifs_id (query)", "int32")] + result_local, + FIELDS.oc_name_to_type("ligands", {"kinase.klifs_id (query)": "int32"}), ) # Structures result_remote = REMOTE.structures.by_kinase_klifs_id(kinase_klifs_ids) result_local = LOCAL.structures.by_kinase_klifs_id(kinase_klifs_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["structures"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["structures"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("structures")) + check_dataframe(result_local, FIELDS.oc_name_to_type("structures")) # Bioactivities result_remote = REMOTE.bioactivities.by_kinase_klifs_id(kinase_klifs_ids) with pytest.raises(NotImplementedError): LOCAL.bioactivities.by_kinase_klifs_id(kinase_klifs_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["bioactivities"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("bioactivities")) # Interactions result_remote = REMOTE.interactions.by_kinase_klifs_id(kinase_klifs_ids) result_local 
= LOCAL.interactions.by_kinase_klifs_id(kinase_klifs_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["interactions"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("interactions")) check_dataframe( result_local, - DATAFRAME_COLUMNS["interactions"] + [("kinase.klifs_id (query)", "int32")], + FIELDS.oc_name_to_type("interactions", {"kinase.klifs_id (query)": "int32"}), ) @pytest.mark.parametrize("kinase_klifs_ids", [10000, "XXX"]) @@ -314,14 +316,14 @@ def test_by_ligand_klifs_id(self, ligand_klifs_ids): with pytest.raises(NotImplementedError): LOCAL.ligands.by_ligand_klifs_id(ligand_klifs_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["ligands"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("ligands")) # Structures result_remote = REMOTE.structures.by_ligand_klifs_id(ligand_klifs_ids) with pytest.raises(NotImplementedError): LOCAL.structures.by_ligand_klifs_id(ligand_klifs_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["structures"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("structures")) # Bioactivities result_remote = REMOTE.bioactivities.by_ligand_klifs_id(ligand_klifs_ids) @@ -330,7 +332,7 @@ def test_by_ligand_klifs_id(self, ligand_klifs_ids): check_dataframe( result_remote, - DATAFRAME_COLUMNS["bioactivities"] + [("ligand.klifs_id (query)", "int32")], + FIELDS.oc_name_to_type("bioactivities", {"ligand.klifs_id (query)": "int32"}), ) # Interactions @@ -338,7 +340,7 @@ def test_by_ligand_klifs_id(self, ligand_klifs_ids): with pytest.raises(NotImplementedError): LOCAL.interactions.by_ligand_klifs_id(ligand_klifs_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["interactions"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("interactions")) @pytest.mark.parametrize("ligand_klifs_ids", [10000, "XXX"]) def test_by_ligand_klifs_id_raise(self, ligand_klifs_ids): @@ -370,15 +372,15 @@ def test_by_structure_klifs_id(self, structure_klifs_ids): result_remote = 
REMOTE.structures.by_structure_klifs_id(structure_klifs_ids) result_local = LOCAL.structures.by_structure_klifs_id(structure_klifs_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["structures"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["structures"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("structures")) + check_dataframe(result_local, FIELDS.oc_name_to_type("structures")) # Interactions result_remote = REMOTE.interactions.by_structure_klifs_id(structure_klifs_ids) result_local = LOCAL.interactions.by_structure_klifs_id(structure_klifs_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["interactions"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["interactions"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("interactions")) + check_dataframe(result_local, FIELDS.oc_name_to_type("interactions")) # Pockets (takes only one structure ID as input!) if isinstance(structure_klifs_ids, int): @@ -388,9 +390,9 @@ def test_by_structure_klifs_id(self, structure_klifs_ids): result_local_mol2 = LOCAL.pockets.by_structure_klifs_id(structure_klifs_ids, "mol2") result_local_pdb = LOCAL.pockets.by_structure_klifs_id(structure_klifs_ids, "pdb") - check_dataframe(result_remote, DATAFRAME_COLUMNS["pockets"]) - check_dataframe(result_local_mol2, DATAFRAME_COLUMNS["pockets"]) - check_dataframe(result_local_pdb, DATAFRAME_COLUMNS["pockets"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("pockets")) + check_dataframe(result_local_mol2, FIELDS.oc_name_to_type("pockets")) + check_dataframe(result_local_pdb, FIELDS.oc_name_to_type("pockets")) assert all(result_local_mol2 == result_local_pdb) assert all(result_local_mol2 == result_remote) @@ -433,8 +435,8 @@ def test_by_kinase_name(self, kinase_names, species): result_remote = REMOTE.kinases.by_kinase_name(kinase_names, species) result_local = LOCAL.kinases.by_kinase_name(kinase_names, species) - check_dataframe(result_remote, DATAFRAME_COLUMNS["kinases"]) - 
check_dataframe(result_local, DATAFRAME_COLUMNS["kinases"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("kinases")) + check_dataframe(result_local, FIELDS.oc_name_to_type("kinases")) # Ligands result_remote = REMOTE.ligands.by_kinase_name(kinase_names) @@ -442,30 +444,34 @@ def test_by_kinase_name(self, kinase_names, species): check_dataframe( result_remote, - DATAFRAME_COLUMNS["ligands"] - + [ - ("kinase.klifs_id (query)", "int32"), - ("kinase.klifs_name (query)", "string"), - ("kinase.hgnc_name (query)", "string"), - ("species.klifs (query)", "string"), - ], + FIELDS.oc_name_to_type( + "ligands", + { + "kinase.klifs_id (query)": "int32", + "kinase.klifs_name (query)": "string", + "kinase.gene_name (query)": "string", + "species.klifs (query)": "string", + }, + ), ) check_dataframe( result_local, - DATAFRAME_COLUMNS["ligands"] - + [ - ("kinase.klifs_name (query)", "string"), - ("kinase.hgnc_name (query)", "string"), - ("species.klifs (query)", "string"), - ], + FIELDS.oc_name_to_type( + "ligands", + { + "kinase.klifs_name (query)": "string", + "kinase.gene_name (query)": "string", + "species.klifs (query)": "string", + }, + ), ) # Structures result_remote = REMOTE.structures.by_kinase_name(kinase_names) result_local = LOCAL.structures.by_kinase_name(kinase_names) - check_dataframe(result_remote, DATAFRAME_COLUMNS["structures"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["structures"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("structures")) + check_dataframe(result_local, FIELDS.oc_name_to_type("structures")) @pytest.mark.parametrize("kinase_names, species", [("XXX", None), ("XXX", "XXX")]) def test_by_kinase_name_raise(self, kinase_names, species): @@ -499,24 +505,23 @@ def test_by_ligand_expo_id(self, ligand_expo_ids): result_remote = REMOTE.ligands.by_ligand_expo_id(ligand_expo_ids) result_local = LOCAL.ligands.by_ligand_expo_id(ligand_expo_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["ligands"]) - 
check_dataframe(result_local, DATAFRAME_COLUMNS["ligands"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("ligands")) + check_dataframe(result_local, FIELDS.oc_name_to_type("ligands")) # Structure result_remote = REMOTE.structures.by_ligand_expo_id(ligand_expo_ids) result_local = LOCAL.structures.by_ligand_expo_id(ligand_expo_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["structures"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["structures"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("structures")) + check_dataframe(result_local, FIELDS.oc_name_to_type("structures")) # Bioactivities result_remote = REMOTE.bioactivities.by_ligand_expo_id(ligand_expo_ids) with pytest.raises(NotImplementedError): LOCAL.bioactivities.by_ligand_expo_id(ligand_expo_ids) - print(result_remote) check_dataframe( result_remote, - DATAFRAME_COLUMNS["bioactivities"] + [("ligand.expo_id (query)", "string")], + FIELDS.oc_name_to_type("bioactivities", {"ligand.expo_id (query)": "string"}), ) @pytest.mark.parametrize("ligand_expo_ids", [1, "XXX"]) @@ -547,8 +552,8 @@ def test_by_structure_pdb_id(self, structure_pdb_ids): result_remote = REMOTE.structures.by_structure_pdb_id(structure_pdb_ids) result_local = LOCAL.structures.by_structure_pdb_id(structure_pdb_ids) - check_dataframe(result_remote, DATAFRAME_COLUMNS["structures"]) - check_dataframe(result_local, DATAFRAME_COLUMNS["structures"]) + check_dataframe(result_remote, FIELDS.oc_name_to_type("structures")) + check_dataframe(result_local, FIELDS.oc_name_to_type("structures")) @pytest.mark.parametrize("structure_pdb_ids", [1, "xxxx"]) def test_by_structure_pdb_id_raise(self, structure_pdb_ids): @@ -676,7 +681,7 @@ def _test_to_dataframe(dataframe, n_atoms, centroid): """ assert isinstance(dataframe, pd.DataFrame) - column_names = [column[0] for column in DATAFRAME_COLUMNS["coordinates"]] + column_names = list(FIELDS.oc_name_to_type("coordinates").keys()) assert dataframe.columns.to_list() == column_names 
assert dataframe.shape[0] == n_atoms assert centroid[0] == pytest.approx(dataframe["atom.x"].mean(), abs=1.0e-6) diff --git a/opencadd/tests/databases/test_sync_klifs_and_opencadd.py b/opencadd/tests/databases/test_sync_klifs_and_opencadd.py new file mode 100644 index 00000000..10c79380 --- /dev/null +++ b/opencadd/tests/databases/test_sync_klifs_and_opencadd.py @@ -0,0 +1,105 @@ +""" +Test if opencadd is up-to-date with KLIFS database (website and download). +If errors are raised, it is time to update opencadd. +""" + +from opencadd.databases.klifs.utils import KLIFS_CLIENT +from opencadd.databases.klifs.schema import FIELDS + + +class TestSyncKlifsSwaggerWithOpencadd: + """ + Test if opencadd is up-to-date with the KLIFS Swagger API (remote!). + """ + + def _test_klifs_model(self, data_opencadd, data_klifs): + """ + Check if opencadd is up-to-date with KLIFS models. + """ + + # Get kinases details keys in opencadd + keys_opencadd = set(sorted(data_opencadd.keys())) + # Get kinases details keys in KLIFS + result = data_klifs.response().result[0] + keys_klifs = set(sorted(list(result))) + + assert keys_opencadd == keys_klifs + + def test_all_kinases(self): + """ + Check if opencadd is up-to-date with the model returned by Information.get_kinase_names. + """ + + self._test_klifs_model( + FIELDS.remote_to_oc_names("kinases_all"), KLIFS_CLIENT.Information.get_kinase_names() + ) + + def test_kinases(self): + """ + Check if opencadd is up-to-date with KinaseInformation model. + """ + + self._test_klifs_model( + FIELDS.remote_to_oc_names("kinases"), + KLIFS_CLIENT.Information.get_kinase_information(), + ) + + def test_ligands(self): + """ + Check if opencadd is up-to-date with ligandDetails model. + """ + + self._test_klifs_model( + FIELDS.remote_to_oc_names("ligands"), + KLIFS_CLIENT.Ligands.get_ligands_list(kinase_ID=[1]), + ) + + def test_structures(self): + """ + Check if opencadd is up-to-date with the model returned by Structures.get_structure_list. 
+ """ + + self._test_klifs_model( + FIELDS.remote_to_oc_names("structures"), + KLIFS_CLIENT.Structures.get_structure_list(structure_ID=[1]), + ) + + def test_bioactivities(self): + """ + Check if opencadd is up-to-date with the model returned by Ligands.get_bioactivity_list_id. + """ + + self._test_klifs_model( + FIELDS.remote_to_oc_names("bioactivities"), + KLIFS_CLIENT.Ligands.get_bioactivity_list_id(ligand_ID=2), + ) + + def test_interaction_types(self): + """ + Check if opencadd is up-to-date with InteractionList model. + """ + + self._test_klifs_model( + FIELDS.remote_to_oc_names("interaction_types"), + KLIFS_CLIENT.Interactions.get_interactions_get_types(), + ) + + def test_pockets(self): + """ + Check if opencadd is up-to-date with MatchList model. + """ + + self._test_klifs_model( + FIELDS.remote_to_oc_names("pockets"), + KLIFS_CLIENT.Interactions.get_interactions_match_residues(structure_ID=100), + ) + + def test_interactions(self): + """ + Check if opencadd is up-to-date with InteractionList model. + """ + + self._test_klifs_model( + FIELDS.remote_to_oc_names("interactions"), + KLIFS_CLIENT.Interactions.get_interactions_get_IFP(structure_ID=[1]), + )