emmas96 committed
Commit 39315d2 · 1 Parent(s): ce1a0a5

try protein sequence prediction directly with esm package

Files changed (1)
  1. app.py +8 -3
app.py CHANGED
@@ -4,7 +4,7 @@ import torch
 import numpy as np
 import pandas as pd
 import streamlit as st
-import bio_embeddings.embed
+import esm
 
 from rdkit import Chem
 from rdkit.Chem import Draw
@@ -191,8 +191,13 @@ def display_protein():
     sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
 
     if sequence:
-        embeddings = bio_embeddings.embed(sequence, "esm")
-        coordinates = embeddings["esm"].detach().numpy()
+        model, alphabet = esm.pretrained.esm1_t6_43M_UR50S()
+
+        batch_converter = alphabet.get_batch_converter()
+        inputs = batch_converter([sequence])
+        embeddings = model.embed(inputs)
+        coordinates = embeddings["representations"][0].detach().numpy()
+
         st.write(coordinates)
 
         """