Dinesh1102 committed on
Commit
b7e6390
·
verified ·
1 Parent(s): a313df1

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -118
app.py DELETED
@@ -1,118 +0,0 @@
1
- import tqdm
2
- from Bio import SeqIO
3
- import numpy as np
4
- import pandas as pd
5
- import tensorflow as tf
6
- import os
7
- import json
8
- from typing import Dict
9
- from collections import Counter
10
- import random
11
- import obonet
12
- from transformers import T5Tokenizer, T5EncoderModel
13
- import torch
14
- import re
15
- import gradio as gr
16
-
17
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Tokenizer for the ProtT5 checkpoint, with lower-casing of the input disabled.
tokenizer = T5Tokenizer.from_pretrained(
    'Rostlab/prot_t5_xl_half_uniref50-enc',
    do_lower_case=False,
)

# T5 encoder for the same checkpoint, moved onto the selected device.
model = T5EncoderModel.from_pretrained(
    "Rostlab/prot_t5_xl_half_uniref50-enc"
).to(device)
24
-
25
def get_embeddings(seq):
    """Return one per-protein ProtT5 embedding for an amino-acid sequence.

    The sequence is upper-cased, the residues U, Z, O and B are replaced by
    X, and the letters are separated by single spaces before tokenization.
    The encoder's last hidden state is then averaged over the residue
    positions only; the trailing special token is excluded from the mean.

    Args:
        seq: Amino-acid sequence as a string of one-letter codes.

    Returns:
        A 1-D ``torch.Tensor`` (on ``device``) holding the mean of the
        per-residue embeddings.

    Raises:
        ValueError: If ``seq`` produces no residue tokens (e.g. it is empty).
    """
    # Upper-case first so lower-case (soft-masked) FASTA residues are matched
    # by the substitution and by the tokenizer's upper-case vocabulary.
    spaced = " ".join(re.sub(r"[UZOB]", "X", seq.upper()))

    encoded = tokenizer.batch_encode_plus(
        [spaced], add_special_tokens=True, padding="longest"
    )
    input_ids = torch.tensor(encoded['input_ids']).to(device)
    attention_mask = torch.tensor(encoded['attention_mask']).to(device)

    # With a single sequence there is no padding, so the mask counts the
    # residue tokens plus the one end-of-sequence token the tokenizer appends.
    n_residues = int(attention_mask[0].sum().item()) - 1
    if n_residues < 1:
        raise ValueError("Cannot embed an empty protein sequence.")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        embedding_repr = model(input_ids=input_ids,
                               attention_mask=attention_mask)

    # Keep only the residue positions of the first (and only) sequence so the
    # special token does not leak into the per-protein average.
    residue_embeddings = embedding_repr.last_hidden_state[0, :n_residues]
    return residue_embeddings.mean(dim=0)
43
-
44
def _embed_fasta(fasta_path):
    """Embed every record of a FASTA file; returns an (n_records, dim) float64 array."""
    # Single pass over the file: collect the rows first, then build the matrix,
    # instead of parsing once to count and a second time to embed.
    with open(fasta_path, "r") as fasta_file:
        rows = [
            get_embeddings(str(record.seq)).detach().cpu().numpy()
            for record in SeqIO.parse(fasta_file, "fasta")
        ]
    if not rows:
        raise ValueError("No FASTA records found in the uploaded file.")
    return np.asarray(rows, dtype=np.float64)


def _build_classifier(input_dim=1024, num_of_labels=1500,
                      weights_path='./my_model.weights.h5'):
    """Rebuild the dense multi-label classifier and load its trained weights."""
    classifier = tf.keras.Sequential([
        tf.keras.layers.BatchNormalization(input_shape=[input_dim]),
        tf.keras.layers.Dense(units=512, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(units=512, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(units=512, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(units=num_of_labels, activation='sigmoid'),
    ])
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['binary_accuracy', tf.keras.metrics.AUC()],
    )
    classifier.load_weights(weights_path)
    return classifier


def predict(filepath):
    """Predict function labels for every protein in an uploaded FASTA file.

    Each record is embedded with ``get_embeddings``, scored by the dense
    classifier, and the scores are written to two CSV files in the current
    working directory: ``predictions.csv`` (0/1 per label, a score below 0.4
    maps to 0) and ``decimal.csv`` (the raw sigmoid scores). Rows follow the
    record order of the FASTA file; column names come from ``./labels.csv``.

    NOTE: the output file names are fixed, so concurrent calls overwrite each
    other's results.

    Args:
        filepath: Path to the FASTA file, or a file-like wrapper exposing the
            path as ``.name``.

    Returns:
        The string ``"predictions.csv"``, the path of the thresholded output.

    Raises:
        ValueError: If the file contains no FASTA records.
    """
    # Older Gradio releases pass the upload as a temp-file wrapper whose path
    # is in `.name`; newer ones pass the path string itself.
    fasta_path = getattr(filepath, "name", filepath)

    embeds = _embed_fasta(fasta_path)

    # Named `classifier` (not `model`) to avoid shadowing the global encoder
    # that get_embeddings relies on.
    classifier = _build_classifier()

    # Only the header is needed, so no data rows are read.
    labels_df = pd.read_csv('./labels.csv', nrows=0)
    label_columns = labels_df.drop(columns='Unnamed: 0').columns

    # float64 keeps the CSV number formatting of the raw scores unchanged.
    scores = np.asarray(classifier.predict(embeds), dtype=np.float64)
    # Written as "not below 0.4" so a NaN score still maps to 1, as before.
    binary = np.where(scores < 0.4, 0, 1)

    predictions_df = pd.DataFrame(binary, columns=label_columns)
    scores_df = pd.DataFrame(scores, columns=label_columns)

    predictions_df.to_csv("predictions.csv", index=False)
    scores_df.to_csv("decimal.csv", index=False)
    return "predictions.csv"
112
-
113
# Wrap predict() in a file-in / file-out Gradio interface and start serving it.
demo = gr.Interface(
    fn=predict,
    inputs="file",
    outputs="file",
    title='Protein Function Prediction using fasta file,upload a fasta file',
)
demo.launch()