Spaces:
Running
Running
Enzo Reis de Oliveira
committed on
Commit
·
214fccd
1
Parent(s):
3fc57b3
Feature upload csv
Browse files
app.py
CHANGED
@@ -1,21 +1,17 @@
|
|
1 |
import os
|
2 |
import sys
|
3 |
import json
|
4 |
-
import tempfile
|
5 |
-
|
6 |
import pandas as pd
|
7 |
import gradio as gr
|
8 |
-
from PIL import Image
|
9 |
|
10 |
# 1) Ajusta o path antes de importar o loader
|
11 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
12 |
INFERENCE_PATH = os.path.join(BASE_DIR, "smi-ted", "inference")
|
13 |
sys.path.insert(0, INFERENCE_PATH)
|
14 |
|
15 |
-
# 2) Importa o loader do SMI-TED Light
|
16 |
from smi_ted_light.load import load_smi_ted
|
17 |
|
18 |
-
#
|
19 |
MODEL_DIR = os.path.join(INFERENCE_PATH, "smi_ted_light")
|
20 |
model = load_smi_ted(
|
21 |
folder=MODEL_DIR,
|
@@ -23,58 +19,65 @@ model = load_smi_ted(
|
|
23 |
vocab_filename="bert_vocab_curated.txt",
|
24 |
)
|
25 |
|
26 |
-
#
|
27 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
smiles = smiles.strip()
|
29 |
if not smiles:
|
30 |
-
|
31 |
-
return json.dumps(erro), gr.update(visible=False)
|
32 |
-
|
33 |
try:
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
# Retorna JSON em string e ativa o link de download para embeddings.csv
|
41 |
-
return json.dumps(vetor), gr.update(value=filename, visible=True)
|
42 |
except Exception as e:
|
43 |
-
|
44 |
-
return json.dumps(erro), gr.update(visible=False)
|
45 |
|
46 |
-
#
|
47 |
with gr.Blocks() as demo:
|
48 |
gr.Markdown(
|
49 |
"""
|
50 |
# SMI-TED Embedding Generator
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
"""
|
55 |
)
|
56 |
|
57 |
with gr.Row():
|
58 |
-
smiles_in = gr.Textbox(label="SMILES", placeholder="Ex.: CCO")
|
59 |
-
|
|
|
|
|
60 |
|
61 |
with gr.Row():
|
62 |
-
|
63 |
-
|
64 |
-
interactive=False,
|
65 |
-
lines=4,
|
66 |
-
placeholder="O vetor aparecerá aqui…"
|
67 |
-
)
|
68 |
-
download_csv = gr.File(
|
69 |
-
label="Baixar CSV",
|
70 |
-
visible=False
|
71 |
-
)
|
72 |
|
73 |
-
# Conecta botão à função que tem dois outputs
|
74 |
gerar_btn.click(
|
75 |
-
fn=
|
76 |
-
inputs=smiles_in,
|
77 |
-
outputs=[
|
78 |
)
|
79 |
|
80 |
if __name__ == "__main__":
|
|
|
1 |
import os
|
2 |
import sys
|
3 |
import json
|
|
|
|
|
4 |
import pandas as pd
|
5 |
import gradio as gr
|
|
|
6 |
|
7 |
# 1) Ajusta o path antes de importar o loader
|
8 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
9 |
INFERENCE_PATH = os.path.join(BASE_DIR, "smi-ted", "inference")
|
10 |
sys.path.insert(0, INFERENCE_PATH)
|
11 |
|
|
|
12 |
from smi_ted_light.load import load_smi_ted
|
13 |
|
14 |
+
# 2) Carrega o modelo
|
15 |
MODEL_DIR = os.path.join(INFERENCE_PATH, "smi_ted_light")
|
16 |
model = load_smi_ted(
|
17 |
folder=MODEL_DIR,
|
|
|
19 |
vocab_filename="bert_vocab_curated.txt",
|
20 |
)
|
21 |
|
22 |
+
# 3) Função única para processar SMILES simples ou CSV de SMILES
|
23 |
+
def process_inputs(smiles: str, file_obj):
    """Generate embeddings for a single SMILES string or a CSV of SMILES.

    If ``file_obj`` is given, the first column of the CSV is read and every
    non-empty value is encoded (batch mode); the ``smiles`` text is ignored.
    Otherwise ``smiles`` is encoded on its own (single mode).

    In both modes the result is written to a file named ``embeddings.csv``
    inside a fresh temporary directory, so concurrent requests never
    overwrite each other's download. The CSV has a ``smiles`` column
    followed by ``dim_0 .. dim_{n-1}`` columns.

    Args:
        smiles: SMILES text typed by the user; may be empty or ``None``.
        file_obj: Uploaded CSV, either a path string or an object exposing
            the path as ``.name``; ``None`` when nothing was uploaded.

    Returns:
        A 2-tuple ``(message, file_update)``: ``message`` is the JSON-encoded
        vector (single mode), a status line (batch mode) or an error text;
        ``file_update`` is a ``gr.update(...)`` that shows the generated CSV
        on success and hides the download component otherwise.
    """
    # Batch mode: an uploaded CSV takes precedence over the text box.
    if file_obj is not None:
        try:
            # The upload may arrive as a plain path or as a file-like wrapper
            # carrying the path in ``.name``; accept both.
            if isinstance(file_obj, (str, os.PathLike)):
                csv_path = file_obj
            else:
                csv_path = file_obj.name
            # NOTE(review): read_csv treats the first row as a header, so a
            # header-less CSV loses its first SMILES -- confirm the expected
            # input format with the UI instructions.
            df_in = pd.read_csv(csv_path)
            # Drop empty cells instead of encoding the literal string "nan".
            raw_values = df_in.iloc[:, 0].dropna().astype(str)
            smiles_list = [s.strip() for s in raw_values if s.strip()]
            if not smiles_list:
                return (
                    "Erro no batch: nenhum SMILES encontrado na primeira coluna do CSV.",
                    gr.update(visible=False),
                )
            # Assumes model.encode returns a sequence whose first item has
            # ``.tolist()`` (same call the single-SMILES path uses).
            embeddings = [
                model.encode(sm, return_torch=True)[0].tolist()
                for sm in smiles_list
            ]
            out_path = _write_embeddings_csv(smiles_list, embeddings)
            msg = f"Batch de {len(smiles_list)} SMILES processado. Baixe em embeddings.csv."
            return msg, gr.update(value=out_path, visible=True)
        except Exception as e:
            # UI boundary: report the failure to the user instead of crashing.
            return f"Erro no batch: {e}", gr.update(visible=False)

    # Single mode: tolerate a missing value from the text box.
    smiles = (smiles or "").strip()
    if not smiles:
        return "Digite um SMILES ou envie um arquivo CSV.", gr.update(visible=False)
    try:
        vec = model.encode(smiles, return_torch=True)[0].tolist()
        out_path = _write_embeddings_csv([smiles], [vec])
        return json.dumps(vec), gr.update(value=out_path, visible=True)
    except Exception as e:
        return f"Erro ao gerar embedding: {e}", gr.update(visible=False)


def _write_embeddings_csv(smiles_list, vectors):
    """Write SMILES/embedding rows to a fresh ``embeddings.csv``; return its path."""
    import tempfile

    n_dims = len(vectors[0]) if vectors else 0
    columns = ["smiles"] + [f"dim_{i}" for i in range(n_dims)]
    rows = [[sm, *vec] for sm, vec in zip(smiles_list, vectors)]
    out_df = pd.DataFrame(rows, columns=columns)
    # One directory per request keeps the download name stable while
    # preventing concurrent requests from clobbering the same file.
    out_path = os.path.join(tempfile.mkdtemp(prefix="smi_ted_"), "embeddings.csv")
    out_df.to_csv(out_path, index=False)
    return out_path
|
|
|
55 |
|
56 |
+
# 4) Monta interface Blocks
|
57 |
with gr.Blocks() as demo:
    # NOTE(review): nesting below was reconstructed from a flattened diff
    # view; confirm which components belong inside each Row.

    # Intro text explaining the two input modes and the generated CSV.
    gr.Markdown(
        value="""
        # SMI-TED Embedding Generator
        **Modo único:** cole um SMILES na caixa à esquerda.
        **Modo batch:** faça upload de um CSV com várias linhas de SMILES (eles devem estar na primeira coluna).
        Em ambos os casos, será gerado um arquivo `embeddings.csv` para download, com a primeira coluna de SMILES e o embedding nas colunas seguintes.
        """
    )

    # Inputs: free-text SMILES next to the CSV upload.
    with gr.Row():
        smiles_in = gr.Textbox(
            label="SMILES (modo único)",
            placeholder="Ex.: CCO",
        )
        file_in = gr.File(
            label="CSV de SMILES (modo batch)",
            file_types=[".csv"],
        )

    gerar_btn = gr.Button(value="Gerar Embeddings")

    # Outputs: status/JSON text next to the (initially hidden) download.
    with gr.Row():
        output_msg = gr.Textbox(
            label="Resposta/Embedding (JSON)",
            interactive=False,
            lines=2,
        )
        download_csv = gr.File(
            label="Baixar embeddings.csv",
            visible=False,
        )

    # Wire the button to the handler, which fills both outputs.
    gerar_btn.click(
        fn=process_inputs,
        inputs=[smiles_in, file_in],
        outputs=[output_msg, download_csv],
    )
|
82 |
|
83 |
if __name__ == "__main__":
|