File size: 2,208 Bytes
4d799f2
 
e1e6b13
 
 
 
843425c
e1e6b13
843425c
 
 
 
 
 
e1e6b13
843425c
64428bf
 
 
 
 
 
e1e6b13
4d799f2
64428bf
 
e1e6b13
 
f63af71
64428bf
4d799f2
e1e6b13
4d799f2
 
 
 
e1e6b13
 
fe92162
e1e6b13
 
4d799f2
e1e6b13
4d799f2
 
 
 
 
e1e6b13
 
4d799f2
 
e1e6b13
4d799f2
 
 
 
e1e6b13
 
 
 
 
 
4d799f2
 
 
 
 
e1e6b13
4d799f2
f63af71
64428bf
f63af71
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import sys
import json
import tempfile
import pandas as pd
import gradio as gr

# 1) Adjust sys.path BEFORE importing smi_ted_light
# The inference directory is resolved relative to this file and placed at the
# front of sys.path so the import below can be resolved from it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INFERENCE_PATH = os.path.join(BASE_DIR, "smi-ted", "inference")
sys.path.insert(0, INFERENCE_PATH)

# Must stay below the sys.path tweak above (intentionally not at the top of the file).
from smi_ted_light.load import load_smi_ted

# 2) Load the model
# Runs once at import time; `model` is the module-level object used by the
# embedding callback further down.
MODEL_DIR = os.path.join(INFERENCE_PATH, "smi_ted_light")
model = load_smi_ted(
    folder=MODEL_DIR,
    ckpt_filename="smi-ted-Light_40.pt",
    vocab_filename="bert_vocab_curated.txt",
)

# 3) Callback that returns a JSON STRING + a gr.update for the CSV component
def _write_embedding_csv(vetor):
    """Write *vetor* as a single-row CSV into a fresh temp file and return its path.

    The file is created with ``delete=False`` so it outlives this call and can
    be handed to the download component; it is removed here only when the
    write itself fails.
    """
    # Reserve a unique path and close the handle immediately, so pandas is the
    # only writer and no open descriptor is left behind if the write fails.
    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
        csv_path = tmp.name
    try:
        pd.DataFrame([vetor]).to_csv(csv_path, index=False)
    except Exception:
        # Nobody else will clean up a delete=False file; drop the orphan.
        if os.path.exists(csv_path):
            os.remove(csv_path)
        raise
    return csv_path


def gerar_embedding_e_csv(smiles: str):
    """Encode a SMILES string and expose the embedding as JSON text and as a CSV file.

    Args:
        smiles: SMILES string typed by the user. ``None``, empty, or
            whitespace-only input is treated as "nothing entered".

    Returns:
        A ``(text, file_update)`` pair matching the two output components:
        ``text`` is a JSON string holding either the embedding vector or an
        ``{"erro": ...}`` object, and ``file_update`` is a ``gr.update`` that
        shows the CSV path on success and hides the file component otherwise.
        Errors are reported through the returned JSON rather than raised.
    """
    # Guard against None as well as blank input; .strip() on None would raise
    # before any error could be reported back to the UI.
    smiles = (smiles or "").strip()
    if not smiles:
        erro = {"erro": "digite uma sequência SMILES primeiro"}
        # ensure_ascii=False keeps accented characters readable in the textbox
        # instead of rendering them as \uXXXX escapes.
        return json.dumps(erro, ensure_ascii=False), gr.update(visible=False)

    try:
        vetor = model.encode(smiles, return_torch=True)[0].tolist()
        # Serialize before touching the filesystem so a serialization failure
        # cannot leave a temp file behind.
        texto = json.dumps(vetor)
        csv_path = _write_embedding_csv(vetor)
    except Exception as e:
        # UI boundary: surface any model/IO failure as a message in the
        # textbox instead of crashing the callback.
        erro = {"erro": str(e)}
        return json.dumps(erro, ensure_ascii=False), gr.update(visible=False)

    # Return the JSON string and make the download link visible.
    return texto, gr.update(value=csv_path, visible=True)

# 4) Blocks interface
# Layout: a header, a row with the SMILES input and the trigger button, and a
# row with the JSON output textbox and the CSV download component.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        ## SMI-TED Embedding Generator  
        Cole uma sequência SMILES e receba:
        1. Uma **string JSON** com o vetor (Textbox)  
        2. Um link para **baixar o CSV** (File)  
        """
    )

    with gr.Row():
        inp_smiles = gr.Textbox(label="SMILES", placeholder="Ex.: CCO")
        btn = gr.Button("Gerar Embedding")
    with gr.Row():
        # Read-only textbox that receives the JSON string (vector or error).
        out_text = gr.Textbox(
            label="Embedding (JSON)", 
            interactive=False, 
            lines=4, 
            placeholder='Vai aparecer aqui o vetor como JSON...'
        )
        # Starts hidden; the callback's second return value toggles visibility.
        out_file = gr.File(label="Download do CSV", visible=False)

    # The callback returns two values, mapped in order to [out_text, out_file].
    btn.click(
        fn=gerar_embedding_e_csv,
        inputs=inp_smiles,
        outputs=[out_text, out_file]
    )

# Launch the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()