Enzo Reis de Oliveira committed on
Commit
5465560
·
1 Parent(s): 214fccd

Putting everything in English

Browse files
Files changed (1) hide show
  1. app.py +21 -21
app.py CHANGED
@@ -4,14 +4,14 @@ import json
4
  import pandas as pd
5
  import gradio as gr
6
 
7
- # 1) Ajusta o path antes de importar o loader
8
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
9
  INFERENCE_PATH = os.path.join(BASE_DIR, "smi-ted", "inference")
10
  sys.path.insert(0, INFERENCE_PATH)
11
 
12
  from smi_ted_light.load import load_smi_ted
13
 
14
- # 2) Carrega o modelo
15
  MODEL_DIR = os.path.join(INFERENCE_PATH, "smi_ted_light")
16
  model = load_smi_ted(
17
  folder=MODEL_DIR,
@@ -19,9 +19,9 @@ model = load_smi_ted(
19
  vocab_filename="bert_vocab_curated.txt",
20
  )
21
 
22
- # 3) Função única para processar SMILES simples ou CSV de SMILES
23
  def process_inputs(smiles: str, file_obj):
24
- # Se vier um arquivo CSV, processa em batch
25
  if file_obj is not None:
26
  try:
27
  df_in = pd.read_csv(file_obj.name)
@@ -30,51 +30,51 @@ def process_inputs(smiles: str, file_obj):
30
  for sm in smiles_list:
31
  vec = model.encode(sm, return_torch=True)[0].tolist()
32
  embeddings.append(vec)
33
- # Monta DataFrame de saída
34
  out_df = pd.DataFrame(embeddings)
35
  out_df.insert(0, "smiles", smiles_list)
36
  out_df.to_csv("embeddings.csv", index=False)
37
- msg = f"Batch de {len(smiles_list)} SMILES processado. Baixe em embeddings.csv."
38
  return msg, gr.update(value="embeddings.csv", visible=True)
39
  except Exception as e:
40
- return f"Erro no batch: {e}", gr.update(visible=False)
41
 
42
- # Senão, processa SMILES único
43
  smiles = smiles.strip()
44
  if not smiles:
45
- return "Digite um SMILES ou envie um arquivo CSV.", gr.update(visible=False)
46
  try:
47
  vec = model.encode(smiles, return_torch=True)[0].tolist()
48
- # Salva CSV com cabeçalho
49
  cols = ["smiles"] + [f"dim_{i}" for i in range(len(vec))]
50
  df_out = pd.DataFrame([[smiles] + vec], columns=cols)
51
  df_out.to_csv("embeddings.csv", index=False)
52
  return json.dumps(vec), gr.update(value="embeddings.csv", visible=True)
53
  except Exception as e:
54
- return f"Erro ao gerar embedding: {e}", gr.update(visible=False)
55
 
56
- # 4) Monta interface Blocks
57
  with gr.Blocks() as demo:
58
  gr.Markdown(
59
  """
60
  # SMI-TED Embedding Generator
61
- **Modo único:** cole um SMILES na caixa à esquerda.
62
- **Modo batch:** faça upload de um CSV com várias linhas de SMILES (eles devem estar na primeira coluna).
63
- Em ambos os casos, será gerado um arquivo `embeddings.csv` para download, com a primeira coluna de SMILES e o embedding nas colunas seguintes.
64
  """
65
  )
66
 
67
  with gr.Row():
68
- smiles_in = gr.Textbox(label="SMILES (modo único)", placeholder="Ex.: CCO")
69
- file_in = gr.File(label="CSV de SMILES (modo batch)", file_types=[".csv"])
70
 
71
- gerar_btn = gr.Button("Gerar Embeddings")
72
 
73
  with gr.Row():
74
- output_msg = gr.Textbox(label="Resposta/Embedding (JSON)", interactive=False, lines=2)
75
- download_csv = gr.File(label="Baixar embeddings.csv", visible=False)
76
 
77
- gerar_btn.click(
78
  fn=process_inputs,
79
  inputs=[smiles_in, file_in],
80
  outputs=[output_msg, download_csv]
 
4
  import pandas as pd
5
  import gradio as gr
6
 
7
+ # 1) Adjust path before importing the loader
8
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
9
  INFERENCE_PATH = os.path.join(BASE_DIR, "smi-ted", "inference")
10
  sys.path.insert(0, INFERENCE_PATH)
11
 
12
  from smi_ted_light.load import load_smi_ted
13
 
14
+ # 2) Load the SMI-TED Light model
15
  MODEL_DIR = os.path.join(INFERENCE_PATH, "smi_ted_light")
16
  model = load_smi_ted(
17
  folder=MODEL_DIR,
 
19
  vocab_filename="bert_vocab_curated.txt",
20
  )
21
 
22
+ # 3) Single function to process either a single SMILES or a CSV of SMILES
23
  def process_inputs(smiles: str, file_obj):
24
+ # If a CSV file is provided, process in batch
25
  if file_obj is not None:
26
  try:
27
  df_in = pd.read_csv(file_obj.name)
 
30
  for sm in smiles_list:
31
  vec = model.encode(sm, return_torch=True)[0].tolist()
32
  embeddings.append(vec)
33
+ # Build output DataFrame
34
  out_df = pd.DataFrame(embeddings)
35
  out_df.insert(0, "smiles", smiles_list)
36
  out_df.to_csv("embeddings.csv", index=False)
37
+ msg = f"Processed batch of {len(smiles_list)} SMILES. Download embeddings.csv."
38
  return msg, gr.update(value="embeddings.csv", visible=True)
39
  except Exception as e:
40
+ return f"Error processing batch: {e}", gr.update(visible=False)
41
 
42
+ # Otherwise, process a single SMILES
43
  smiles = smiles.strip()
44
  if not smiles:
45
+ return "Please enter a SMILES or upload a CSV file.", gr.update(visible=False)
46
  try:
47
  vec = model.encode(smiles, return_torch=True)[0].tolist()
48
+ # Save CSV with header
49
  cols = ["smiles"] + [f"dim_{i}" for i in range(len(vec))]
50
  df_out = pd.DataFrame([[smiles] + vec], columns=cols)
51
  df_out.to_csv("embeddings.csv", index=False)
52
  return json.dumps(vec), gr.update(value="embeddings.csv", visible=True)
53
  except Exception as e:
54
+ return f"Error generating embedding: {e}", gr.update(visible=False)
55
 
56
+ # 4) Build the Gradio Blocks interface
57
  with gr.Blocks() as demo:
58
  gr.Markdown(
59
  """
60
  # SMI-TED Embedding Generator
61
+ **Single mode:** paste a SMILES string in the left box.
62
+ **Batch mode:** upload a CSV file where each row has a SMILES in the first column.
63
+ In both cases, an `embeddings.csv` file will be generated for download, with the first column as SMILES and the embedding values in the following columns.
64
  """
65
  )
66
 
67
  with gr.Row():
68
+ smiles_in = gr.Textbox(label="SMILES (single mode)", placeholder="e.g. CCO")
69
+ file_in = gr.File(label="SMILES CSV (batch mode)", file_types=[".csv"])
70
 
71
+ generate_btn = gr.Button("Generate Embeddings")
72
 
73
  with gr.Row():
74
+ output_msg = gr.Textbox(label="Message / Embedding (JSON)", interactive=False, lines=2)
75
+ download_csv = gr.File(label="Download embeddings.csv", visible=False)
76
 
77
+ generate_btn.click(
78
  fn=process_inputs,
79
  inputs=[smiles_in, file_in],
80
  outputs=[output_msg, download_csv]