Enzo Reis de Oliveira committed on
Commit
cb4cd4f
·
1 Parent(s): f3e37c7

Searching for the SMILES column regardless of its position

Browse files
Files changed (1) hide show
  1. app.py +19 -5
app.py CHANGED
@@ -21,31 +21,44 @@ model = load_smi_ted(
21
 
22
  # 3) Single function to process either a single SMILES or a CSV of SMILES
23
  def process_inputs(smiles: str, file_obj):
24
- # If a CSV file is provided, process in batch
25
  if file_obj is not None:
26
  try:
27
  df_in = pd.read_csv(file_obj.name)
28
- smiles_list = df_in.iloc[:, 0].astype(str).tolist()
 
 
 
 
 
 
 
 
 
 
29
  embeddings = []
30
  for sm in smiles_list:
31
  vec = model.encode(sm, return_torch=True)[0].tolist()
32
  embeddings.append(vec)
33
- # Build output DataFrame
 
34
  out_df = pd.DataFrame(embeddings)
35
  out_df.insert(0, "smiles", smiles_list)
36
  out_df.to_csv("embeddings.csv", index=False)
 
37
  msg = f"Processed batch of {len(smiles_list)} SMILES. Download embeddings.csv."
38
  return msg, gr.update(value="embeddings.csv", visible=True)
 
39
  except Exception as e:
40
  return f"Error processing batch: {e}", gr.update(visible=False)
41
 
42
- # Otherwise, process a single SMILES
43
  smiles = smiles.strip()
44
  if not smiles:
45
  return "Please enter a SMILES or upload a CSV file.", gr.update(visible=False)
46
  try:
47
  vec = model.encode(smiles, return_torch=True)[0].tolist()
48
- # Save CSV with header
49
  cols = ["smiles"] + [f"dim_{i}" for i in range(len(vec))]
50
  df_out = pd.DataFrame([[smiles] + vec], columns=cols)
51
  df_out.to_csv("embeddings.csv", index=False)
@@ -53,6 +66,7 @@ def process_inputs(smiles: str, file_obj):
53
  except Exception as e:
54
  return f"Error extracting embedding: {e}", gr.update(visible=False)
55
 
 
56
  # 4) Build the Gradio Blocks interface
57
  with gr.Blocks() as demo:
58
  gr.Markdown(
 
21
 
22
  # 3) Single function to process either a single SMILES or a CSV of SMILES
23
  def process_inputs(smiles: str, file_obj):
24
+ # Se um arquivo CSV for fornecido, processa em batch
25
  if file_obj is not None:
26
  try:
27
  df_in = pd.read_csv(file_obj.name)
28
+
29
+ # Procura coluna "smiles" (case‐insensitive), mas sem aceitar prefixes/sufixos
30
+ smiles_cols = [col for col in df_in.columns if col.lower() == "smiles"]
31
+ if not smiles_cols:
32
+ return (
33
+ "Error: The CSV must have a column named 'Smiles' with the respective SMILES.",
34
+ gr.update(visible=False),
35
+ )
36
+ smiles_col = smiles_cols[0]
37
+ smiles_list = df_in[smiles_col].astype(str).tolist()
38
+
39
  embeddings = []
40
  for sm in smiles_list:
41
  vec = model.encode(sm, return_torch=True)[0].tolist()
42
  embeddings.append(vec)
43
+
44
+ # Constroi DataFrame de saída
45
  out_df = pd.DataFrame(embeddings)
46
  out_df.insert(0, "smiles", smiles_list)
47
  out_df.to_csv("embeddings.csv", index=False)
48
+
49
  msg = f"Processed batch of {len(smiles_list)} SMILES. Download embeddings.csv."
50
  return msg, gr.update(value="embeddings.csv", visible=True)
51
+
52
  except Exception as e:
53
  return f"Error processing batch: {e}", gr.update(visible=False)
54
 
55
+ # Modo single
56
  smiles = smiles.strip()
57
  if not smiles:
58
  return "Please enter a SMILES or upload a CSV file.", gr.update(visible=False)
59
  try:
60
  vec = model.encode(smiles, return_torch=True)[0].tolist()
61
+ # Salva CSV com header
62
  cols = ["smiles"] + [f"dim_{i}" for i in range(len(vec))]
63
  df_out = pd.DataFrame([[smiles] + vec], columns=cols)
64
  df_out.to_csv("embeddings.csv", index=False)
 
66
  except Exception as e:
67
  return f"Error extracting embedding: {e}", gr.update(visible=False)
68
 
69
+
70
  # 4) Build the Gradio Blocks interface
71
  with gr.Blocks() as demo:
72
  gr.Markdown(