Mauro24 committed on
Commit
56adc9e
·
verified ·
1 Parent(s): b474dc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -27
app.py CHANGED
@@ -65,49 +65,92 @@ sentences = [sent.text for sent in doc.sents] # Estrarre frasi dal testo
65
  # Crea gli embedding per il manuale
66
  embeddings = model.encode(sentences, batch_size=8, show_progress_bar=True)
67
 
68
- # Funzione per ottenere le frasi più rilevanti
69
- def find_relevant_sentences(query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  query_embedding = model.encode([query])
71
  similarities = cosine_similarity(query_embedding, embeddings).flatten()
72
 
73
- # Filtra i risultati in base alla similitudine
74
- threshold = 0.2
75
  filtered_results = [(idx, sim) for idx, sim in enumerate(similarities) if sim >= threshold]
76
-
77
- # Ordina i risultati per similitudine
78
  filtered_results.sort(key=lambda x: x[1], reverse=True)
79
 
80
- # Ottieni le frasi più rilevanti
81
- top_n = 5
 
82
  relevant_sentences = [sentences[idx] for idx, _ in filtered_results[:top_n]]
83
-
84
- doc = nlp(" ".join(relevant_sentences))
85
-
86
- grouped_results = [sent.text for sent in doc.sents]
87
- # Pulizia
88
- cleaned_results = [text.replace("\n", " ") for text in grouped_results] # Rimuove gli a capo
89
- final_output = " ".join(cleaned_results) # Combina tutte le frasi in un unico testo
90
 
91
-
92
- return final_output
 
 
 
 
 
93
 
 
 
 
 
 
 
 
94
  examples = [
95
  ["irresponsible use of the machine?"],
96
- ["If I have a problem how can I get help? "],
97
  ["precautions when using the cutting machine"],
98
- ["How do I change the knife of the cutting machine?"],
99
-
 
100
  ]
101
 
102
- # Interfaccia Gradio
103
  iface = gr.Interface(
104
- fn=find_relevant_sentences,
105
- inputs=gr.Textbox(label="Insert your query"),
106
- outputs=gr.Textbox(label="Relevant sentences"),
107
- examples=examples,
108
  title="Manual Querying System",
109
- description="Enter a question about the machine, and this tool will find the most relevant sentences from the manual."
110
  )
111
 
112
- # Avvia l'app Gradio
113
  iface.launch()
 
65
  # Crea gli embedding per il manuale
66
  embeddings = model.encode(sentences, batch_size=8, show_progress_bar=True)
67
 
68
# Path of the folder that holds the figure images
image_folder = "./images"
70
+
71
def extract_figure_numbers(text):
    """Return every figure number referenced in *text* as "(Figure N)".

    Matching is case-insensitive and tolerates any whitespace (including a
    line break) between the parentheses, the word "Figure" and the number,
    so references that were wrapped across lines are still found.

    Args:
        text: The sentence to scan. Empty or ``None`` yields no matches.

    Returns:
        A list of the figure numbers as digit strings, in order of
        appearance. The list is empty when there is no reference.
    """
    if not text:
        return []
    # re.findall already returns [] when nothing matches, so no extra branch
    # is needed for the "no reference" case.
    return re.findall(r"\(\s*Figure\s*(\d+)\s*\)", text, re.IGNORECASE)
77
+
78
+
79
def generate_figure_mapping(folder):
    """Build a lookup from figure reference to image file name.

    Every ``.jpg``, ``.jpeg`` or ``.png`` file (extension matched
    case-insensitively) in *folder* is registered under its file stem with
    underscores replaced by spaces, e.g. ``"Figure_1.png"`` is stored as
    ``"Figure 1"``.

    Args:
        folder: Directory to scan. It is listed non-recursively.

    Returns:
        A dict mapping the figure reference to the file name (not the full
        path).

    Raises:
        OSError: Propagated from ``os.listdir`` when *folder* cannot be read.
    """
    image_extensions = (".jpg", ".png", ".jpeg")
    mapping = {}
    # Sorted so that, if two files reduce to the same reference, the winner
    # does not depend on the directory listing order.
    for file_name in sorted(os.listdir(folder)):
        # splitext removes only the final extension, so a stem that itself
        # contains dots ("fig.3.jpeg") is kept whole instead of being cut at
        # the first dot.
        stem, extension = os.path.splitext(file_name)
        if extension.lower() in image_extensions:
            mapping[stem.replace("_", " ")] = file_name
    return mapping
87
+
88
# Figure lookup built once, when the module is loaded, from the files in image_folder
figure_mapping = generate_figure_mapping(image_folder)
#print("Generated figure mapping:", figure_mapping)
90
+
91
def format_sentences(sentences):
    """Join *sentences* into one string, turning each '|' into a line break.

    The sentences are joined with single spaces, everything from the first
    ".end" marker onwards is discarded, and every '|' delimiter becomes a
    newline. Spaces directly around a delimiter are removed so that no line
    ends or starts with a stray space.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        The formatted text. An empty iterable yields an empty string.
    """
    # Keep only the text before the first '.end' marker. split() returns the
    # whole string when the marker is absent.
    text = " ".join(sentences).split(".end", 1)[0]

    pieces = text.split("|")
    final_position = len(pieces) - 1
    lines = []
    for position, piece in enumerate(pieces):
        # Trim only the sides that touch a delimiter: the space inserted by
        # the join above would otherwise show up at the start of each line.
        if position > 0:
            piece = piece.lstrip(" ")
        if position < final_position:
            piece = piece.rstrip(" ")
        lines.append(piece)
    return "\n".join(lines)
107
+
108
def find_relevant_sentences(query, threshold=0.2, top_n=6):
    """Find the manual sentences most similar to *query* and their figures.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the precomputed ``embeddings``. Sentences
    scoring at least *threshold* are ranked from most to least similar and
    the best *top_n* are kept.

    Args:
        query: The user's question.
        threshold: Minimum cosine similarity for a sentence to be kept.
        top_n: Maximum number of sentences to return.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the formatted response.
        ``images`` is a list of existing image paths for the figures cited
        in the selected sentences, de-duplicated and ordered by sentence
        relevance. When no sentence reaches the threshold, ``images`` is
        ``None``.
    """
    query_embedding = model.encode([query])
    similarities = cosine_similarity(query_embedding, embeddings).flatten()

    # sorted() is stable, so sentences with equal scores keep document order.
    ranked = sorted(
        ((idx, sim) for idx, sim in enumerate(similarities) if sim >= threshold),
        key=lambda pair: pair[1],
        reverse=True,
    )
    if not ranked:
        return "**RESPONSE:**\nNo relevant sentences found for your query.", None

    relevant_sentences = [sentences[idx] for idx, _ in ranked[:top_n]]

    # A dict is used as an ordered set: it removes duplicates while keeping
    # the gallery in relevance order, which a plain set would not guarantee.
    relevant_images = {}
    for sentence in relevant_sentences:
        for figure_number in extract_figure_numbers(sentence):
            # NOTE(review): the lookup key is the bare digit string, while the
            # mapping keys are file stems - this presumably requires images
            # named like "12.png"; confirm against the images folder.
            file_name = figure_mapping.get(figure_number)
            if file_name is None:
                continue
            image_path = os.path.join(image_folder, file_name)
            if os.path.exists(image_path):
                relevant_images[image_path] = None

    # Sentences are returned as one formatted text, without categorisation.
    formatted_response = "****\n" + format_sentences(relevant_sentences)
    return formatted_response, list(relevant_images)
134
+
135
+
136
+
137
# Gradio interface: sample queries offered to the user, one per example row
examples = [
    [question]
    for question in (
        "irresponsible use of the machine?",
        "If I have a problem how can I get help?",
        "precautions when using the cutting machine",
        "How do I DRILL BIT REPLACEMENT ?",
        "instructions for changing the knife",
        "lubrication for the knife holder cylinder",
    )
]

# One text input; two outputs matching the (text, images) pair returned by
# find_relevant_sentences.
iface = gr.Interface(
    fn=find_relevant_sentences,
    inputs=gr.Textbox(label="Insert your query"),
    outputs=[
        gr.Textbox(label="Relevant sentences"),
        gr.Gallery(label="Relevant figures"),
    ],
    examples=examples,
    title="Manual Querying System",
    description=(
        "Enter a question about the machine, and this tool will find the "
        "most relevant sentences and associated figures from the manual."
    ),
)

# Start the Gradio app
iface.launch()