Spaces:

Mauro24
/

sentence-transformer-demo

Running

App Files Files Community

Mauro24 committed on Jan 1

Commit

0cea8e5

verified ·

1 Parent(s): a0e6e11

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -143

app.py CHANGED Viewed

@@ -1,159 +1,46 @@
-import os
-import spacy
 import gradio as gr
-from sentence_transformers import SentenceTransformer
-from sklearn.metrics.pairwise import cosine_similarity
-import numpy as np
-import zipfile
-import re
-print("Directory corrente:", os.getcwd())
-zip_path = "en_core_web_lg-3.8.0.zip"  # Carica il file ZIP nella cartella del progetto
-extraction_dir = "./extracted_models"  # Scegli una sottocartella per l'estrazione
-test_dir = "./extracted_models/en_core_web_lg-3.8.0"  # Cartella dopo l'estrazione
-# Verifica se la cartella esiste già
-if not os.path.exists(test_dir):
-    # Se la cartella non esiste, decomprimi il file ZIP
-    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-        zip_ref.extractall(extraction_dir)
-    print(f"Modello estratto correttamente nella cartella {extraction_dir}")
-# Percorso del file zip caricato
-zip_path = "images.zip"  # Assicurati che il file sia stato caricato su Hugging Face
-extract_to = "images"    # Directory di destinazione per le immagini
-# Controlla se la directory esiste già
-if not os.path.exists(extract_to):
-    os.makedirs(extract_to)  # Crea la directory
-# Estrai il file zip
-if os.path.exists(zip_path):  # Controlla che il file zip esista
-    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-        zip_ref.extractall(extract_to)
-    print(f"Immagini estratte nella directory: {extract_to}")
-    print("Contenuto della directory images:", os.listdir(extract_to))
-else:
-    print(f"File {zip_path} non trovato. Assicurati di caricarlo nello Space.")
-# Percorso della cartella estratta
-model_path = os.path.join(extraction_dir, "en_core_web_lg-3.8.0")  # Assicurati che sia corretto
-    # Carica il modello
-nlp = spacy.load(model_path)
-# Carica il modello SentenceTransformer
-model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2', device='cpu')
-# Preprocessamento manuale (carica il manuale da un file o base di dati)
-with open('testo.txt', 'r', encoding='utf-8') as file:
-    text = file.read()
-# Tokenizza il testo in frasi usando SpaCy
-doc = nlp(text)
-sentences = [sent.text for sent in doc.sents]  # Estrarre frasi dal testo
-# Crea gli embedding per il manuale
-embeddings = model.encode(sentences, batch_size=8, show_progress_bar=True)
-# Percorso della cartella delle immagini
-image_folder = "images"
-def extract_figure_numbers(text):
-    """Estrae tutti i numeri delle figure da una frase."""
-    matches = re.findall(r"\(Figure (\d+)\)", text, re.IGNORECASE)
-    if matches:
-        return matches  # Restituisce una lista di numeri di figure
-    return []
-def generate_figure_mapping(folder):
-    """Genera la mappatura delle figure dal nome dei file immagini."""
-    mapping = {}
-    for file_name in os.listdir(folder):
-        if file_name.lower().endswith((".jpg", ".png", ".jpeg")):
-            figure_reference = file_name.split(".")[0].replace("_", " ")
-            mapping[figure_reference] = file_name
-    return mapping
-figure_mapping = generate_figure_mapping(image_folder)
-#print("Generated figure mapping:", figure_mapping)
-def format_sentences(sentences):
-    """
-    Converte la lista in una stringa, sostituendo i delimitatori '|' con un a capo senza aggiungere spazi extra.
-    Interrompe il processo se trova '.end'.
-    """
-    # Uniamo la lista in una singola stringa
-    sentences_str = " ".join(sentences)
-    # Interrompiamo al primo '.end'
-    if ".end" in sentences_str:
-        sentences_str = sentences_str.split(".end")[0]
-    # Sostituiamo il delimitatore '|' con un a capo
-    formatted_response = sentences_str.replace(" |", "\n").replace("|", "\n")
-    return formatted_response
-def find_relevant_sentences(query, threshold=0.2, top_n=6):
-    """Trova le frasi più rilevanti e le immagini collegate."""
-    global sentences
-    query_embedding = model.encode([query])
-    similarities = cosine_similarity(query_embedding, embeddings).flatten()
-    filtered_results = [(idx, sim) for idx, sim in enumerate(similarities) if sim >= threshold]
-    filtered_results.sort(key=lambda x: x[1], reverse=True)
-    if not filtered_results:
-        return "**RESPONSE:**\nNo relevant sentences found for your query.", None
-    relevant_sentences = [sentences[idx] for idx, _ in filtered_results[:top_n]]
-    relevant_images = set()  # Usa un set per evitare duplicati
-    for sent in relevant_sentences:
-        figure_numbers = extract_figure_numbers(sent)  # Restituisce una lista di figure
-        for figure_number in figure_numbers:
-            if figure_number in figure_mapping:
-                image_path = os.path.join(image_folder, figure_mapping[figure_number])
-                if os.path.exists(image_path):
-                    relevant_images.add(image_path)  # Aggiunge al set
-    # Formatta le frasi senza categorizzazione
-    formatted_response = "****\n" + format_sentences(relevant_sentences)
-    return formatted_response, list(relevant_images)  # Converte il set in lista
-# Interfaccia Gradio
 examples = [
-    ["irresponsible use of the machine?"],
-    ["If I have a problem how can I get help?"],
-    ["precautions when using the cutting machine"],
-    ["How do I  DRILL BIT REPLACEMENT ?"],
-    ["instructions for changing the knife"],
-    ["lubrication for the knife holder cylinder"]
 ]
 iface = gr.Interface(
-    fn=find_relevant_sentences,
-    inputs=gr.Textbox(label="Insert your query"),
     outputs=[
-        gr.Textbox(label="Relevant sentences"),
-        gr.Gallery(label="Relevant figures", value=[os.path.join(image_folder, "4b.jpg")])  # Anteprima immagine iniziale
-    ],
-    examples=examples,
-    title="Manual Querying System",
-    description="Enter a question about the machine, and this tool will find the most relevant sentences and associated figures from the manual.",
 )
 iface.launch()

 import gradio as gr
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+# Carica il modello di embedding
+embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/LaBSE")
+# Carica i vectorstore FAISS salvati
+vectorstore = FAISS.load_local("faiss_index", embedding_model, allow_dangerous_deserialization=True)
+manual_vectorstore = FAISS.load_local("faiss_manual_index", embedding_model, allow_dangerous_deserialization=True)
+problems_vectorstore = FAISS.load_local("faiss_problems_index", embedding_model, allow_dangerous_deserialization=True)
+def search_query(query):
+    # Cerca nei manuali
+    manual_results = manual_vectorstore.similarity_search(query, k=2)
+    manual_output = "\n\n".join([doc.page_content for doc in manual_results])
+    # Cerca nei problemi
+    problems_results = problems_vectorstore.similarity_search(query, k=2)
+    problems_output = "\n\n".join([doc.page_content for doc in problems_results])
+    # Restituisce i risultati come output diviso
+    return manual_output, problems_output
 examples = [
+    ["How to change the knife?"],
+    ["What are the safety precautions for using the machine?"],
+    ["How can I get help with the machine?"]
 ]
+# Interfaccia Gradio
 iface = gr.Interface(
+    fn=search_query,
+   inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
     outputs=[
+    gr.Textbox(label="Manual Results"),
+    gr.Textbox(label="Issues Results")
+],
+    examples=examples,
+     title="Manual Querying System",
+    description="Enter a question to get relevant information extracted from the manual and the most common related issues."
 )
+# Avvia l'app
 iface.launch()