Spaces:
Sleeping
Sleeping
File size: 3,089 Bytes
38f8736 6c05acd 38f8736 351552e 38f8736 3818f5a 7eaf1c3 3d3f8f8 7eaf1c3 3818f5a 3d3f8f8 1ab68b7 3d3f8f8 7eaf1c3 3d3f8f8 7eaf1c3 6288997 7eaf1c3 3d3f8f8 3818f5a 3d3f8f8 1833979 3d3f8f8 120ad08 3d3f8f8 3818f5a 3d3f8f8 3818f5a 3d3f8f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import os
import spacy
import gradio as gr
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import zipfile
# Zipped spaCy model shipped alongside the project files.
archive_path = "en_core_web_lg-3.8.0.zip"
unpack_dir = "./extracted_models"
unpacked_model_dir = "./extracted_models/en_core_web_lg-3.8.0"

# Extract the model archive only once; later runs reuse the unpacked copy.
if not os.path.exists(unpacked_model_dir):
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(unpack_dir)
    print(f"Modello estratto correttamente nella cartella {unpack_dir}")

# Load the extracted spaCy pipeline (used for sentence segmentation).
model_path = os.path.join(unpack_dir, "en_core_web_lg-3.8.0")
nlp = spacy.load(model_path)

# Multilingual sentence-embedding model, forced onto CPU.
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2', device='cpu')

# Read the manual's full text from disk.
with open('testo.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Segment the manual into sentences and precompute one embedding per sentence.
doc = nlp(text)
sentences = [sent.text for sent in doc.sents]
embeddings = model.encode(sentences, batch_size=8, show_progress_bar=True)
# Funzione per ottenere le frasi più rilevanti
def find_relevant_sentences(query, threshold=0.2, top_n=5):
    """Return the manual sentences most semantically similar to *query*.

    The query is embedded with the module-level SentenceTransformer and
    scored by cosine similarity against the precomputed ``embeddings`` of
    the manual's sentences.

    Args:
        query: Free-text question from the user.
        threshold: Minimum cosine similarity a sentence must reach to be
            kept (default 0.2, matching the previous hard-coded value).
        top_n: Maximum number of sentences returned (default 5, matching
            the previous hard-coded value).

    Returns:
        A single string with the selected sentences joined by spaces and
        embedded newlines removed; empty string if nothing passes the
        threshold.
    """
    query_embedding = model.encode([query])
    similarities = cosine_similarity(query_embedding, embeddings).flatten()
    # Keep only (index, score) pairs above the relevance threshold,
    # ordered best-match first.
    candidates = [(idx, sim) for idx, sim in enumerate(similarities) if sim >= threshold]
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    relevant = [sentences[idx] for idx, _ in candidates[:top_n]]
    # Re-segment with spaCy so the concatenated answer is split on proper
    # sentence boundaries. (Local names chosen to avoid shadowing the
    # module-level `doc` and `text` globals, as the original did.)
    resegmented = nlp(" ".join(relevant))
    cleaned = [sent.text.replace("\n", " ") for sent in resegmented.sents]
    return " ".join(cleaned)
# Sample queries displayed as clickable examples in the UI.
example_queries = (
    "irresponsible use of the machine?",
    "If I have a problem how can I get help? ",
    "precautions when using the cutting machine",
    "How do I change the knife of the cutting machine?",
)
examples = [[query] for query in example_queries]

# Wire the retrieval function into a simple text-in / text-out Gradio UI
# and start serving it.
demo = gr.Interface(
    fn=find_relevant_sentences,
    title="Manual Querying System",
    description="Enter a question about the machine, and this tool will find the most relevant sentences from the manual.",
    inputs=gr.Textbox(label="Insert your query"),
    outputs=gr.Textbox(label="Relevant sentences"),
    examples=examples,
)
demo.launch()
|