Mauro24 committed on
Commit
f77c0a8
·
verified ·
1 Parent(s): 3ebe6ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -1,22 +1,21 @@
1
  import gradio as gr
2
  from sentence_transformers import SentenceTransformer
3
- from scipy.spatial.distance import cosine
4
  from sklearn.metrics.pairwise import cosine_similarity
5
- import nltk
6
 
7
- nltk.download('punkt', download_dir='./nltk_data')
8
- nltk.data.path.append('./nltk_data')
9
 
10
- # Carica il modello
11
  model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device='cpu')
12
- nltk.download('punkt')
13
 
14
- # Preprocessamento manuale (potresti caricare il manuale da un file o base di dati)
15
  with open('testo.txt', 'r', encoding='utf-8') as file:
16
  text = file.read()
17
 
18
- # Tokenizza il testo
19
- sentences = nltk.sent_tokenize(text)
 
20
 
21
  # Crea gli embedding per il manuale
22
  embeddings = model.encode(sentences, batch_size=8, show_progress_bar=True)
@@ -49,4 +48,4 @@ iface = gr.Interface(
49
  )
50
 
51
  # Avvia l'app Gradio
52
- iface.launch()
 
1
  import gradio as gr
2
  from sentence_transformers import SentenceTransformer
 
3
  from sklearn.metrics.pairwise import cosine_similarity
4
+ import spacy
5
 
6
+ # Carica SpaCy
7
+ nlp = spacy.load("en_core_web_sm") # Assicurati di avere installato il modello SpaCy `en_core_web_sm`
8
 
9
+ # Carica il modello SentenceTransformer
10
  model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device='cpu')
 
11
 
12
+ # Preprocessamento manuale (carica il manuale da un file o base di dati)
13
  with open('testo.txt', 'r', encoding='utf-8') as file:
14
  text = file.read()
15
 
16
+ # Tokenizza il testo in frasi usando SpaCy
17
+ doc = nlp(text)
18
+ sentences = [sent.text for sent in doc.sents] # Estrarre frasi dal testo
19
 
20
  # Crea gli embedding per il manuale
21
  embeddings = model.encode(sentences, batch_size=8, show_progress_bar=True)
 
48
  )
49
 
50
  # Avvia l'app Gradio
51
+ iface.launch()