JeanCGuerrero committed on
Commit
5d4742e
verified
1 Parent(s): 75f4cb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -18
app.py CHANGED
@@ -1,31 +1,31 @@
1
  import os
2
- import subprocess
3
  import gradio as gr
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
- from langchain_community.document_loaders import WebBaseLoader
6
  from langchain_community.vectorstores import Chroma
7
  from langchain_huggingface import HuggingFaceEmbeddings
8
- from langchain.document_loaders import PyPDFLoader
9
  import requests
10
  from rerankers import Reranker
11
- from langchain_community.chat_models import ChatOllama
12
  from langchain import hub
13
  from langchain_core.output_parsers import StrOutputParser
14
 
15
- # Paso 1: Instalar ollama
16
- subprocess.run("curl -fsSL https://ollama.com/install.sh | sh", shell=True, check=True)
17
- subprocess.run("ollama serve &", shell=True, check=True)
18
- subprocess.run("ollama pull llama3.2:1b", shell=True, check=True)
19
 
20
- # Paso 2: Descargar el documento PDF
21
  URL = "https://gruposdetrabajo.sefh.es/gefp/images/stories/documentos/4-ATENCION-FARMACEUTICA/Nutricion/Manual_basico_N_clinica_y_Dietetica_Valencia_2012.pdf"
22
  response = requests.get(URL)
23
  with open("Manual_de_nutrici贸n_clinica.pdf", "wb") as f:
24
  f.write(response.content)
25
 
26
- # Paso 3: Inicializar el modelo y los embeddings
27
- local_llm = "llama3.2:1b"
28
- llm = ChatOllama(model=local_llm, temperature=0, top_k=50, top_p=0.95)
 
 
 
 
29
  chain = llm | StrOutputParser()
30
 
31
  # Cargar y procesar el PDF
@@ -42,7 +42,7 @@ vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, per
42
  # Inicializar el reranker
43
  ranker = Reranker("answerdotai/answerai-colbert-small-v1", model_type='colbert')
44
 
45
- # Paso 4: Definir la función RAG con reranking
46
  def format_docs(docs):
47
  return "\n\n".join(doc[0].page_content for doc in docs)
48
 
@@ -56,17 +56,14 @@ def test_rag_reranking(query, ranker):
56
  doc_details = doc.to_json()['kwargs']
57
  context.append(doc_details['page_content'])
58
  if len(context) > 0:
59
- # Aplicar reranking
60
  ranking = ranker.rank(query=query, docs=context)
61
- # Tomar el contexto más relevante
62
  useful_context = ranking[0].text
63
- # Generar la respuesta
64
  generation = rag_chain.invoke({"context": useful_context, "question": query})
65
  return generation
66
  else:
67
  return "No tengo información para responder a esta pregunta"
68
 
69
- # Paso 5: Crear una interfaz con Gradio
70
  def answer_query(query):
71
  return test_rag_reranking(query, ranker)
72
 
@@ -80,4 +77,3 @@ interface = gr.Interface(
80
 
81
  # Lanzar la interfaz
82
  interface.launch()
83
-
 
1
  import os
 
2
  import gradio as gr
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
4
  from langchain_community.vectorstores import Chroma
5
  from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_community.document_loaders import PyPDFLoader
7
  import requests
8
  from rerankers import Reranker
9
+ from langchain_huggingface import HuggingFacePipeline
10
  from langchain import hub
11
  from langchain_core.output_parsers import StrOutputParser
12
 
13
+ # Configurar USER_AGENT
14
+ os.environ["USER_AGENT"] = "MyHFSpace/1.0 (HuggingFace Space for Nutrition Q&A)"
 
 
15
 
16
+ # Paso 1: Descargar el documento PDF
17
  URL = "https://gruposdetrabajo.sefh.es/gefp/images/stories/documentos/4-ATENCION-FARMACEUTICA/Nutricion/Manual_basico_N_clinica_y_Dietetica_Valencia_2012.pdf"
18
  response = requests.get(URL)
19
  with open("Manual_de_nutrici贸n_clinica.pdf", "wb") as f:
20
  f.write(response.content)
21
 
22
# Step 2: Initialize the model via a Hugging Face pipeline.
# NOTE(review): the previous choice "distilbert-base-uncased" is a masked-LM
# (fill-mask) checkpoint with no causal-LM head, so it cannot serve a
# "text-generation" pipeline; a causal LM such as "gpt2" is required.
# Also, temperature=0 is invalid for transformers sampling (temperature must
# be > 0 when do_sample=True); deterministic output is expressed with greedy
# decoding (do_sample=False), which makes top_k/top_p irrelevant.
llm = HuggingFacePipeline.from_model_id(
    model_id="gpt2",  # swap for any causal-LM checkpoint you have access to
    task="text-generation",
    pipeline_kwargs={"do_sample": False, "max_length": 512},
)
# Pipe raw LLM output through a string parser so downstream code gets plain text.
chain = llm | StrOutputParser()
30
 
31
  # Cargar y procesar el PDF
 
42
  # Inicializar el reranker
43
  ranker = Reranker("answerdotai/answerai-colbert-small-v1", model_type='colbert')
44
 
45
+ # Paso 3: Definir la función RAG con reranking
46
def format_docs(docs):
    """Concatenate the text of retrieved documents into one context string.

    Each item in ``docs`` is a pair whose first element exposes a
    ``page_content`` attribute; the pieces are joined with a blank line
    so the LLM sees clear document boundaries.
    """
    pieces = []
    for pair in docs:
        pieces.append(pair[0].page_content)
    return "\n\n".join(pieces)
48
 
 
56
  doc_details = doc.to_json()['kwargs']
57
  context.append(doc_details['page_content'])
58
  if len(context) > 0:
 
59
  ranking = ranker.rank(query=query, docs=context)
 
60
  useful_context = ranking[0].text
 
61
  generation = rag_chain.invoke({"context": useful_context, "question": query})
62
  return generation
63
  else:
64
  return "No tengo información para responder a esta pregunta"
65
 
66
+ # Paso 4: Crear una interfaz con Gradio
67
def answer_query(query):
    """Gradio handler: route the user's question through the RAG pipeline.

    Delegates to ``test_rag_reranking`` with the module-level reranker and
    returns its generated answer unchanged.
    """
    answer = test_rag_reranking(query, ranker)
    return answer
69
 
 
77
 
78
  # Lanzar la interfaz
79
  interface.launch()