Muzammil6376 committed
Commit cb3c155 Β· verified Β· 1 Parent(s): dcc36ef

Update app.py

Files changed (1): app.py +8 -27
app.py CHANGED
@@ -7,7 +7,6 @@ import gradio as gr
 from PIL import Image
 from huggingface_hub import InferenceClient
 
-# βœ… Community imports
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_community.llms import HuggingFaceEndpoint
@@ -20,20 +19,15 @@ from unstructured.partition.pdf import partition_pdf
 from unstructured.partition.utils.constants import PartitionStrategy
 
 # β€”β€”β€”β€”β€” Config & Folders β€”β€”β€”β€”β€”
-PDF_DIR = Path("pdfs")
-FIG_DIR = Path("figures")
-PDF_DIR.mkdir(exist_ok=True)
-FIG_DIR.mkdir(exist_ok=True)
+PDF_DIR = Path("pdfs"); FIG_DIR = Path("figures")
+PDF_DIR.mkdir(exist_ok=True); FIG_DIR.mkdir(exist_ok=True)
 
 # β€”β€”β€”β€”β€” Read your HF_TOKEN secret β€”β€”β€”β€”β€”
 hf_token = os.environ["HF_TOKEN"]
 
 # β€”β€”β€”β€”β€” Embeddings & LLM Setup β€”β€”β€”β€”β€”
-embedding_model = HuggingFaceEmbeddings(
-    model_name="sentence-transformers/all-MiniLM-L6-v2"
-)
+embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
-# LLM via HF Inference API endpoint
 llm = HuggingFaceEndpoint(
     endpoint_url="https://api-inference.huggingface.co/models/google/flan-t5-base",
     huggingfacehub_api_token=hf_token,
@@ -41,7 +35,6 @@ llm = HuggingFaceEndpoint(
     max_length=512,
 )
 
-# Prompt
 TEMPLATE = """
 Use the following context to answer the question. If unknown, say so.
 Context: {context}
@@ -50,22 +43,19 @@ Answer (up to 3 sentences):
 """
 prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])
 
-# Inference client for image captioning
+# β€”β€”β€”β€”β€” FIXED: correct keyword for InferenceClient β€”β€”β€”β€”β€”
 vision_client = InferenceClient(
-    repo_id="Salesforce/blip-image-captioning-base",
+    model="Salesforce/blip-image-captioning-base",
     token=hf_token,
 )
 
-# Globals (will initialize after processing)
 vector_store = None
 qa_chain = None
 
-
 def extract_image_caption(path: str) -> str:
     with Image.open(path) as img:
         return vision_client.image_to_text(img)
 
-
 def process_pdf(pdf_file) -> str:
     global vector_store, qa_chain
 
@@ -81,7 +71,6 @@ def process_pdf(pdf_file) -> str:
     )
 
     texts = [el.text for el in elems if el.category not in ("Image", "Table")]
-
     for img_file in FIG_DIR.iterdir():
         texts.append(extract_image_caption(str(img_file)))
 
@@ -97,27 +86,19 @@ def process_pdf(pdf_file) -> str:
 
     return f"βœ… Processed `{pdf_file.name}` into {len(docs)} chunks."
 
-
 def answer_query(question: str) -> str:
     if qa_chain is None:
         return "❗ Please upload and process a PDF first."
     return qa_chain.run(question)
 
-
-# β€”β€”β€”β€”β€” Gradio UI β€”β€”β€”β€”β€”
 with gr.Blocks() as demo:
-    gr.Markdown("## πŸ“„πŸ“· Multimodal RAG β€” Hugging Face Spaces")
-
+    gr.Markdown("## πŸ“„πŸ“· Multimodal RAG β€” HF Spaces")
     with gr.Row():
         pdf_in = gr.File(label="Upload PDF", type="file")
-        btn_proc = gr.Button("Process PDF")
-        status = gr.Textbox(label="Status")
-
+        btn_proc = gr.Button("Process PDF"); status = gr.Textbox(label="Status")
     with gr.Row():
         q_in = gr.Textbox(label="Your Question")
-        btn_ask = gr.Button("Ask")
-        ans_out = gr.Textbox(label="Answer")
-
+        btn_ask = gr.Button("Ask"); ans_out = gr.Textbox(label="Answer")
     btn_proc.click(fn=process_pdf, inputs=pdf_in, outputs=status)
     btn_ask.click(fn=answer_query, inputs=q_in, outputs=ans_out)
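
Note on the fix above: huggingface_hub's InferenceClient takes the model id through the model= keyword (the client has no repo_id= parameter), which is the one-line correction this commit makes. A minimal sketch of exercising the corrected captioning client outside the app, assuming a figure already extracted to a local path (the file name and the getattr fallback are illustrative, not part of the commit):

    import os
    from huggingface_hub import InferenceClient

    hf_token = os.environ["HF_TOKEN"]
    client = InferenceClient(
        model="Salesforce/blip-image-captioning-base",  # passed via `model=`, the keyword fixed here
        token=hf_token,
    )

    # image_to_text accepts a local file path, a URL, or raw bytes; passing a
    # PIL.Image object directly (as app.py does) is not guaranteed to work.
    out = client.image_to_text("figures/figure-1.png")  # hypothetical path

    # Depending on the huggingface_hub version, the result is either a plain
    # string or an object exposing `generated_text`.
    caption = getattr(out, "generated_text", out)
    print(caption)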
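
The hunks above only show the surroundings of vector_store and qa_chain; the code that actually builds them lies outside this diff. For orientation, a generic LangChain pattern matching the names used in app.py might look like the sketch below (a FAISS index over the extracted chunks plus a RetrievalQA chain using the same prompt). This is a hypothetical reconstruction, not the file's actual implementation; the endpoint's generation parameters are omitted because they vary across langchain_community versions:

    import os
    from langchain.chains import RetrievalQA
    from langchain.prompts import PromptTemplate
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.llms import HuggingFaceEndpoint
    from langchain_community.vectorstores import FAISS

    hf_token = os.environ["HF_TOKEN"]

    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    llm = HuggingFaceEndpoint(
        endpoint_url="https://api-inference.huggingface.co/models/google/flan-t5-base",
        huggingfacehub_api_token=hf_token,
    )
    prompt = PromptTemplate(
        template=(
            "Use the following context to answer the question. If unknown, say so.\n"
            "Context: {context}\nQuestion: {question}\nAnswer (up to 3 sentences):"
        ),
        input_variables=["context", "question"],
    )

    # Placeholder chunks stand in for the text and captions gathered in process_pdf().
    texts = ["example chunk one", "example chunk two"]
    vector_store = FAISS.from_texts(texts, embedding_model)

    # answer_query() then only needs qa_chain.run(question).
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vector_store.as_retriever(),
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt},
    )
    print(qa_chain.run("What does the uploaded document describe?"))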