Muzammil6376 committed
Commit 7fdd092 · verified · 1 Parent(s): cb3c155

Update app.py

Files changed (1):
  1. app.py  +41 -14
app.py CHANGED
@@ -1,3 +1,4 @@
+
 # app.py
 
 import os
@@ -19,14 +20,18 @@ from unstructured.partition.pdf import partition_pdf
 from unstructured.partition.utils.constants import PartitionStrategy
 
 # ————— Config & Folders —————
-PDF_DIR = Path("pdfs"); FIG_DIR = Path("figures")
-PDF_DIR.mkdir(exist_ok=True); FIG_DIR.mkdir(exist_ok=True)
+PDF_DIR = Path("pdfs")
+FIG_DIR = Path("figures")
+PDF_DIR.mkdir(exist_ok=True)
+FIG_DIR.mkdir(exist_ok=True)
 
 # ————— Read your HF_TOKEN secret —————
 hf_token = os.environ["HF_TOKEN"]
 
 # ————— Embeddings & LLM Setup —————
-embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+embedding_model = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-MiniLM-L6-v2"
+)
 
 llm = HuggingFaceEndpoint(
     endpoint_url="https://api-inference.huggingface.co/models/google/flan-t5-base",
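
Note (not part of the commit): a quick way to smoke-test the reformatted embedding setup in this hunk is to embed one string and check the vector size. This is a minimal sketch; the import path is an assumption, since the file's import block sits outside these hunks.

    # Standalone check for the embedding model configured in this hunk.
    # The import path is assumed; reuse whatever the app itself imports.
    from langchain_community.embeddings import HuggingFaceEmbeddings

    emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vec = emb.embed_query("hello world")
    print(len(vec))  # all-MiniLM-L6-v2 produces 384-dimensional vectors
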
@@ -43,37 +48,47 @@ Answer (up to 3 sentences):
 """
 prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])
 
-# ————— FIXED: correct keyword for InferenceClient —————
+# ————— Inference client for image captioning —————
 vision_client = InferenceClient(
     model="Salesforce/blip-image-captioning-base",
     token=hf_token,
 )
 
+# Globals (initialized after processing)
 vector_store = None
 qa_chain = None
 
+
 def extract_image_caption(path: str) -> str:
     with Image.open(path) as img:
         return vision_client.image_to_text(img)
 
-def process_pdf(pdf_file) -> str:
+
+def process_pdf(pdf_path: str) -> str:
+    """Ingest a local PDF file, extract text & images, chunk, embed, and index."""
     global vector_store, qa_chain
 
-    out_path = PDF_DIR / pdf_file.name
-    with open(out_path, "wb") as f:
-        f.write(pdf_file.read())
+    # Move the uploaded PDF into our PDFs folder
+    src = Path(pdf_path)
+    dest = PDF_DIR / src.name
+    src.rename(dest)
 
+    # Partition PDF into text + image blocks
     elems = partition_pdf(
-        str(out_path),
+        str(dest),
         strategy=PartitionStrategy.HI_RES,
         extract_image_block_types=["Image", "Table"],
         extract_image_block_output_dir=str(FIG_DIR),
     )
 
+    # Collect text blocks
     texts = [el.text for el in elems if el.category not in ("Image", "Table")]
+
+    # Generate captions for each extracted image
     for img_file in FIG_DIR.iterdir():
         texts.append(extract_image_caption(str(img_file)))
 
+    # Chunk and embed
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = splitter.split_text("\n\n".join(texts))
 
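
Note (not part of the commit): the new `src.rename(dest)` assumes the upload and `PDF_DIR` sit on the same filesystem; `Path.rename` raises `OSError` on cross-device moves, which can happen when Gradio stores uploads under a different mount. A hedged alternative, with `ingest_upload` as a hypothetical helper name:

    import shutil
    from pathlib import Path

    def ingest_upload(pdf_path: str) -> Path:
        # Hypothetical helper: move the upload into PDF_DIR, falling back
        # to copy-and-delete when a plain rename would cross filesystems.
        dest = PDF_DIR / Path(pdf_path).name
        shutil.move(pdf_path, str(dest))
        return dest

Also worth noting: `FIG_DIR.iterdir()` captions every file currently in `figures/`, so images extracted from earlier uploads get folded into later indexes; clearing the folder at the start of `process_pdf` would scope each index to a single PDF.
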
@@ -84,21 +99,33 @@ def process_pdf(pdf_file) -> str:
         chain_type_kwargs={"prompt": prompt},
     )
 
-    return f"✅ Processed `{pdf_file.name}` into {len(docs)} chunks."
+    return f"✅ Processed `{dest.name}` into {len(docs)} chunks."
+
 
 def answer_query(question: str) -> str:
     if qa_chain is None:
         return "❗ Please upload and process a PDF first."
     return qa_chain.run(question)
 
+
+# ————— Gradio UI —————
 with gr.Blocks() as demo:
-    gr.Markdown("## 📄📷 Multimodal RAG — HF Spaces")
+    gr.Markdown("## 📄📷 Multimodal RAG — Hugging Face Spaces")
+
     with gr.Row():
-        pdf_in = gr.File(label="Upload PDF", type="file")
-        btn_proc = gr.Button("Process PDF"); status = gr.Textbox(label="Status")
+        pdf_in = gr.File(
+            label="Upload PDF",
+            file_types=["pdf"],
+            type="filepath"
+        )
+        btn_proc = gr.Button("Process PDF")
+        status = gr.Textbox(label="Status")
+
     with gr.Row():
         q_in = gr.Textbox(label="Your Question")
-        btn_ask = gr.Button("Ask"); ans_out = gr.Textbox(label="Answer")
+        btn_ask = gr.Button("Ask")
+        ans_out = gr.Textbox(label="Answer")
+
     btn_proc.click(fn=process_pdf, inputs=pdf_in, outputs=status)
     btn_ask.click(fn=answer_query, inputs=q_in, outputs=ans_out)
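
Note (not part of the commit): the `type="filepath"` switch is what makes the new `process_pdf(pdf_path: str)` signature line up. Gradio now passes the handler a plain string path to a temporary copy of the upload, whereas the removed `type="file"` passed a file-like wrapper, which is why the old code called `pdf_file.read()`. A minimal standalone sketch of that contract, independent of this app:

    import gradio as gr

    def show_path(pdf_path: str) -> str:
        # With type="filepath", the handler receives the temp file's
        # location as a str, not an open file object.
        return f"Received: {pdf_path}"

    with gr.Blocks() as demo:
        pdf = gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath")
        status = gr.Textbox(label="Status")
        pdf.upload(fn=show_path, inputs=pdf, outputs=status)

    demo.launch()
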
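
Note (not part of the commit): `extract_image_caption` is untouched by these hunks apart from spacing, but since `InferenceClient.image_to_text` accepts raw bytes, a local file path, or a URL, the PIL round-trip looks avoidable; passing the path straight through is a possible follow-up, sketched here under that assumption:

    def extract_image_caption(path: str) -> str:
        # image_to_text also accepts a local file path directly, so the
        # PIL.Image open/close is not required for captioning.
        return vision_client.image_to_text(path)
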