Update app.py
app.py
CHANGED
@@ -7,7 +7,7 @@ import gradio as gr
 from PIL import Image
 from huggingface_hub import InferenceClient
 
-# ✅ Community imports
+# ✅ Community imports
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_community.llms import HuggingFaceEndpoint
@@ -29,13 +29,16 @@ FIG_DIR.mkdir(exist_ok=True)
 hf_token = os.environ["HF_TOKEN"]
 
 # ───── Embeddings & LLM Setup ─────
-embedding_model = HuggingFaceEmbeddings(
+embedding_model = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-MiniLM-L6-v2"
+)
 
 # LLM via HF Inference API endpoint
 llm = HuggingFaceEndpoint(
     endpoint_url="https://api-inference.huggingface.co/models/google/flan-t5-base",
     huggingfacehub_api_token=hf_token,
-
+    temperature=0.5,
+    max_length=512,
 )
 
 # Prompt
@@ -48,7 +51,10 @@ Answer (up to 3 sentences):
 prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])
 
 # Inference client for image captioning
-vision_client = InferenceClient(
+vision_client = InferenceClient(
+    model="Salesforce/blip-image-captioning-base",
+    token=hf_token,
+)
 
 # Globals (will initialize after processing)
 vector_store = None
@@ -56,21 +62,17 @@ qa_chain = None
 
 
 def extract_image_caption(path: str) -> str:
-    """Return an autogenerated caption for an image file."""
     with Image.open(path) as img:
         return vision_client.image_to_text(img)
 
 
 def process_pdf(pdf_file) -> str:
-    """Save, parse, chunk, embed & index a PDF (text + images)."""
     global vector_store, qa_chain
 
-    # 1️⃣ Save PDF
     out_path = PDF_DIR / pdf_file.name
     with open(out_path, "wb") as f:
         f.write(pdf_file.read())
 
-    # 2️⃣ Partition into text + image blocks
     elems = partition_pdf(
         str(out_path),
         strategy=PartitionStrategy.HI_RES,
@@ -78,14 +80,11 @@ def process_pdf(pdf_file) -> str:
         extract_image_block_output_dir=str(FIG_DIR),
     )
 
-    # 3️⃣ Collect text
     texts = [el.text for el in elems if el.category not in ("Image", "Table")]
 
-    # 4️⃣ Caption each image
     for img_file in FIG_DIR.iterdir():
         texts.append(extract_image_caption(str(img_file)))
 
-    # 5️⃣ Split & index
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = splitter.split_text("\n\n".join(texts))
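The globals vector_store and qa_chain are populated further down in process_pdf, outside the hunks shown here. For orientation, a minimal sketch of how the pieces configured in this commit usually come together; the RetrievalQA import and the k=4 retriever setting are assumptions, not part of this diff:

from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS

def build_index_and_chain(docs, embedding_model, llm, prompt):
    # Embed the split chunks and index them in FAISS.
    store = FAISS.from_texts(docs, embedding_model)
    # Wire retriever, endpoint LLM, and the prompt into a "stuff" QA chain.
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(search_kwargs={"k": 4}),  # k=4 is an assumed default
        chain_type_kwargs={"prompt": prompt},
    )
    return store, chain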
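One caveat the diff leaves untouched: in current huggingface_hub releases, InferenceClient.image_to_text expects raw bytes or a file path rather than a PIL Image, so extract_image_caption may still fail at runtime. A hedged sketch of a fix, assuming newer client versions return an object with a generated_text attribute:

def extract_image_caption(path: str) -> str:
    # Pass the file path straight through; the client reads the bytes itself.
    out = vision_client.image_to_text(path)
    # Newer huggingface_hub versions return an ImageToTextOutput object,
    # older ones a plain string; handle both.
    return getattr(out, "generated_text", out)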