# Page header captured together with this file (not part of the module):
# Spaces: Running | File size: 4,832 Bytes | 70f7106
from dataclasses import dataclass
from typing import List, Optional, Tuple
import base64
from io import BytesIO
from PIL import Image
from smolagents import ChatMessage
def encode_image_to_base64(image):
    """Encode a PIL image as a base64 string of its JPEG representation.

    Args:
        image: A PIL image (any object exposing ``mode``, ``convert`` and
            ``save`` with PIL semantics works).

    Returns:
        The JPEG bytes of the image, base64-encoded and decoded to a ``str``.
    """
    # JPEG cannot store alpha channels or palettes, so saving e.g. an RGBA or
    # P image raises OSError. Convert anything the JPEG writer does not accept
    # to RGB first; modes it does accept are saved untouched.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
# System prompt used by default in query_openai: answer a question from the
# supplied PDF pages and cite sources as (document, page number).
# NOTE(review): the text asks for both a "short response" and "detailed and
# extensive answers" -- confirm which one is intended.
DEFAULT_SYSTEM_PROMPT = (
    "You are a smart assistant designed to answer questions about a PDF document.\n"
    "You are given relevant information in the form of PDF pages preceded by their metadata: document title, page identifier, surrounding context.\n"
    "Use them to construct a short response to the question, and cite your sources in the following format: (document, page number).\n"
    "If it is not possible to answer using the provided pages, do not attempt to provide an answer and simply say the answer is not present within the documents.\n"
    "Give detailed and extensive answers, only containing info in the pages you are given.\n"
    "You can answer using information contained in plots and figures if necessary.\n"
    "Answer in the same language as the query."
)
def _build_query(query, pages):
messages = []
messages.append({"type": "text", "text": "PDF pages:\n"})
for page in pages:
capt = page.caption
if capt is not None:
messages.append({
"type": "text",
"text": capt
})
messages.append({
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{encode_image_to_base64(page.image)}"
},
})
messages.append({"type": "text", "text": f"Query:\n{query}"})
return messages
def query_openai(
    query,
    pages,
    api_key=None,
    system_prompt=DEFAULT_SYSTEM_PROMPT,
    model="gpt-4o-mini",
    max_tokens=500,
) -> ChatMessage:
    """Ask an OpenAI chat model a question about the given PDF pages.

    Args:
        query: The user's question.
        pages: Pages passed to ``_build_query`` to form the user message.
        api_key: OpenAI API key. Surrounding whitespace is ignored; the key
            must start with ``"sk"`` for a request to be attempted.
        system_prompt: Content of the system message.
        model: Name of the chat model to call.
        max_tokens: Upper bound on generated tokens, forwarded to the API.

    Returns:
        A ``ChatMessage`` built from the first choice of the response, with
        the full response stored on its ``raw`` attribute.
        NOTE: despite the annotation, a plain ``str`` is returned instead when
        no usable key is supplied or when the request fails; callers rely on
        these messages, so they are kept as-is.
    """
    import logging

    # Strip before validating: previously the prefix check ran on the raw
    # value, so a key pasted with leading whitespace was rejected even though
    # the stripped key was what would have been sent.
    key = api_key.strip() if api_key else ""
    if not key.startswith("sk"):
        return "Enter your OpenAI API key to get a custom response"

    try:
        from openai import OpenAI

        client = OpenAI(api_key=key)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": _build_query(query, pages)},
            ],
            max_tokens=max_tokens,
        )
        message = ChatMessage.from_dict(
            response.choices[0].message.model_dump(include={"role", "content", "tool_calls"})
        )
        message.raw = response
        return message
    except Exception:
        # Best-effort by design: the caller gets a user-facing message rather
        # than an exception. Log the real cause so failures unrelated to the
        # key (missing package, image encoding error, ...) can be diagnosed.
        logging.getLogger(__name__).exception("OpenAI chat completion request failed")
        return "OpenAI API connection failure. Verify the provided key is correct (sk-***)."
# System prompt asking the model to extract concise context from PDF pages.
CONTEXT_SYSTEM_PROMPT = (
    "You are a smart assistant designed to extract context of PDF pages.\n"
    "Give concise answers, only containing info in the pages you are given.\n"
    "You can answer using information contained in plots and figures if necessary."
)
# System prompt for question answering over PDF pages with numbered,
# bracketed citations and a trailing "Sources:" list.
# NOTE(review): the text begins with a single leading space; it is kept
# because it is part of the runtime string -- confirm whether it is intended.
RAG_SYSTEM_PROMPT = (
    " You are a smart assistant designed to answer questions about a PDF document.\n"
    "You are given relevant information in the form of PDF pages preceded by their metadata: document title, page identifier, surrounding context.\n"
    "Use them to construct a response to the question, and cite your sources.\n"
    "Use the following citation format:\n"
    '"Some information from a first document [1, p.Page Number]. Some information from the same first document but at a different page [1, p.Page Number]. Some more information from another document [2, p.Page Number].\n'
    "...\n"
    "Sources:\n"
    "[1] Document Title\n"
    '[2] Another Document Title"\n'
    "You can answer using information contained in plots and figures if necessary.\n"
    "If it is not possible to answer using the provided pages, do not attempt to provide an answer and simply say the answer is not present within the documents.\n"
    "Give detailed answers, only containing info in the pages you are given.\n"
    "Answer in the same language as the query."
)
@dataclass
class Metadata:
    """Descriptive information attached to a PDF page."""

    doc_title: str  # title of the document
    page_id: int  # identifier of the page
    context: Optional[str] = None  # optional context text; None when absent

    def __str__(self):
        """Return the fields as one ``Label: value`` line, comma-separated."""
        labelled = (
            ("Document", self.doc_title),
            ("Page ID", self.page_id),
            ("Context", self.context),
        )
        return ", ".join(f"{label}: {value}" for label, value in labelled)
@dataclass
class Page:
    """A PDF page image together with its optional metadata."""

    image: Image.Image  # the page as a PIL image
    metadata: Optional[Metadata] = None  # None when nothing is known about the page

    @property
    def caption(self):
        """Text describing the page, or ``None`` when it has no metadata.

        The caption is the string form of the metadata, so it carries the
        document title, the page identifier and the context. The page
        identifier was previously left out even though the answer prompts in
        this file tell the model that pages are preceded by one and ask it
        to cite page numbers.
        """
        if self.metadata is None:
            return None
        return str(self.metadata)

    def __hash__(self):
        """Hash the page by its image.

        Uses the image's own hash when it has one. When the image type is
        unhashable, falls back to hashing its mode, size and raw bytes so
        that pages can still be used in sets and as dict keys.
        NOTE(review): Pillow's ``Image`` appears to define ``__eq__`` without
        ``__hash__`` (making instances unhashable) -- confirm against the
        installed Pillow version.
        """
        try:
            return hash(self.image)
        except TypeError:
            image = self.image
            return hash((image.mode, image.size, image.tobytes()))