Spaces:
Running
Running
| from dataclasses import dataclass | |
| from typing import List, Optional, Tuple | |
| import base64 | |
| from io import BytesIO | |
| from PIL import Image | |
| from smolagents import ChatMessage | |
def encode_image_to_base64(image):
    """Encode a PIL image as a base64 JPEG string.

    JPEG cannot store an alpha channel or a palette, so an image whose mode
    the JPEG writer does not accept (for example ``RGBA``, ``LA`` or ``P``)
    is converted to ``RGB`` first instead of letting ``save`` raise. Images
    already in a writable mode are saved unchanged. The caller's image is
    never modified: ``convert`` returns a new image.

    Args:
        image: A ``PIL.Image.Image`` (any object exposing ``mode``,
            ``convert`` and ``save`` in the same way works too).

    Returns:
        The JPEG bytes of the image, base64-encoded, as a ``str``.
    """
    # Modes Pillow's JPEG encoder can write directly; everything else is
    # flattened to RGB so the save below cannot fail on the mode.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
# System prompt for answering a question from PDF pages that are each preceded
# by their metadata, with citations in "(document, page number)" form.
# NOTE(review): the text asks for a "short response" and also for "detailed
# and extensive answers" -- confirm which of the two is intended.
DEFAULT_SYSTEM_PROMPT = """You are a smart assistant designed to answer questions about a PDF document.
You are given relevant information in the form of PDF pages preceded by their metadata: document title, page identifier, surrounding context.
Use them to construct a short response to the question, and cite your sources in the following format: (document, page number).
If it is not possible to answer using the provided pages, do not attempt to provide an answer and simply say the answer is not present within the documents.
Give detailed and extensive answers, only containing info in the pages you are given.
You can answer using information contained in plots and figures if necessary.
Answer in the same language as the query."""
| def _build_query(query, pages): | |
| messages = [] | |
| messages.append({"type": "text", "text": "PDF pages:\n"}) | |
| for page in pages: | |
| capt = page.caption | |
| if capt is not None: | |
| messages.append({ | |
| "type": "text", | |
| "text": capt | |
| }) | |
| messages.append({ | |
| "type": "image_url", | |
| "image_url": { | |
| "url": f"data:image/jpeg;base64,{encode_image_to_base64(page.image)}" | |
| }, | |
| }) | |
| messages.append({"type": "text", "text": f"Query:\n{query}"}) | |
| return messages | |
def query_openai(
    query,
    pages,
    api_key=None,
    system_prompt=DEFAULT_SYSTEM_PROMPT,
    model="gpt-4o-mini",
    max_tokens=500,
) -> ChatMessage:
    """Ask an OpenAI chat model to answer ``query`` from the given PDF pages.

    Args:
        query: The user question.
        pages: Iterable of page objects (exposing ``caption`` and ``image``)
            that are sent to the model together with the question.
        api_key: OpenAI API key. Surrounding whitespace is ignored; the key
            must start with ``"sk"`` to be used.
        system_prompt: System message sent ahead of the user content.
        model: Name of the chat-completions model to call.
        max_tokens: Upper bound on the number of tokens in the completion.

    Returns:
        A ``ChatMessage`` built from the first completion choice, with the
        raw API response attached as ``raw``. This function does not raise on
        failure: when no usable key is supplied, or when the import, request
        or response handling fails, a plain ``str`` describing the problem is
        returned instead.
    """
    import logging

    # Normalise before validating so a key pasted with leading or trailing
    # whitespace is accepted rather than rejected by the prefix check.
    key = api_key.strip() if api_key else ""
    if not key.startswith("sk"):
        return "Enter your OpenAI API key to get a custom response"

    try:
        # Imported lazily so a missing ``openai`` package is reported through
        # the same failure message instead of breaking module import.
        from openai import OpenAI

        client = OpenAI(api_key=key)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": _build_query(query, pages)},
            ],
            max_tokens=max_tokens,
        )
        message = ChatMessage.from_dict(
            response.choices[0].message.model_dump(include={"role", "content", "tool_calls"})
        )
        message.raw = response
        return message
    except Exception:
        # Best-effort boundary: keep returning the user-facing message, but
        # record the real cause so failures are diagnosable.
        logging.getLogger(__name__).exception("OpenAI chat completion request failed")
        return "OpenAI API connection failure. Verify the provided key is correct (sk-***)."
# System prompt instructing the model to extract the context of PDF pages,
# answering concisely and only from the pages it is given.
CONTEXT_SYSTEM_PROMPT = """You are a smart assistant designed to extract context of PDF pages.
Give concise answers, only containing info in the pages you are given.
You can answer using information contained in plots and figures if necessary."""
# System prompt for question answering over PDF pages with numbered citations
# ("[1, p.Page Number]") and a trailing "Sources:" list of document titles.
# The literal starts with a single space; it is kept as-is.
RAG_SYSTEM_PROMPT = """ You are a smart assistant designed to answer questions about a PDF document.
You are given relevant information in the form of PDF pages preceded by their metadata: document title, page identifier, surrounding context.
Use them to construct a response to the question, and cite your sources.
Use the following citation format:
"Some information from a first document [1, p.Page Number]. Some information from the same first document but at a different page [1, p.Page Number]. Some more information from another document [2, p.Page Number].
...
Sources:
[1] Document Title
[2] Another Document Title"
You can answer using information contained in plots and figures if necessary.
If it is not possible to answer using the provided pages, do not attempt to provide an answer and simply say the answer is not present within the documents.
Give detailed answers, only containing info in the pages you are given.
Answer in the same language as the query."""
@dataclass
class Metadata:
    """Descriptive information attached to a PDF page.

    Declared as a dataclass so the annotated fields below become constructor
    arguments (``Metadata(doc_title, page_id, context=None)``).

    Attributes:
        doc_title: Title of the document the page comes from.
        page_id: Identifier of the page.
        context: Optional surrounding-context text; ``None`` when absent.
    """

    doc_title: str
    page_id: int
    context: Optional[str] = None

    def __str__(self):
        """Return a one-line summary containing the title, page id and context."""
        return f"Document: {self.doc_title}, Page ID: {self.page_id}, Context: {self.context}"
@dataclass
class Page:
    """A PDF page image together with its optional metadata.

    Declared as a dataclass so the annotated fields below become constructor
    arguments (``Page(image, metadata=None)``).

    Attributes:
        image: The page as a PIL image.
        metadata: Optional metadata describing the page; ``None`` when absent.
    """

    image: "Image.Image"
    metadata: Optional["Metadata"] = None

    @property
    def caption(self):
        """Text caption for the page, or ``None`` when it has no metadata.

        Exposed as a property so callers can read ``page.caption`` and compare
        the value against ``None`` directly.
        """
        if self.metadata is None:
            return None
        # NOTE(review): the caption carries the document title and context but
        # not ``page_id`` -- confirm this is intended.
        return f"Document: {self.metadata.doc_title}, Context: {self.metadata.context}"

    def __hash__(self):
        """Hash a page by its image only.

        Defined explicitly so the class stays hashable: ``@dataclass`` leaves
        an explicitly defined ``__hash__`` in place.
        """
        # NOTE(review): this requires the image object itself to be hashable --
        # confirm for the PIL image types actually stored here.
        return hash(self.image)