# Spaces: Build error
| import gradio as gr | |
| import openai | |
| import base64 | |
| import io | |
| from PIL import Image | |
| import fitz # PyMuPDF | |
| import os | |
# Configure the OpenAI client from the environment; the value is None when
# OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Instruction text sent alongside every page image. It lists the form fields
# the model is asked to return as JSON. The empty first and last entries
# produce the leading and trailing newline of the final string.
prompt = "\n".join(
    [
        "",
        "You are analyzing a medical document or an application form from a patient.",
        "Extract the following fields as JSON:",
        "- Position applied for",
        "- Office/Ministry",
        "- Duty station",
        "- First name(s)",
        "- Surname",
        "- Date of birth",
        "- Gender",
        "- Citizenship",
        "- Postal Address",
        "- Residential Address",
        "- Phone number (mobile)",
        "",
    ]
)
def _page_to_data_url(page, dpi=200):
    """Render one PDF page to JPEG and return it as a base64 ``data:`` URL.

    Args:
        page: A PyMuPDF page object.
        dpi: Render resolution; 150-200 DPI is a balance between legibility
            and payload size.

    Returns:
        A ``data:image/jpeg;base64,...`` string suitable for an ``image_url``
        message part.
    """
    pix = page.get_pixmap(dpi=dpi)
    # Wrap the raw pixmap samples in a PIL image so it can be saved as JPEG.
    image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    encoded = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/jpeg;base64,{encoded}"


def process_pdf(pdf_file):
    """Extract form fields from every page of a PDF using GPT-4o.

    Each page is rendered to a JPEG and sent, together with the module-level
    ``prompt``, to the ``gpt-4o`` chat-completions endpoint.

    Args:
        pdf_file: Raw PDF bytes, as passed in by ``gr.File(type="binary")``.

    Returns:
        The model's reply for each page, joined by ``---`` separator lines,
        or a short notice when no file content was supplied.
    """
    if not pdf_file:
        # Nothing uploaded (None) or an empty upload: skip PDF parsing and
        # the API calls entirely.
        return "No PDF uploaded. Please upload a PDF file."

    results = []
    # The context manager closes the document even when rendering or the
    # API request raises.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        for page in doc:
            response = openai.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {"url": _page_to_data_url(page)},
                            },
                        ],
                    }
                ],
                max_tokens=1000,
            )
            # The message content is optional in the API response; treat a
            # missing reply as empty text instead of failing on .strip().
            content = response.choices[0].message.content
            results.append((content or "").strip())
    return "\n\n---\n\n".join(results)
# Gradio UI: a single file upload wired to process_pdf, with the extracted
# text shown in a textbox.
demo = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(
        type="binary",  # hand the handler raw bytes rather than a file path
        file_types=[".pdf"],  # the handler always opens the upload as a PDF
        label="Upload PDF Form",
    ),
    outputs="textbox",
    title="Healthelic Form Data Extractor (PDF Scanner) - OpenAI GPT-4o",
    description="Upload a scanned medical form in PDF format to extract key fields using GPT-4o vision model.",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()