import os import io import time import PIL.Image from fastapi import FastAPI, File, UploadFile, HTTPException, Request from fastapi.responses import JSONResponse from pdf2image import convert_from_bytes from google import genai from google.genai.errors import ClientError app = FastAPI(title="PDF/Image Text Extraction API") # Global exception handler to always return JSON responses @app.exception_handler(Exception) async def global_exception_handler(request: Request, exc: Exception): return JSONResponse( status_code=500, content={"detail": str(exc)} ) # Retrieve the API key from an environment variable. API_KEY = os.getenv("API_KEY") if not API_KEY: raise ValueError("API_KEY environment variable is not set") # Initialize the GenAI client. client = genai.Client(api_key=API_KEY) def extract_text_from_image(img): """ Extracts text from a PIL image using the Google GenAI API. Includes error handling for RESOURCE_EXHAUSTED errors. """ max_retries = 3 for attempt in range(max_retries): try: response = client.models.generate_content( model="gemini-2.0-flash", contents=[ """Extract all visible text from this image and preserve the original layout and formatting as accurately as possible. - Maintain line breaks, indentation, and paragraph spacing. - Do not merge or reflow text from multiple lines into a single line. - Preserve bullet points, numbering, punctuation, and symbols exactly as shown. - Reproduce alignment (left/center/right) where possible. - For tabular or columnar data, preserve column spacing and structure. - Do not summarize or interpret the content. Just return the raw extracted text exactly as it appears in the image. Return only the extracted content. Do not add explanations, headers, or any additional comments.""", img, ] ) return response.text except ClientError as e: # Extract error code from the exception arguments error_code = e.args[0] if e.args and isinstance(e.args[0], int) else None if error_code == 429: if attempt < max_retries - 1: time.sleep(2 ** attempt) # Exponential backoff before retrying continue else: raise HTTPException( status_code=503, detail="API resource exhausted. Please try again later." ) else: raise HTTPException( status_code=500, detail=f"Error processing image: {str(e)}" ) @app.post("/upload", summary="Upload a PDF or image file", response_description="Returns extracted text as JSON") async def upload_file(file: UploadFile = File(...)): if not file.filename: raise HTTPException(status_code=400, detail="No file provided") # Read file content. file_contents = await file.read() output_text = "" if file.filename.lower().endswith(".pdf"): try: # Convert PDF bytes to images. images = convert_from_bytes(file_contents, dpi=200) except Exception as e: raise HTTPException(status_code=500, detail=f"Error converting PDF: {str(e)}") # Process each page. for idx, img in enumerate(images, start=1): page_text = extract_text_from_image(img) output_text += f"### Page {idx}\n\n{page_text}\n\n" else: try: # Process the file as an image. img = PIL.Image.open(io.BytesIO(file_contents)) except Exception as e: raise HTTPException(status_code=400, detail="Uploaded file is not a valid image") output_text += extract_text_from_image(img) + "\n\n" # Return the extracted text in a JSON response. return JSONResponse(content={"extracted_text": output_text}) @app.get("/", summary="Health Check") async def root(): return JSONResponse(content={"message": "API is up and running."})