import json
import logging
import os
import tempfile

import magic_pdf
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse

logger = logging.getLogger(__name__)

app = FastAPI()


def _page_to_dict(page):
    """Convert one parsed page into a JSON-serializable dict.

    Reads ``page.page_num``, the ``text`` of every entry in
    ``page.text_blocks`` (joined with newlines), and calls ``to_markdown()``
    on every entry in ``page.tables``.
    """
    return {
        "page_num": page.page_num,
        "text": "\n".join(block.text for block in page.text_blocks),
        "tables": [table.to_markdown() for table in page.tables],
    }


@app.post("/extract")
async def extract(file: UploadFile = File(...)):
    """Extract per-page text and tables from an uploaded PDF.

    The upload is written to a temporary ``.pdf`` file, parsed with
    ``magic_pdf.PDF(path).parse()``, and each page of the result is
    serialized.

    Args:
        file: The uploaded PDF.

    Returns:
        On success, ``{"result": {"pages": [...]}}`` where each page is
        ``{"page_num": ..., "text": ..., "tables": [...]}``. If writing,
        parsing or serializing raises, a ``JSONResponse`` with status 500
        and body ``{"error": "<exception message>"}``.
    """
    content = await file.read()
    temp_pdf_path = None
    try:
        # delete=False: the parser is given the file by path after the
        # handle is closed, so removal is done explicitly in `finally`.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
            # Record the path before writing so a failed write still gets
            # cleaned up below.
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(content)

        # NOTE(review): parse() is called synchronously inside an async
        # handler, so the event loop is blocked while it runs. Consider
        # offloading to a worker thread if parsing is slow -- confirm the
        # parser is safe to run off the main thread first.
        result = magic_pdf.PDF(temp_pdf_path).parse()

        output = {"pages": [_page_to_dict(page) for page in result.pages]}
        return {"result": output}
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Always remove the temp file, including when parsing fails;
        # previously it was only removed on the success path and leaked on
        # every error.
        if temp_pdf_path is not None:
            try:
                os.unlink(temp_pdf_path)
            except OSError:
                # Best-effort cleanup: a failed unlink must not mask the
                # response that was already decided above.
                logger.warning(
                    "Could not remove temporary file %s",
                    temp_pdf_path,
                    exc_info=True,
                )