Spaces:
Sleeping
Sleeping
File size: 4,671 Bytes
14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da 002eb86 14349da |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import os
from typing import Optional, Tuple
import gradio as gr
from dotenv import load_dotenv
from langchain.chains.summarize import load_summarize_chain
from langchain_core.prompts import PromptTemplate
from langchain_community.callbacks import get_openai_callback
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI
# summarize_pdf stages uploaded PDFs under ./data, so create it up front.
os.makedirs("data", exist_ok=True)

# Load settings from a .env file (python-dotenv) before reading the environment.
load_dotenv()

# Server-wide fallback API key; None when the variable is not set.
OPENAI_API_KEY: Optional[str] = os.environ.get("OPENAI_API_KEY")
def summarize_pdf(
    pdf_file: bytes, custom_prompt: str = "", openai_api_key: Optional[str] = None
) -> Tuple[str, str]:
    """
    Summarize the content of a PDF file with an OpenAI chat model.

    The upload is written to a uniquely named temporary file under ``data/``
    (so concurrent requests cannot overwrite each other's input), split into
    documents, and summarized with a map-reduce chain. The temporary file is
    removed before returning. Problems are reported through the returned
    strings instead of being raised.

    Args:
        pdf_file (bytes): The uploaded PDF file as bytes.
        custom_prompt (str): Summarization instructions. A blank value falls
            back to the module-level ``default_prompt``.
        openai_api_key (Optional[str]): User-provided OpenAI API key. A blank
            value falls back to the module-level ``OPENAI_API_KEY``.

    Returns:
        Tuple[str, str]: The summary in markdown (or an error message) and the
        cost formatted as ``"$0.0000"`` (or ``"N/A"`` on any error).
    """
    import tempfile  # function-scope import keeps this block self-contained

    # Nothing uploaded (None) or an empty upload: fail early with a clear message.
    if not pdf_file:
        return "Error: No PDF file provided.", "N/A"

    # Resolve the key before touching the disk; whitespace-only input counts as blank.
    api_key: Optional[str] = (openai_api_key or "").strip() or OPENAI_API_KEY
    if not api_key:
        return "Error: No OpenAI API key provided.", "N/A"

    pdf_path: Optional[str] = None
    try:
        try:
            # delete=False because PyPDFLoader reopens the file by path below;
            # the ``finally`` clause is responsible for removing it.
            with tempfile.NamedTemporaryFile(
                mode="wb", suffix=".pdf", dir="data", delete=False
            ) as tmp:
                pdf_path = tmp.name
                tmp.write(pdf_file)
        except (OSError, TypeError) as e:
            # TypeError covers a payload that is not bytes-like.
            return f"Failed to write PDF file: {e}", "N/A"

        with get_openai_callback() as callback:
            try:
                model = ChatOpenAI(
                    # "gpt-4-mini" is not an OpenAI model id; "gpt-4o-mini"
                    # is the small GPT-4-class chat model.
                    model="gpt-4o-mini",
                    temperature=0.0,
                    openai_api_key=api_key,
                )
                documents = PyPDFLoader(pdf_path).load_and_split()
                if not documents:
                    # Avoid asking the model to summarize empty input.
                    return "Error: No text could be extracted from the PDF.", "N/A"

                prompt_text: str = (custom_prompt or "").strip() or default_prompt
                # Escape literal braces so PromptTemplate does not treat them
                # as template variables; only {text} must remain a placeholder.
                escaped_prompt = prompt_text.replace("{", "{{").replace("}", "}}")
                prompt_template: str = f"{escaped_prompt}\n\n{{text}}\n\nSUMMARY:"
                prompt = PromptTemplate(
                    template=prompt_template, input_variables=["text"]
                )

                summarize_chain = load_summarize_chain(
                    llm=model,
                    chain_type="map_reduce",
                    map_prompt=prompt,
                    combine_prompt=prompt,
                )
                # Chain.__call__ is deprecated; invoke() is the supported entry point.
                result = summarize_chain.invoke(
                    {"input_documents": documents}, return_only_outputs=True
                )
                summary: str = result.get("output_text", "No summary generated.")
                total_cost: float = callback.total_cost
                return summary, f"${total_cost:.4f}"
            except Exception as e:
                # UI boundary: surface any failure as text rather than raising.
                return f"An error occurred during summarization: {str(e)}", "N/A"
    finally:
        if pdf_path is not None:
            try:
                os.remove(pdf_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask the result.
                pass
# Summarization instructions used when no custom prompt is supplied; also the
# initial value of the "Custom Prompt" textbox.
default_prompt: str = " ".join(
    (
        "Summarize this paper.",
        "Return markdown, keep it in a language that scientists understand,",
        "but the purpose is to highlight the key takeaways, so that we save time for the reader.",
    )
)
# Gradio UI: inputs (API key, prompt, PDF upload) in the left column, cost and
# summary in the right column; the button wires them through summarize_pdf.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Summarizer π")
    gr.Markdown(
        "Upload a PDF, customize your summarization prompt, and get a concise summary along with the processing cost."
    )

    with gr.Row():
        with gr.Column():
            api_key_label: str
            placeholder_text: str
            # Truthiness (not ``is None``) so an empty OPENAI_API_KEY value is
            # treated as "no server key", matching summarize_pdf's
            # ``openai_api_key or OPENAI_API_KEY`` fallback.
            if not OPENAI_API_KEY:
                api_key_label = "OpenAI API Key"
                placeholder_text = "Enter your OpenAI API key."
            else:
                api_key_label = "OpenAI API Key (Optional)"
                placeholder_text = (
                    "Enter your OpenAI API key if you want to override the global key."
                )

            api_key_input = gr.Textbox(
                label=api_key_label,
                type="password",
                placeholder=placeholder_text,
            )
            prompt_input = gr.Textbox(
                label="Custom Prompt",
                lines=4,
                value=default_prompt,
                placeholder="Enter your custom summarization prompt here...",
            )
            # type="binary" hands the upload to the callback as raw bytes.
            pdf_input = gr.File(
                label="Upload PDF",
                type="binary",
                file_types=[".pdf"],
            )
            summarize_btn = gr.Button("Summarize")

        with gr.Column():
            cost_output = gr.Textbox(label="Approximate Cost (USD)", interactive=False)
            summary_output = gr.Markdown(label="Summary")

    # Input order matches summarize_pdf(pdf_file, custom_prompt, openai_api_key);
    # output order matches its (summary, cost) return tuple.
    summarize_btn.click(
        fn=summarize_pdf,
        inputs=[pdf_input, prompt_input, api_key_input],
        outputs=[summary_output, cost_output],
    )

    gr.Markdown("---")
    gr.Markdown("Created by [Daniel Herman](https://www.hermandaniel.com), check out the code [detrin/llm-pdf-summarization](https://github.com/detrin/llm-pdf-summarization).")
if __name__ == "__main__":
    # Listen on every network interface at port 3000 when run as a script.
    launch_options = {"server_name": "0.0.0.0", "server_port": 3000}
    demo.launch(**launch_options)