hermanda committed on
Commit
002eb86
·
verified ·
1 Parent(s): af6a56a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -0
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from langchain_core.prompts import PromptTemplate
3
+ from langchain.chains.summarize import load_summarize_chain
4
+ from langchain_community.document_loaders import PyPDFLoader
5
+ from langchain_openai import ChatOpenAI
6
+ from langchain_community.callbacks import get_openai_callback
7
+ import os
8
+ from dotenv import load_dotenv
9
+
10
# Working directory for uploaded PDFs; creating it is a no-op when it exists.
os.makedirs("data", exist_ok=True)

# Pull variables from a local .env file (if any) into the process environment
# before the key is looked up below.
load_dotenv()

# Global fallback key; None when the environment does not define it.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
14
+
15
def summarize_pdf(pdf_file, custom_prompt="", openai_api_key=None):
    """
    Summarize the content of a PDF file using a custom prompt.

    Args:
        pdf_file (bytes): Raw bytes of the uploaded PDF (the upload widget is
            configured with type="binary"). None/empty when nothing was
            uploaded.
        custom_prompt (str): Instructions for the summarization. A blank or
            missing prompt falls back to the module-level ``default_prompt``.
        openai_api_key (str, optional): User-provided OpenAI API key. When
            empty, the module-level ``OPENAI_API_KEY`` is used instead.

    Returns:
        tuple: ``(summary, cost)`` where ``summary`` is the markdown summary
        (or a human-readable error message) and ``cost`` is the approximate
        cost formatted like ``"$0.0012"`` (or ``"N/A"`` on error).
    """
    # Local import so this block does not depend on a file-level import change.
    import tempfile

    # Validate inputs before touching the disk or the network. Without this
    # guard, clicking "Summarize" with no upload crashed on f.write(None).
    if not pdf_file:
        return "Error: No PDF file provided.", "N/A"

    api_key = openai_api_key if openai_api_key else OPENAI_API_KEY
    if not api_key:
        return "Error: No OpenAI API key provided.", "N/A"

    pdf_path = None
    try:
        # PyPDFLoader needs a path on disk. Use a unique temp file per request
        # so concurrent users cannot overwrite each other's uploads (the
        # previous fixed "data/tmp.pdf" path was shared by every request).
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            pdf_path = tmp.name
            tmp.write(pdf_file)

        with get_openai_callback() as cb:
            model = ChatOpenAI(
                model="gpt-4o-mini",
                temperature=0,
                openai_api_key=api_key,
            )

            docs = PyPDFLoader(pdf_path).load_and_split()

            # Treat None the same as a blank prompt.
            instructions = custom_prompt if (custom_prompt or "").strip() else default_prompt

            # NOTE(review): literal "{" / "}" in a user prompt are interpreted
            # as template variables by PromptTemplate and will surface as an
            # error message below; left as-is to preserve existing behavior.
            prompt_template = instructions + "\n\n{text}\n\nSUMMARY:"
            prompt = PromptTemplate(template=prompt_template, input_variables=["text"])

            # The same prompt drives both the per-chunk (map) and the final
            # (combine) summarization steps.
            chain = load_summarize_chain(
                model,
                chain_type="map_reduce",
                map_prompt=prompt,
                combine_prompt=prompt,
            )
            # invoke() replaces the deprecated Chain.__call__ entry point.
            summary = chain.invoke({"input_documents": docs})["output_text"]

            return summary, f"${cb.total_cost:.4f}"

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        # the Gradio handler.
        return f"An error occurred: {str(e)}", "N/A"

    finally:
        # Best-effort cleanup of the per-request temp file.
        if pdf_path is not None:
            try:
                os.remove(pdf_path)
            except OSError:
                pass
72
+
73
# Prompt used when the user leaves the "Custom Prompt" box blank; it is also
# the initial value shown in that box.
default_prompt = " ".join(
    (
        "Summarize this paper. Return markdown, keep it in a language that scientists understand,",
        "but the purpose is to highlight the key takeaways, so that we save time for the reader.",
    )
)

with gr.Blocks() as demo:
    # Page header.
    gr.Markdown("# PDF Summarizer 📝")
    gr.Markdown("Upload a PDF, customize your summarization prompt, and get a concise summary along with the processing cost.")

    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            # The key box is mandatory when no global key is configured and
            # an optional override otherwise; only its texts differ.
            api_key_input = gr.Textbox(
                label=(
                    "OpenAI API Key"
                    if OPENAI_API_KEY is None
                    else "OpenAI API Key (Optional)"
                ),
                type="password",
                placeholder=(
                    "Enter your OpenAI API key."
                    if OPENAI_API_KEY is None
                    else "Enter your OpenAI API key if you want to override the global key."
                ),
            )
            prompt_input = gr.Textbox(
                label="Custom Prompt",
                lines=4,
                value=default_prompt,
                placeholder="Enter your custom summarization prompt here...",
            )
            # type="binary" hands the handler the raw file bytes.
            pdf_input = gr.File(
                label="Upload PDF",
                type="binary",
                file_types=[".pdf"],
            )
            summarize_btn = gr.Button("Summarize")

        # Right column: outputs.
        with gr.Column():
            cost_output = gr.Textbox(label="Approximate Cost (USD)", interactive=False)
            summary_output = gr.Markdown(label="Summary")

    # Input order matches summarize_pdf's positional parameters; output order
    # matches its (summary, cost) return tuple.
    summarize_btn.click(
        fn=summarize_pdf,
        inputs=[pdf_input, prompt_input, api_key_input],
        outputs=[summary_output, cost_output],
    )

    # Footer.
    gr.Markdown("---")
    gr.Markdown("Created by [Daniel Herman](https://www.hermandaniel.com)")

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()