hermanda committed on
Commit
14349da
·
verified ·
1 Parent(s): 002eb86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -61
app.py CHANGED
@@ -1,103 +1,113 @@
 
 
 
1
  import gradio as gr
2
- from langchain_core.prompts import PromptTemplate
3
  from langchain.chains.summarize import load_summarize_chain
 
 
4
  from langchain_community.document_loaders import PyPDFLoader
5
  from langchain_openai import ChatOpenAI
6
- from langchain_community.callbacks import get_openai_callback
7
- import os
8
- from dotenv import load_dotenv
9
 
10
- os.makedirs("data", exist_ok=True)
11
 
 
12
  load_dotenv()
13
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
14
 
15
- def summarize_pdf(pdf_file, custom_prompt="", openai_api_key=None):
 
 
 
16
  """
17
  Summarizes the content of a PDF file using a custom prompt.
18
 
19
  Args:
20
- pdf_file (UploadedFile): The uploaded PDF file.
21
  custom_prompt (str): The prompt for summarization.
22
- openai_api_key (str, optional): User-provided OpenAI API key.
23
 
24
  Returns:
25
- tuple: Summary in markdown format and the cost in USD.
26
  """
27
- pdf_path = os.path.join("data", "tmp.pdf")
28
- with open(pdf_path, "wb") as f:
29
- f.write(pdf_file)
 
 
 
 
 
30
 
31
- api_key = openai_api_key if openai_api_key else OPENAI_API_KEY
32
-
33
  if not api_key:
34
  return "Error: No OpenAI API key provided.", "N/A"
35
 
36
- with get_openai_callback() as cb:
37
  try:
38
  model = ChatOpenAI(
39
- model="gpt-4o-mini",
40
- temperature=0,
41
- openai_api_key=api_key
42
  )
43
 
44
  loader = PyPDFLoader(pdf_path)
45
- docs = loader.load_and_split()
46
-
47
- if not custom_prompt.strip():
48
- custom_prompt = default_prompt
49
-
50
- prompt_template = (
51
- custom_prompt
52
- + """
53
 
54
- {text}
 
 
55
 
56
- SUMMARY:"""
 
 
 
 
57
  )
58
- PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
59
- chain = load_summarize_chain(
60
- model,
61
- chain_type="map_reduce",
62
- map_prompt=PROMPT,
63
- combine_prompt=PROMPT
64
- )
65
- summary = chain({"input_documents": docs}, return_only_outputs=True)["output_text"]
66
- total_cost = cb.total_cost
67
 
68
  return summary, f"${total_cost:.4f}"
69
-
70
  except Exception as e:
71
- return f"An error occurred: {str(e)}", "N/A"
 
72
 
73
- default_prompt = (
74
  "Summarize this paper. Return markdown, keep it in a language that scientists understand, "
75
  "but the purpose is to highlight the key takeaways, so that we save time for the reader."
76
  )
77
-
78
  with gr.Blocks() as demo:
79
  gr.Markdown("# PDF Summarizer 📝")
80
- gr.Markdown("Upload a PDF, customize your summarization prompt, and get a concise summary along with the processing cost.")
 
 
81
 
82
  with gr.Row():
83
  with gr.Column():
 
 
 
84
  if OPENAI_API_KEY is None:
85
- api_key_input = gr.Textbox(
86
- label="OpenAI API Key",
87
- type="password",
88
- placeholder="Enter your OpenAI API key."
89
- )
90
  else:
91
- api_key_input = gr.Textbox(
92
- label="OpenAI API Key (Optional)",
93
- type="password",
94
- placeholder="Enter your OpenAI API key if you want to override the global key."
95
  )
 
 
 
 
 
 
96
  prompt_input = gr.Textbox(
97
  label="Custom Prompt",
98
  lines=4,
99
  value=default_prompt,
100
- placeholder="Enter your custom summarization prompt here..."
101
  )
102
  pdf_input = gr.File(
103
  label="Upload PDF",
@@ -105,20 +115,19 @@ with gr.Blocks() as demo:
105
  file_types=[".pdf"],
106
  )
107
  summarize_btn = gr.Button("Summarize")
108
-
109
  with gr.Column():
110
  cost_output = gr.Textbox(label="Approximate Cost (USD)", interactive=False)
111
  summary_output = gr.Markdown(label="Summary")
112
-
113
-
114
  summarize_btn.click(
115
  fn=summarize_pdf,
116
  inputs=[pdf_input, prompt_input, api_key_input],
117
- outputs=[summary_output, cost_output]
118
  )
119
-
120
  gr.Markdown("---")
121
- gr.Markdown("Created by [Daniel Herman](https://www.hermandaniel.com)")
122
-
123
  if __name__ == "__main__":
124
- demo.launch()
 
1
+ import os
2
+ from typing import Optional, Tuple
3
+
4
  import gradio as gr
5
+ from dotenv import load_dotenv
6
  from langchain.chains.summarize import load_summarize_chain
7
+ from langchain_core.prompts import PromptTemplate
8
+ from langchain_community.callbacks import get_openai_callback
9
  from langchain_community.document_loaders import PyPDFLoader
10
  from langchain_openai import ChatOpenAI
 
 
 
11
 
 
12
 
13
+ os.makedirs("data", exist_ok=True)
14
  load_dotenv()
15
+ OPENAI_API_KEY: Optional[str] = os.getenv("OPENAI_API_KEY")
16
 
17
+
18
+ def summarize_pdf(
19
+ pdf_file: bytes, custom_prompt: str = "", openai_api_key: Optional[str] = None
20
+ ) -> Tuple[str, str]:
21
  """
22
  Summarizes the content of a PDF file using a custom prompt.
23
 
24
  Args:
25
+ pdf_file (bytes): The uploaded PDF file as bytes.
26
  custom_prompt (str): The prompt for summarization.
27
+ openai_api_key (Optional[str]): User-provided OpenAI API key.
28
 
29
  Returns:
30
+ Tuple[str, str]: Summary in markdown format and the cost in USD.
31
  """
32
+ pdf_path: str = os.path.join("data", "tmp.pdf")
33
+ try:
34
+ with open(pdf_path, "wb") as f:
35
+ f.write(pdf_file)
36
+ except IOError as e:
37
+ return f"Failed to write PDF file: {e}", "N/A"
38
+
39
+ api_key: Optional[str] = openai_api_key or OPENAI_API_KEY
40
 
 
 
41
  if not api_key:
42
  return "Error: No OpenAI API key provided.", "N/A"
43
 
44
+ with get_openai_callback() as callback:
45
  try:
46
  model = ChatOpenAI(
47
+ model="gpt-4-mini", # Verify the correct model name
48
+ temperature=0.0,
49
+ openai_api_key=api_key,
50
  )
51
 
52
  loader = PyPDFLoader(pdf_path)
53
+ documents = loader.load_and_split()
 
 
 
 
 
 
 
54
 
55
+ prompt_text: str = custom_prompt.strip() or default_prompt
56
+ prompt_template: str = f"{prompt_text}\n\n{{text}}\n\nSUMMARY:"
57
+ prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
58
 
59
+ summarize_chain = load_summarize_chain(
60
+ llm=model,
61
+ chain_type="map_reduce",
62
+ map_prompt=prompt,
63
+ combine_prompt=prompt,
64
  )
65
+
66
+ chain_input = {"input_documents": documents}
67
+ result = summarize_chain(chain_input, return_only_outputs=True)
68
+ summary: str = result.get("output_text", "No summary generated.")
69
+ total_cost: float = callback.total_cost
 
 
 
 
70
 
71
  return summary, f"${total_cost:.4f}"
72
+
73
  except Exception as e:
74
+ return f"An error occurred during summarization: {str(e)}", "N/A"
75
+
76
 
77
+ default_prompt: str = (
78
  "Summarize this paper. Return markdown, keep it in a language that scientists understand, "
79
  "but the purpose is to highlight the key takeaways, so that we save time for the reader."
80
  )
 
81
  with gr.Blocks() as demo:
82
  gr.Markdown("# PDF Summarizer 📝")
83
+ gr.Markdown(
84
+ "Upload a PDF, customize your summarization prompt, and get a concise summary along with the processing cost."
85
+ )
86
 
87
  with gr.Row():
88
  with gr.Column():
89
+ api_key_label: str
90
+ placeholder_text: str
91
+
92
  if OPENAI_API_KEY is None:
93
+ api_key_label = "OpenAI API Key"
94
+ placeholder_text = "Enter your OpenAI API key."
 
 
 
95
  else:
96
+ api_key_label = "OpenAI API Key (Optional)"
97
+ placeholder_text = (
98
+ "Enter your OpenAI API key if you want to override the global key."
 
99
  )
100
+
101
+ api_key_input = gr.Textbox(
102
+ label=api_key_label,
103
+ type="password",
104
+ placeholder=placeholder_text,
105
+ )
106
  prompt_input = gr.Textbox(
107
  label="Custom Prompt",
108
  lines=4,
109
  value=default_prompt,
110
+ placeholder="Enter your custom summarization prompt here...",
111
  )
112
  pdf_input = gr.File(
113
  label="Upload PDF",
 
115
  file_types=[".pdf"],
116
  )
117
  summarize_btn = gr.Button("Summarize")
118
+
119
  with gr.Column():
120
  cost_output = gr.Textbox(label="Approximate Cost (USD)", interactive=False)
121
  summary_output = gr.Markdown(label="Summary")
122
+
 
123
  summarize_btn.click(
124
  fn=summarize_pdf,
125
  inputs=[pdf_input, prompt_input, api_key_input],
126
+ outputs=[summary_output, cost_output],
127
  )
128
+
129
  gr.Markdown("---")
130
+ gr.Markdown("Created by [Daniel Herman](https://www.hermandaniel.com), check out the code [detrin/llm-pdf-summarization](https://github.com/detrin/llm-pdf-summarization).")
131
+
132
  if __name__ == "__main__":
133
+ demo.launch(server_name="0.0.0.0", server_port=3000)