Shuja1401 committed
Commit 04a1c23 · verified · 1 Parent(s): d27cd71

Delete app.py

Files changed (1)
  1. app.py +0 -146
app.py DELETED
@@ -1,146 +0,0 @@
- # -*- coding: utf-8 -*-
- """Paper_News_Gradio_App.ipynb
-
- Automatically generated by Colab.
-
- Original file is located at
-     https://colab.research.google.com/drive/1_JHJRpT4KWmECR-ep25CGZ0eM55Bm7TK
- """
-
- # Install dependencies before running (the `!pip install` magic from the Colab notebook is not valid in a .py file): pip install -q gradio PyMuPDF tiktoken openai
-
- import gradio as gr
- import fitz  # PyMuPDF
- import re
- import tiktoken
- import time
- from openai import OpenAI
- import os
-
- # Set your API key securely
- # Provide it via the OPENAI_API_KEY environment variable (e.g. a Space secret); never hardcode it in source.
- client = OpenAI()
-
- # --- Step 3: Extract and clean PDF text ---
- def extract_text_from_pdf(pdf_file_path):
-     text = ""
-     with fitz.open(pdf_file_path) as doc:
-         for page in doc:
-             text += page.get_text()
-     return text
-
-
- def clean_text(text):
-     text = re.sub(r'\s+', ' ', text)
-     text = re.sub(r'[^\x20-\x7E]+', '', text)
-     return text.strip()
-
- def split_into_chunks(text, max_tokens=1000):
-     encoding = tiktoken.get_encoding("cl100k_base")
-     words = text.split()
-     chunks, current_chunk, current_tokens = [], [], 0
-
-     for word in words:
-         tokens = len(encoding.encode(word))
-         if current_tokens + tokens > max_tokens:
-             chunks.append(" ".join(current_chunk))
-             current_chunk, current_tokens = [word], tokens
-         else:
-             current_chunk.append(word)
-             current_tokens += tokens
-
-     if current_chunk:
-         chunks.append(" ".join(current_chunk))
-
-     return chunks
-
- def summarize_chunk(chunk):
-     try:
-         response = client.chat.completions.create(
-             model="gpt-3.5-turbo",
-             messages=[
-                 {"role": "system", "content": "You are a helpful assistant that summarizes documents."},
-                 {"role": "user", "content": f"Summarize the following text:\n\n{chunk}"}
-             ],
-             temperature=0.3
-         )
-         return response.choices[0].message.content
-     except Exception as e:
-         return f"Error: {e}"
-
- def generate_special_summaries(summary_text):
-     prompt = f"""
-     From the text below, generate the following:
-     1. ELI5 (Explain Like I'm 5)
-     2. Why It Matters
-     3. TL;DR (One-line summary)
-
-     Text:
-     \"\"\"
-     {summary_text}
-     \"\"\"
-     """
-     response = client.chat.completions.create(
-         model="gpt-3.5-turbo",
-         messages=[
-             {"role": "system", "content": "You are an expert summarizer."},
-             {"role": "user", "content": prompt}
-         ]
-     )
-
-     full_reply = response.choices[0].message.content.strip()
-
-     # Optional: extract segments using string splitting (or just return raw if formatted well)
-     return full_reply
-
- def process_pdf(pdf_file):
-     try:
-         raw_text = extract_text_from_pdf(pdf_file if isinstance(pdf_file, str) else pdf_file.name)  # gr.File may return a path string or a tempfile-like object depending on the Gradio version
-         cleaned_text = clean_text(raw_text)
-         chunks = split_into_chunks(cleaned_text)
-
-         summaries = []
-         for chunk in chunks:
-             summary = summarize_chunk(chunk)
-             summaries.append(summary)
-             time.sleep(1.5)  # brief pause between API calls to stay under rate limits
-
-         full_summary = "\n\n".join(summaries)
-         special = generate_special_summaries(full_summary)
-
-         # Split the special summary into parts
-         eli5, why_matters, tldr = "", "", ""
-         for section in special.split("\n\n"):
-             if section.lower().startswith("1. eli5"):
-                 eli5 = section.replace("1. ELI5:", "").strip()
-             elif section.lower().startswith("2. why"):
-                 why_matters = section.replace("2. Why It Matters:", "").strip()
-             elif section.lower().startswith("3. tl;dr") or section.lower().startswith("3. tldr"):
-                 tldr = section.replace("3. TL;DR:", "").replace("3. Tldr:", "").strip()
-
-         return full_summary, eli5, why_matters, tldr
-
-     except Exception as e:
-         error_msg = f"❌ Error: {str(e)}"
-         return error_msg, error_msg, error_msg, error_msg
-
- with gr.Blocks() as demo:
-     gr.Markdown("### 📚 Paper News Summarizer")
-     gr.Markdown("Upload a research paper PDF and get a human-friendly summary, ELI5, and TL;DR. Powered by GPT-3.5.")
-
-     with gr.Row():
-         pdf_input = gr.File(label="📄 Upload Research Paper (PDF)", file_types=[".pdf"])
-         submit_btn = gr.Button("Submit", variant="primary")
-         clear_btn = gr.Button("Clear")
-
-     summary_output = gr.Textbox(label="📘 Full Summary", lines=10)
-     eli5_output = gr.Textbox(label="🧒 ELI5", lines=3)
-     why_output = gr.Textbox(label="🎯 Why It Matters", lines=3)
-     tldr_output = gr.Textbox(label="⚡ TL;DR", lines=2)
-
-     submit_btn.click(fn=process_pdf, inputs=pdf_input,
-                      outputs=[summary_output, eli5_output, why_output, tldr_output])
-
-     clear_btn.click(lambda: ("", "", "", ""), outputs=[summary_output, eli5_output, why_output, tldr_output])
-
- demo.launch(debug=True)