Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -25,13 +25,13 @@ Summary:
|
|
25 |
""")
|
26 |
summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
|
27 |
|
28 |
-
# Extract main article content
|
29 |
def extract_main_content(url):
|
30 |
resp = requests.get(url, timeout=10)
|
31 |
soup = BeautifulSoup(resp.content, "html.parser")
|
32 |
for tag in soup(["nav","header","footer","aside","script","style","noscript"]): tag.decompose()
|
33 |
-
paras = soup.find_all("p")
|
34 |
-
content = "\n".join(
|
35 |
return content or None
|
36 |
|
37 |
# Create gradient background image
|
@@ -39,28 +39,26 @@ def create_background(image_path, size=(1280,720)):
|
|
39 |
img = Image.new("RGB", size)
|
40 |
draw = ImageDraw.Draw(img)
|
41 |
for i in range(size[1]):
|
42 |
-
# gradient from dark blue to black
|
43 |
r = int(10 + (i/size[1])*20)
|
44 |
g = int(20 + (i/size[1])*30)
|
45 |
b = int(50 + (i/size[1])*50)
|
46 |
draw.line([(0, i), (size[0], i)], fill=(r, g, b))
|
47 |
img.save(image_path)
|
48 |
|
49 |
-
# Generate AV summary
|
50 |
def url_to_av_summary(url):
|
51 |
text = extract_main_content(url)
|
52 |
if not text:
|
53 |
return "Failed to extract article content.", None
|
54 |
-
text = text[:
|
55 |
-
summary = summary_chain.run(text=text)
|
56 |
-
|
57 |
-
# TTS
|
58 |
audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
|
59 |
gTTS(text=summary).save(audio_path)
|
60 |
-
|
61 |
bg_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
|
62 |
create_background(bg_path)
|
63 |
-
|
64 |
video_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
|
65 |
cmd = [
|
66 |
'ffmpeg', '-y',
|
@@ -69,16 +67,16 @@ def url_to_av_summary(url):
|
|
69 |
'-vf', (
|
70 |
"drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text='" + summary +
|
71 |
"':fontcolor=white:fontsize=48:box=1:[email protected]:boxborderw=5:"
|
72 |
-
"x=(w-text_w)/2:y=h-(t*(h+text_h)/
|
73 |
),
|
74 |
-
'-t', '
|
75 |
'-c:v', 'libx264', '-c:a', 'aac', '-pix_fmt', 'yuv420p', '-shortest', video_path
|
76 |
]
|
77 |
subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
78 |
return summary, video_path
|
79 |
|
80 |
-
# Timeout wrapper
|
81 |
-
def safe_summary(url, timeout_secs=
|
82 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
83 |
future = executor.submit(url_to_av_summary, url)
|
84 |
try:
|
@@ -90,9 +88,9 @@ iface = gr.Interface(
|
|
90 |
fn=safe_summary,
|
91 |
inputs=gr.Textbox(label="Article URL"),
|
92 |
outputs=[gr.Textbox(label="Summary"), gr.Video(label="Video Preview")],
|
93 |
-
title="🎥
|
94 |
-
description="
|
95 |
)
|
96 |
|
97 |
if __name__ == '__main__':
|
98 |
-
iface.launch()
|
|
|
25 |
""")
|
26 |
summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
|
27 |
|
28 |
+
# Extract main article content (first 20 meaningful paragraphs)
|
29 |
def extract_main_content(url, *, max_paragraphs=20, min_chars=60):
    """Fetch *url* and return its leading article paragraphs as one string.

    The page is parsed with BeautifulSoup's ``html.parser``. Navigation,
    header, footer, aside, script, style and noscript elements are removed
    before the ``<p>`` tags are collected.

    Args:
        url: Address of the article to download.
        max_paragraphs: Maximum number of qualifying paragraphs to keep.
        min_chars: A paragraph is kept only when its text is longer than
            this many characters.

    Returns:
        The kept paragraphs joined with newlines, or ``None`` when the
        server answers with an error status or no paragraph qualifies.

    Raises:
        requests.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, invalid URL).
    """
    resp = requests.get(url, timeout=10)
    # An error page (404, 500, ...) is not article content; report it the
    # same way as "nothing extracted" so the caller's `if not text` check
    # handles it instead of summarizing the error page.
    if not resp.ok:
        return None

    soup = BeautifulSoup(resp.content, "html.parser")
    for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
        tag.decompose()

    # Extract each paragraph's text once, then filter on its length.
    texts = (p.get_text() for p in soup.find_all("p"))
    paras = [text for text in texts if len(text) > min_chars]
    content = "\n".join(paras[:max_paragraphs])
    return content or None
|
36 |
|
37 |
# Create gradient background image
|
|
|
39 |
img = Image.new("RGB", size)
|
40 |
draw = ImageDraw.Draw(img)
|
41 |
for i in range(size[1]):
|
|
|
42 |
r = int(10 + (i/size[1])*20)
|
43 |
g = int(20 + (i/size[1])*30)
|
44 |
b = int(50 + (i/size[1])*50)
|
45 |
draw.line([(0, i), (size[0], i)], fill=(r, g, b))
|
46 |
img.save(image_path)
|
47 |
|
48 |
+
# Generate 5s AV summary
|
49 |
def url_to_av_summary(url):
|
50 |
text = extract_main_content(url)
|
51 |
if not text:
|
52 |
return "Failed to extract article content.", None
|
53 |
+
text = text[:2000] # truncate
|
54 |
+
summary = summary_chain.run(text=text).replace('"','')[:250] # short summary
|
55 |
+
|
|
|
56 |
audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
|
57 |
gTTS(text=summary).save(audio_path)
|
58 |
+
|
59 |
bg_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
|
60 |
create_background(bg_path)
|
61 |
+
|
62 |
video_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
|
63 |
cmd = [
|
64 |
'ffmpeg', '-y',
|
|
|
67 |
'-vf', (
|
68 |
"drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text='" + summary +
|
69 |
"':fontcolor=white:fontsize=48:box=1:[email protected]:boxborderw=5:"
|
70 |
+
"x=(w-text_w)/2:y=h-(t*(h+text_h)/5)"
|
71 |
),
|
72 |
+
'-t', '5',
|
73 |
'-c:v', 'libx264', '-c:a', 'aac', '-pix_fmt', 'yuv420p', '-shortest', video_path
|
74 |
]
|
75 |
subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
76 |
return summary, video_path
|
77 |
|
78 |
+
# Timeout wrapper: 60s max
|
79 |
+
def safe_summary(url, timeout_secs=60):
|
80 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
81 |
future = executor.submit(url_to_av_summary, url)
|
82 |
try:
|
|
|
88 |
fn=safe_summary,
|
89 |
inputs=gr.Textbox(label="Article URL"),
|
90 |
outputs=[gr.Textbox(label="Summary"), gr.Video(label="Video Preview")],
|
91 |
+
title="🎥 5-Second AV Summary (CPU-only)",
|
92 |
+
description="Fast, CPU-only AV summary of a URL. Video capped at 5 seconds to prevent timeouts."
|
93 |
)
|
94 |
|
95 |
if __name__ == '__main__':
|
96 |
+
iface.launch()
|