RohitCSharp committed on
Commit
f06c20a
·
verified ·
1 Parent(s): b82995c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -69
app.py CHANGED
@@ -6,19 +6,18 @@ from transformers import pipeline
6
  from gtts import gTTS
7
  from bs4 import BeautifulSoup
8
  import tempfile
9
- import os
10
  import requests
11
- from PIL import Image, ImageDraw, ImageFont
12
  import subprocess
13
  import concurrent.futures
 
14
 
15
  # CPU-friendly summarization model
16
  summary_pipe = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)
17
  llm = HuggingFacePipeline(pipeline=summary_pipe)
18
 
19
- # LangChain summarization prompt (short summary)
20
  summary_prompt = PromptTemplate.from_template("""
21
- Give a crisp and short summary of the following content (under 50 words):
22
 
23
  {text}
24
 
@@ -26,85 +25,74 @@ Summary:
26
  """)
27
  summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
28
 
 
29
  def extract_main_content(url):
30
- try:
31
- response = requests.get(url, timeout=10)
32
- soup = BeautifulSoup(response.content, "html.parser")
33
- for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
34
- tag.decompose()
35
- paragraphs = soup.find_all("p")
36
- content = "\n".join([p.get_text() for p in paragraphs if len(p.get_text()) > 60])
37
- return content.strip()
38
- except Exception as e:
39
- return f"Error extracting article content: {str(e)}"
40
-
41
- def create_text_image(summary_text, image_path):
42
- img = Image.new("RGB", (1280, 720), color=(0, 0, 0))
43
  draw = ImageDraw.Draw(img)
44
- font = ImageFont.load_default()
45
- wrapped = summary_text[:512] + ('...' if len(summary_text) > 512 else '')
46
- draw.text((50, 50), wrapped, fill=(255, 255, 255), font=font)
 
 
 
47
  img.save(image_path)
48
 
49
- def generate_video(image_path, audio_path, output_path):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  cmd = [
51
- "ffmpeg", "-y",
52
- "-loop", "1",
53
- "-i", image_path,
54
- "-i", audio_path,
55
- "-t", "15",
56
- "-c:v", "libx264",
57
- "-tune", "stillimage",
58
- "-c:a", "aac",
59
- "-b:a", "192k",
60
- "-pix_fmt", "yuv420p",
61
- "-shortest",
62
- output_path
63
  ]
64
  subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
65
 
66
- def url_to_av_summary(url):
67
- try:
68
- article_text = extract_main_content(url)
69
- if article_text.startswith("Error"):
70
- return article_text, None
71
-
72
- article_text = article_text[:3000] # Further truncated
73
- summary = summary_chain.run(text=article_text)
74
-
75
- tts = gTTS(text=summary)
76
- audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
77
- tts.save(audio_path)
78
-
79
- image_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
80
- video_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
81
-
82
- create_text_image(summary, image_path)
83
- generate_video(image_path, audio_path, video_path)
84
-
85
- return summary, video_path
86
-
87
- except Exception as e:
88
- return f"Error: {str(e)}", None
89
-
90
- def safe_summary_with_timeout(url, timeout_secs=60):
91
  with concurrent.futures.ThreadPoolExecutor() as executor:
92
  future = executor.submit(url_to_av_summary, url)
93
  try:
94
  return future.result(timeout=timeout_secs)
95
  except concurrent.futures.TimeoutError:
96
- return "⏱️ Processing took too long. Try a shorter article.", None
97
 
98
  iface = gr.Interface(
99
- fn=safe_summary_with_timeout,
100
- inputs=gr.Textbox(label="Article URL", placeholder="Paste a news/blog URL here..."),
101
- outputs=[
102
- gr.Textbox(label="Summary"),
103
- gr.Video(label="Video Summary")
104
- ],
105
- title="🧠 Short AV Summary from URL",
106
- description="Extracts clean article content and creates a <15 second narrated video with a short crisp summary. 100% CPU-compatible."
107
  )
108
 
109
- if __name__ == "__main__":
110
- iface.launch()
 
6
  from gtts import gTTS
7
  from bs4 import BeautifulSoup
8
  import tempfile
 
9
  import requests
 
10
  import subprocess
11
  import concurrent.futures
12
+ from PIL import Image, ImageDraw
13
 
14
  # CPU-friendly summarization model
15
  summary_pipe = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)
16
  llm = HuggingFacePipeline(pipeline=summary_pipe)
17
 
18
+ # Prompt for <50-word promotional summary
19
  summary_prompt = PromptTemplate.from_template("""
20
+ Provide a crisp, promotional-style summary (under 50 words) of the following:
21
 
22
  {text}
23
 
 
25
  """)
26
  summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
27
 
28
+ # Extract main article content
29
  def extract_main_content(url):
30
+ resp = requests.get(url, timeout=10)
31
+ soup = BeautifulSoup(resp.content, "html.parser")
32
+ for tag in soup(["nav","header","footer","aside","script","style","noscript"]): tag.decompose()
33
+ paras = soup.find_all("p")
34
+ content = "\n".join(p.get_text() for p in paras if len(p.get_text())>60)
35
+ return content or None
36
+
37
+ # Create gradient background image
38
+ def create_background(image_path, size=(1280,720)):
39
+ img = Image.new("RGB", size)
 
 
 
40
  draw = ImageDraw.Draw(img)
41
+ for i in range(size[1]):
42
+ # gradient from dark blue to black
43
+ r = int(10 + (i/size[1])*20)
44
+ g = int(20 + (i/size[1])*30)
45
+ b = int(50 + (i/size[1])*50)
46
+ draw.line([(0, i), (size[0], i)], fill=(r, g, b))
47
  img.save(image_path)
48
 
49
+ # Generate AV summary
50
+ def url_to_av_summary(url):
51
+ text = extract_main_content(url)
52
+ if not text:
53
+ return "Failed to extract article content.", None
54
+ text = text[:3000]
55
+ summary = summary_chain.run(text=text)
56
+ summary = summary.replace('"','')
57
+ # TTS
58
+ audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
59
+ gTTS(text=summary).save(audio_path)
60
+ # Background image
61
+ bg_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
62
+ create_background(bg_path)
63
+ # Video with animated text
64
+ video_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
65
  cmd = [
66
+ 'ffmpeg', '-y',
67
+ '-loop', '1', '-i', bg_path,
68
+ '-i', audio_path,
69
+ '-vf', (
70
+ "drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text='" + summary +
71
+ "':fontcolor=white:fontsize=48:box=1:boxcolor=black@0.5:boxborderw=5:"
72
+ "x=(w-text_w)/2:y=h-(t*(h+text_h)/15)"
73
+ ),
74
+ '-t', '15',
75
+ '-c:v', 'libx264', '-c:a', 'aac', '-pix_fmt', 'yuv420p', '-shortest', video_path
 
 
76
  ]
77
  subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
78
+ return summary, video_path
79
 
80
+ # Timeout wrapper
81
+ def safe_summary(url, timeout_secs=60):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  with concurrent.futures.ThreadPoolExecutor() as executor:
83
  future = executor.submit(url_to_av_summary, url)
84
  try:
85
  return future.result(timeout=timeout_secs)
86
  except concurrent.futures.TimeoutError:
87
+ return "⏱️ Processing timed out.", None
88
 
89
  iface = gr.Interface(
90
+ fn=safe_summary,
91
+ inputs=gr.Textbox(label="Article URL"),
92
+ outputs=[gr.Textbox(label="Summary"), gr.Video(label="Video Preview")],
93
+ title="🎥 Promo-Style AV Summary with Gradient Background",
94
+ description="Generates a <15s video with animated text over a gradient background. CPU-only, HuggingFace Spaces-ready."
 
 
 
95
  )
96
 
97
+ if __name__ == '__main__':
98
+ iface.launch()