RohitCSharp committed on
Commit
4714e38
·
verified ·
1 Parent(s): e9d5607

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -10
app.py CHANGED
@@ -5,11 +5,11 @@ from langchain.document_loaders import WebBaseLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.llms import HuggingFacePipeline
7
  from transformers import pipeline
8
- from gtts import gTTS
9
  import tempfile
10
  import os
11
  from bs4 import BeautifulSoup
12
  import requests
 
13
 
14
  # CPU-friendly summarization LLM
15
  summary_pipe = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)
@@ -17,7 +17,7 @@ llm = HuggingFacePipeline(pipeline=summary_pipe)
17
 
18
  # Summarization prompt
19
  summary_prompt = PromptTemplate.from_template("""
20
- Summarize the following article content in a clear, concise way:
21
 
22
  {text}
23
 
@@ -31,17 +31,36 @@ def extract_main_content(url):
31
  response = requests.get(url, timeout=10)
32
  soup = BeautifulSoup(response.content, "html.parser")
33
 
34
- # Remove navigation, header, footer, sidebars, and scripts
35
  for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
36
  tag.decompose()
37
 
38
- # Extract main content using tags with significant paragraph text
39
  paragraphs = soup.find_all("p")
40
  content = "\n".join([p.get_text() for p in paragraphs if len(p.get_text()) > 60])
41
  return content.strip()
42
  except Exception as e:
43
  return f"Error extracting article content: {str(e)}"
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def url_to_audio_summary(url):
46
  try:
47
  article_text = extract_main_content(url)
@@ -50,12 +69,11 @@ def url_to_audio_summary(url):
50
 
51
  summary = summary_chain.run(text=article_text)
52
 
53
- # Use gTTS for TTS
54
- tts = gTTS(text=summary)
55
- temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
56
- tts.save(temp_path.name)
57
 
58
- return summary, temp_path.name
59
 
60
  except Exception as e:
61
  return f"Error: {str(e)}", None
@@ -68,7 +86,7 @@ iface = gr.Interface(
68
  gr.Audio(label="Audio Summary")
69
  ],
70
  title="URL to Audio Summary Agent",
71
- description="Summarizes only the article content from a URL and gives an audio summary. CPU-only using gTTS."
72
  )
73
 
74
  if __name__ == "__main__":
 
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.llms import HuggingFacePipeline
7
  from transformers import pipeline
 
8
  import tempfile
9
  import os
10
  from bs4 import BeautifulSoup
11
  import requests
12
+ import pyttsx3
13
 
14
  # CPU-friendly summarization LLM
15
  summary_pipe = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)
 
17
 
18
  # Summarization prompt
19
  summary_prompt = PromptTemplate.from_template("""
20
+ Summarize the following article content in a clear, concise, and emotionally engaging manner as if you're speaking to a curious listener:
21
 
22
  {text}
23
 
 
31
  response = requests.get(url, timeout=10)
32
  soup = BeautifulSoup(response.content, "html.parser")
33
 
 
34
  for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
35
  tag.decompose()
36
 
 
37
  paragraphs = soup.find_all("p")
38
  content = "\n".join([p.get_text() for p in paragraphs if len(p.get_text()) > 60])
39
  return content.strip()
40
  except Exception as e:
41
  return f"Error extracting article content: {str(e)}"
42
 
43
+ def generate_human_like_audio(text):
44
+ try:
45
+ engine = pyttsx3.init()
46
+ engine.setProperty('rate', 150) # slower pace
47
+ engine.setProperty('volume', 1.0)
48
+ voices = engine.getProperty('voices')
49
+
50
+ # Choose a more natural voice if available (optional: pick female)
51
+ for voice in voices:
52
+ if 'female' in voice.name.lower():
53
+ engine.setProperty('voice', voice.id)
54
+ break
55
+
56
+ temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
57
+ engine.save_to_file(text, temp_path.name)
58
+ engine.runAndWait()
59
+
60
+ return temp_path.name
61
+ except Exception as e:
62
+ return None
63
+
64
  def url_to_audio_summary(url):
65
  try:
66
  article_text = extract_main_content(url)
 
69
 
70
  summary = summary_chain.run(text=article_text)
71
 
72
+ audio_path = generate_human_like_audio(summary)
73
+ if not audio_path:
74
+ return summary, None
 
75
 
76
+ return summary, audio_path
77
 
78
  except Exception as e:
79
  return f"Error: {str(e)}", None
 
86
  gr.Audio(label="Audio Summary")
87
  ],
88
  title="URL to Audio Summary Agent",
89
+ description="Summarizes only the article content from a URL and gives a more human-like audio summary using pyttsx3. CPU-only."
90
  )
91
 
92
  if __name__ == "__main__":