RohitCSharp committed on
Commit
d9efe10
·
verified ·
1 Parent(s): 3b53100

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from langchain.chains import LLMChain
3
+ from langchain.prompts import PromptTemplate
4
+ from langchain.document_loaders import WebBaseLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.llms import HuggingFacePipeline
7
+ from transformers import pipeline
8
+ import tempfile
9
+ import os
10
+
11
# Step 1: CPU-friendly summarization LLM (Flan-T5 Base; device=-1 keeps it on CPU)
#
# max_new_tokens: set explicitly so the summary length does not depend on the
#   library's default generation length (NOTE(review): older transformers
#   releases default to roughly 20 tokens, which cuts summaries short -- confirm
#   against the pinned version).
# truncation: clip over-long prompts to the tokenizer's maximum input length
#   instead of feeding an entire web page to the model in one pass.
summary_pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    device=-1,
    max_new_tokens=256,
    truncation=True,
)
llm = HuggingFacePipeline(pipeline=summary_pipe)

# Step 2: Summarization Prompt
# The instruction comes first so it survives when a long page is truncated.
summary_prompt = PromptTemplate.from_template("""
Summarize the following webpage content in a clear, concise way:

{text}

Summary:
""")

# Chain that fills {text} in the prompt and runs it through the local model.
summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
25
+
26
+ # Step 3: URL to Text -> Summarize -> Text to Speech
27
+
28
def _get_tts_pipeline():
    """Return the shared text-to-speech pipeline, building it on first use.

    Constructing a pipeline loads model weights, so the instance is cached on
    the function object rather than being rebuilt for every request.
    """
    tts_pipe = getattr(_get_tts_pipeline, "_cached", None)
    if tts_pipe is None:
        # NOTE(review): this checkpoint is published in ESPnet format; confirm
        # that the transformers "text-to-speech" pipeline can actually load it.
        tts_pipe = pipeline(
            "text-to-speech",
            model="espnet/kan-bayashi_ljspeech_vits",
            device=-1,
        )
        _get_tts_pipeline._cached = tts_pipe
    return tts_pipe


def _write_wav(samples, sampling_rate):
    """Write float audio samples to a temporary 16-bit PCM WAV file; return its path."""
    import wave

    import numpy as np

    # Assumes mono output with samples in [-1.0, 1.0]; a leading batch/channel
    # axis of size 1 is flattened away -- TODO confirm for the chosen model.
    mono = np.asarray(samples, dtype=np.float32).reshape(-1)
    pcm = (np.clip(mono, -1.0, 1.0) * 32767.0).astype("<i2")

    # delete=False: the file must outlive this function so the UI can read it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        with wave.open(tmp, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)  # 2 bytes per sample == 16-bit PCM
            wav_file.setframerate(int(sampling_rate))
            wav_file.writeframes(pcm.tobytes())
        return tmp.name


def url_to_audio_summary(url):
    """Fetch a web page, summarize it, and synthesize the summary as speech.

    Args:
        url: Address of the article to summarize.

    Returns:
        A ``(summary, audio_path)`` tuple, where ``audio_path`` points to a
        temporary WAV file. On any failure the tuple is
        ``("Error: <message>", None)`` so the UI can show the message instead
        of crashing.
    """
    if not isinstance(url, str) or not url.strip():
        return "Error: please provide a URL.", None

    try:
        # Load and split text
        loader = WebBaseLoader(url.strip())
        docs = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
        splits = splitter.split_documents(docs)

        # NOTE(review): the chunks are re-joined into one prompt, so the
        # 100-character overlaps appear twice and the model still receives the
        # whole page at once; consider summarizing chunk by chunk instead.
        full_text = "\n".join(s.page_content for s in splits)
        if not full_text.strip():
            return "Error: no readable text was found at that URL.", None

        # Summarize
        summary = summary_chain.run(text=full_text)
        if not summary or not summary.strip():
            return "Error: the model returned an empty summary.", None

        # Text to Speech
        speech = _get_tts_pipeline()(summary)

        # Save audio as a real WAV file (header + PCM frames), not raw array bytes
        audio_path = _write_wav(speech["audio"], speech["sampling_rate"])

        return summary, audio_path

    except Exception as e:
        # UI boundary: report any failure (network, model, I/O) as text.
        return f"Error: {str(e)}", None
54
+
55
# Step 4: Gradio Interface
# One URL text box in; the written summary and its spoken version out.
iface = gr.Interface(
    url_to_audio_summary,
    gr.Textbox(label="Article URL", placeholder="Paste a news/blog URL here..."),
    [gr.Textbox(label="Summary"), gr.Audio(label="Audio Summary")],
    title="🗣️ URL to Audio Summary Agent",
    description=(
        "An agent that reads web articles and gives you an audio summary. "
        "CPU-only. Built with LangChain + Hugging Face."
    ),
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()