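"""Streamlit app for company news summarization.

Fetches news articles for a company (via utils.save_company_news), enriches each
article with sentiment, topics, and a summary, translates the results to Hindi,
and generates per-article plus combined audio files. The helpers imported from
utils (sentiment_analysis_model, news_summarization, audio_output, Topic_finder)
are assumed to be provided by the accompanying utils module.
"""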
import streamlit as st
import json
import os
from utils import (
    save_company_news,
    sentiment_analysis_model,
    news_summarization,
    audio_output,
    Topic_finder,
)
from collections import Counter
import time
import re
from deep_translator import GoogleTranslator
from pydub import AudioSegment
import gc
import torch

st.set_page_config(
    page_title="Company News Summarization",
    page_icon="📰",
    layout="wide"
)

os.makedirs("Company", exist_ok=True)
os.makedirs("audio", exist_ok=True)

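# Split long text into sentence-aligned chunks so that each piece stays below
# the translator's per-request character limit.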
def split_text(text, max_length=4500):
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""

    for sentence in sentences:
        if len(current_chunk) + len(sentence) + 1 <= max_length:
            current_chunk += " " + sentence if current_chunk else sentence
        else:
            # Avoid appending an empty chunk when the very first sentence is
            # already longer than max_length.
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = sentence

    if current_chunk:
        chunks.append(current_chunk)

    return chunks

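# Concatenate every per-article MP3 in audio_folder into a single MP3 written
# to output_file, skipping the output file itself if it happens to live there.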
def combine_audio_files(audio_folder, output_file):
    try:
        st.info(f"Combining audio files from {audio_folder}...")
        audio_files = [f for f in os.listdir(audio_folder)
                       if f.endswith('.mp3') and f != os.path.basename(output_file)]

        if not audio_files:
            st.warning("No audio files found to combine.")
            return False

        # Sort article_<n>.mp3 files by their numeric suffix so they play in order.
        audio_files.sort(key=lambda x: int(x.split('_')[-1].split('.')[0])
                         if x.split('_')[-1].split('.')[0].isdigit() else 0)
        st.info(f"Found {len(audio_files)} audio files to combine.")

        combined = AudioSegment.empty()

        for file in audio_files:
            file_path = os.path.join(audio_folder, file)
            try:
                audio = AudioSegment.from_mp3(file_path)
                combined += audio

                # Release the decoded segment before loading the next file.
                del audio
                gc.collect()
            except Exception as e:
                st.error(f"Error processing {file}: {str(e)}")

        combined.export(output_file, format="mp3")
        st.success(f"Successfully combined audio files into {output_file}")

        del combined
        gc.collect()

        return True

    except Exception as e:
        st.error(f"Error combining audio files: {str(e)}")
        return False

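# End-to-end pipeline for one company: fetch articles, enrich each one with
# sentiment, topics, and a summary, translate everything to Hindi, and
# generate per-article audio plus a combined track.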
def process_company_news(company_name):
    with st.spinner("Fetching company news..."):
        file_path = save_company_news(company_name)

    # Bail out early if the fetch produced no usable file.
    if not file_path or not os.path.exists(file_path):
        st.error("Failed to fetch news. Please try again.")
        return False

    with open(file_path, "r", encoding="utf-8") as file:
        articles = json.load(file)

    st.success(f"Found {len(articles)} articles for {company_name}")

    with st.expander("Preview Articles"):
        for article in articles:
            st.subheader(article['title'])
            st.write(f"{article['content'][:100]}...")
            st.write(f"[Read more]({article['url']})")

    del articles
    gc.collect()

    with st.spinner("Analyzing sentiment, extracting topics, and generating summaries..."):
        progress_bar = st.progress(0)

        with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
            data = json.load(file)

        total_articles = len(data)

        for i, article in enumerate(data):
            topics = Topic_finder(article['title'])

            sentiment = sentiment_analysis_model(article['content'])
            article["sentiment"] = sentiment['sentiment']

            del sentiment
            gc.collect()

            summary = news_summarization(article["content"])
            article["summary"] = summary

            article["topics"] = topics

            # Free GPU memory between articles when a CUDA device is in use.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            gc.collect()
            progress_bar.progress((i + 1) / total_articles)

        # Persist the enriched articles back to the same JSON file.
        with open(f"Company/{company_name}.json", "w", encoding="utf-8") as file:
            json.dump(data, file, indent=4)

    with st.spinner("Counting sentiment..."):
        with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
            articles = json.load(file)

        sentiment_counts = Counter(article["sentiment"] for article in articles)

        st.write("### Sentiment Analysis")
        col1, col2, col3 = st.columns(3)
        col1.metric("Positive", sentiment_counts.get("Positive", 0))
        col2.metric("Negative", sentiment_counts.get("Negative", 0))
        col3.metric("Neutral", sentiment_counts.get("Neutral", 0))

        del articles
        del sentiment_counts
        gc.collect()

    with st.spinner("Translating content and generating audio..."):
        with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
            data = json.load(file)

        translator = GoogleTranslator(source="en", target="hi")

        audio_folder = "audio"
        os.makedirs(audio_folder, exist_ok=True)

        # Remove audio left over from a previous run.
        for file in os.listdir(audio_folder):
            file_path = os.path.join(audio_folder, file)
            if os.path.isfile(file_path):
                os.remove(file_path)

        text_data = ""
        audio_files = []

        progress_bar = st.progress(0)

        for i, article in enumerate(data, start=1):
            title_translated = translator.translate(article['title'])

            # Translate the body in chunks to stay under the translator's
            # per-request character limit.
            content_chunks = split_text(article['content'])
            translated_chunks = []

            for chunk in content_chunks:
                try:
                    translated_chunk = translator.translate(chunk)
                    translated_chunks.append(translated_chunk)
                    time.sleep(0.5)
                except Exception as e:
                    st.error(f"Error translating chunk: {str(e)}")
                    translated_chunks.append(f"Translation error: {str(e)}")

            content_translated = " ".join(translated_chunks)

            del content_chunks
            gc.collect()

            # Hindi narration, roughly: "Now you are listening to article number {i},
            # titled: {title}. Now you are listening to its content. Content: ..."
            article_text = (f"अब, आप लेख संख्या {i} सुन रहे हैं जिसका शीर्षक है: {title_translated}\n"
                            f"अब, आप लेख संख्या {i} की सामग्री सुन रहे हैं।\n"
                            f"सामग्री: {content_translated}\n\n")

            text_data += article_text

            audio_file = f"{audio_folder}/article_{i}.mp3"
            audio_output(article_text, audio_file)
            audio_files.append(audio_file)

            del article_text
            del content_translated
            del translated_chunks
            gc.collect()

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            progress_bar.progress(i / len(data))
            time.sleep(1)

        output_file = f"Company/{company_name}_translated.txt"
        with open(output_file, "w", encoding="utf-8") as file:
            file.write(text_data)

        del text_data
        gc.collect()

    with st.spinner("Combining audio files..."):
        output_file = "combined_news.mp3"
        combine_success = combine_audio_files(audio_folder, output_file)

        if combine_success:
            st.success("Audio combining process completed!")
        else:
            st.error("Failed to combine audio files.")

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    gc.collect()

    return True


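# Main page: sidebar input and the processing trigger.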
st.title("Company News Summarization and Audio Generation")

with st.sidebar:
    st.header("Enter Company Details")
    company_name = st.text_input("Company Name")
    process_button = st.button("Process Company News", type="primary")

if process_button and company_name:
    success = process_company_news(company_name)
    if success:
        st.session_state.processing_complete = True
        st.session_state.company_name = company_name
elif process_button and not company_name:
    st.error("Please enter a company name.")

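# Render results once processing has completed; the flag lives in
# st.session_state so the output survives Streamlit reruns.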
if 'processing_complete' in st.session_state and st.session_state.processing_complete:
    company_name = st.session_state.company_name

    st.header(f"Results for {company_name}")

    tab1, tab2, tab3 = st.tabs(["Summary", "Translated Text", "Audio"])

    with tab1:
        st.subheader("News Summary")
        try:
            with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
                articles = json.load(file)

            for i, article in enumerate(articles, 1):
                with st.expander(f"Article {i}: {article['title']}"):
                    st.write(f"**Summary:** {article['summary']}")
                    st.write(f"**Sentiment:** {article['sentiment']}")
                    st.write(f"**Topics:** {', '.join(article['topics'])}")
                    st.write(f"**URL:** {article['url']}")
        except Exception as e:
            st.error(f"Error loading summary data: {str(e)}")

    with tab2:
        st.subheader("Translated Text (Hindi)")
        try:
            with open(f"Company/{company_name}_translated.txt", "r", encoding="utf-8") as file:
                text_content = file.read()
            st.download_button(
                label="Download Translated Text",
                data=text_content,
                file_name=f"{company_name}_translated.txt",
                mime="text/plain"
            )
            st.text_area("Content", text_content, height=400)
        except Exception as e:
            st.error(f"Error loading translated text: {str(e)}")

    with tab3:
        st.subheader("Audio Files")

        st.write("### Combined Audio")
        try:
            with open("combined_news.mp3", "rb") as file:
                combined_audio_bytes = file.read()

            st.audio(combined_audio_bytes, format="audio/mp3")
            st.download_button(
                label="Download Combined Audio",
                data=combined_audio_bytes,
                file_name="combined_news.mp3",
                mime="audio/mp3"
            )
        except Exception as e:
            st.error(f"Error loading combined audio: {str(e)}")

        st.write("### Individual Article Audio Files")
        try:
            audio_files = [f for f in os.listdir("audio") if f.endswith('.mp3')]
            # Sort article_<n>.mp3 files by their numeric suffix.
            audio_files.sort(key=lambda x: int(x.split('_')[-1].split('.')[0])
                             if x.split('_')[-1].split('.')[0].isdigit() else 0)

            for audio_file in audio_files:
                with st.expander(f"{audio_file}"):
                    with open(f"audio/{audio_file}", "rb") as file:
                        audio_bytes = file.read()
                    st.audio(audio_bytes, format="audio/mp3")
                    st.download_button(
                        label=f"Download {audio_file}",
                        data=audio_bytes,
                        file_name=audio_file,
                        mime="audio/mp3"
                    )
        except Exception as e:
            st.error(f"Error loading individual audio files: {str(e)}")

with st.expander("How to use this app"):
    st.write("""
    1. Enter the name of a company in the sidebar.
    2. Click the 'Process Company News' button to start the analysis.
    3. Wait for processing to complete (this may take a while depending on the number of articles).
    4. View the results in the different tabs:
        - Summary: sentiment, topics, and a summary for each article
        - Translated Text: the Hindi translation of all articles
        - Audio: listen to or download the Hindi audio files
    """)