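"""Streamlit app for company news summarization.

Fetches recent news articles for a company, runs sentiment analysis, topic
extraction, and summarization on each article, translates the results into
Hindi, generates per-article audio, and combines the clips into a single MP3.
"""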
import streamlit as st
import json
import os
from utils import save_company_news
from utils import sentiment_analysis_model
from utils import news_summarization, audio_output, Topic_finder
from collections import Counter
import time
import re
from deep_translator import GoogleTranslator
from pydub import AudioSegment
import gc
import torch
# Set page config
st.set_page_config(
    page_title="Company News Summarization",
    page_icon="📰",
    layout="wide"
)
# Create necessary folders
os.makedirs("Company", exist_ok=True)
os.makedirs("audio", exist_ok=True)
def split_text(text, max_length=4500):
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk) + len(sentence) + 1 <= max_length:
            current_chunk += " " + sentence if current_chunk else sentence
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
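# Helper: concatenate the per-article MP3 files in the audio folder into one
# output file with pydub, skipping the output file itself if it is present.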
def combine_audio_files(audio_folder, output_file):
    try:
        st.info(f"Combining audio files from {audio_folder}...")
        audio_files = [f for f in os.listdir(audio_folder) if f.endswith('.mp3') and f != os.path.basename(output_file)]
        if not audio_files:
            st.warning("No audio files found to combine.")
            return False
        audio_files.sort(key=lambda x: int(x.split('_')[-1].split('.')[0]) if x.split('_')[-1].split('.')[0].isdigit() else 0)
        st.info(f"Found {len(audio_files)} audio files to combine.")
        combined = AudioSegment.empty()
        for file in audio_files:
            file_path = os.path.join(audio_folder, file)
            try:
                audio = AudioSegment.from_mp3(file_path)
                combined += audio
                del audio
                gc.collect()
            except Exception as e:
                st.error(f"Error processing {file}: {str(e)}")
        combined.export(output_file, format="mp3")
        st.success(f"Successfully combined audio files into {output_file}")
        del combined
        gc.collect()
        return True
    except Exception as e:
        st.error(f"Error combining audio files: {str(e)}")
        return False
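# End-to-end pipeline for a single company: fetch articles, analyze sentiment,
# extract topics, summarize, translate to Hindi, and generate/combine audio.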
def process_company_news(company_name):
    with st.spinner("Fetching company news..."):
        file_path = save_company_news(company_name)
        if not os.path.exists(file_path):
            st.error("Failed to fetch news. Try again.")
            return False
        with open(file_path, "r", encoding="utf-8") as file:
            articles = json.load(file)
        st.success(f"Found {len(articles)} articles for {company_name}")
        # Display a preview of the articles
        with st.expander("Preview Articles"):
            for article in articles:
                st.subheader(article['title'])
                st.write(f"{article['content'][:100]}...")
                st.write(f"[Read more]({article['url']})")
        del articles
        gc.collect()
with st.spinner("Analyzing sentiment, extracting topics, and generating summaries..."):
progress_bar = st.progress(0)
with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
data = json.load(file)
total_articles = len(data)
for i, article in enumerate(data):
topics = Topic_finder(article['title'])
sentiment = sentiment_analysis_model(article['content'])
article["sentiment"] = sentiment['sentiment']
del sentiment
gc.collect()
summary = news_summarization(article["content"])
article["summary"] = summary
article["topics"] = topics
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
progress_bar.progress((i + 1) / total_articles)
with open(f"Company/{company_name}.json", "w", encoding="utf-8") as file:
json.dump(data, file, indent=4)
with st.spinner("Counting sentiment..."):
with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
articles = json.load(file)
sentiment_counts = Counter(article["sentiment"] for article in articles)
st.write("### Sentiment Analysis")
col1, col2, col3 = st.columns(3)
col1.metric("Positive", sentiment_counts.get("Positive", 0))
col2.metric("Negative", sentiment_counts.get("Negative", 0))
col3.metric("Neutral", sentiment_counts.get("Neutral", 0))
del articles
del sentiment_counts
gc.collect()
with st.spinner("Translating content and generating audio..."):
with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
data = json.load(file)
translator = GoogleTranslator(source="en", target="hi")
audio_folder = "audio"
os.makedirs(audio_folder, exist_ok=True)
# Clear previous audio files
for file in os.listdir(audio_folder):
file_path = os.path.join(audio_folder, file)
if os.path.isfile(file_path):
os.remove(file_path)
text_data = ""
audio_files = []
progress_bar = st.progress(0)
for i, article in enumerate(data, start=1):
title_translated = translator.translate(article['title'])
content_chunks = split_text(article['content'])
translated_chunks = []
for chunk in content_chunks:
try:
translated_chunk = translator.translate(chunk)
translated_chunks.append(translated_chunk)
time.sleep(0.5)
except Exception as e:
st.error(f"Error translating chunk: {str(e)}")
translated_chunks.append(f"Translation error: {str(e)}")
content_translated = " ".join(translated_chunks)
del content_chunks
gc.collect()
article_text = (f"अब, आप लेख संख्या {i} सुन रहे हैं जिसका शीर्षक है: {title_translated}\n"
f"अब, आप लेख संख्या {i} की सामग्री सुन रहे हैं।\n"
f"सामग्री: {content_translated}\n\n")
text_data += article_text
audio_file = f"{audio_folder}/article_{i}.mp3"
audio_output(article_text, audio_file)
audio_files.append(audio_file)
del article_text
del content_translated
del translated_chunks
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
progress_bar.progress(i / len(data))
time.sleep(1)
output_file = f"Company/{company_name}_translated.txt"
with open(output_file, "w", encoding="utf-8") as file:
file.write(text_data)
del text_data
gc.collect()
with st.spinner("Combining audio files..."):
output_file = "combined_news.mp3"
combine_success = combine_audio_files(audio_folder, output_file)
if combine_success:
st.success("Audio combining process completed!")
else:
st.error("Failed to combine audio files.")
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
return True
# Main app interface
st.title("Company News Summarization and Audio Generation")
with st.sidebar:
    st.header("Enter Company Details")
    company_name = st.text_input("Company Name")
    process_button = st.button("Process Company News", type="primary")
# Process data when button is clicked
if process_button and company_name:
    success = process_company_news(company_name)
    if success:
        st.session_state.processing_complete = True
        st.session_state.company_name = company_name
elif process_button and not company_name:
    st.error("Please enter a company name.")
# Show results after processing
if 'processing_complete' in st.session_state and st.session_state.processing_complete:
    company_name = st.session_state.company_name
    st.header(f"Results for {company_name}")
    # Create tabs for different outputs
    tab1, tab2, tab3 = st.tabs(["Summary", "Translated Text", "Audio"])
    with tab1:
        st.subheader("News Summary")
        try:
            with open(f"Company/{company_name}.json", "r", encoding="utf-8") as file:
                articles = json.load(file)
            for i, article in enumerate(articles, 1):
                with st.expander(f"Article {i}: {article['title']}"):
                    st.write(f"**Summary:** {article['summary']}")
                    st.write(f"**Sentiment:** {article['sentiment']}")
                    st.write(f"**Topics:** {', '.join(article['topics'])}")
                    st.write(f"**URL:** {article['url']}")
        except Exception as e:
            st.error(f"Error loading summary data: {str(e)}")
    with tab2:
        st.subheader("Translated Text (Hindi)")
        try:
            with open(f"Company/{company_name}_translated.txt", "r", encoding="utf-8") as file:
                text_content = file.read()
            st.download_button(
                label="Download Translated Text",
                data=text_content,
                file_name=f"{company_name}_translated.txt",
                mime="text/plain"
            )
            st.text_area("Content", text_content, height=400)
        except Exception as e:
            st.error(f"Error loading translated text: {str(e)}")
    with tab3:
        st.subheader("Audio Files")
        st.write("### Combined Audio")
        try:
            with open("combined_news.mp3", "rb") as file:
                combined_audio_bytes = file.read()
            st.audio(combined_audio_bytes, format="audio/mp3")
            st.download_button(
                label="Download Combined Audio",
                data=combined_audio_bytes,
                file_name="combined_news.mp3",
                mime="audio/mp3"
            )
        except Exception as e:
            st.error(f"Error loading combined audio: {str(e)}")
        st.write("### Individual Article Audio Files")
        try:
            audio_files = [f for f in os.listdir("audio") if f.endswith('.mp3')]
            audio_files.sort(key=lambda x: int(x.split('_')[-1].split('.')[0]) if x.split('_')[-1].split('.')[0].isdigit() else 0)
            for audio_file in audio_files:
                with st.expander(f"{audio_file}"):
                    with open(f"audio/{audio_file}", "rb") as file:
                        audio_bytes = file.read()
                    st.audio(audio_bytes, format="audio/mp3")
                    st.download_button(
                        label=f"Download {audio_file}",
                        data=audio_bytes,
                        file_name=audio_file,
                        mime="audio/mp3"
                    )
        except Exception as e:
            st.error(f"Error loading individual audio files: {str(e)}")
# Instructions at the bottom
with st.expander("How to use this app"):
st.write("""
1. Enter the name of a company in the sidebar.
2. Click 'Process Company News' button to start the analysis.
3. Wait for the processing to complete (this may take some time depending on the number of articles).
4. View the results in the different tabs:
- Summary: See sentiment analysis, topics, and summaries of each article
- Translated Text: View the Hindi translation of all articles
- Audio: Listen to or download the audio files in Hindi
""")