"""Streamlit app: fetch company news, analyse it, and narrate it in Hindi.

For a company name entered in the sidebar the app:

1. fetches news articles via ``utils.save_company_news``;
2. adds sentiment, topics and a summary to every article and saves them back
   to ``Company/<name>.json``;
3. translates each article to Hindi and writes one MP3 per article under
   ``audio/`` plus the full narration text to ``Company/<name>_translated.txt``;
4. concatenates the per-article MP3s into ``combined_news.mp3``;
5. shows the results in three tabs (summary, translated text, audio).
"""

import gc
import json
import os
import re
import time
from collections import Counter

import streamlit as st
import torch
from deep_translator import GoogleTranslator
from pydub import AudioSegment

from utils import save_company_news
from utils import sentiment_analysis_model
from utils import news_summarization, audio_output, Topic_finder

COMPANY_DIR = "Company"
AUDIO_DIR = "audio"
COMBINED_AUDIO_FILE = "combined_news.mp3"

# Kept flush-left so the Markdown list is not affected by source indentation.
USAGE_INSTRUCTIONS = """
1. Enter the name of a company in the sidebar.
2. Click 'Process Company News' button to start the analysis.
3. Wait for the processing to complete (this may take some time depending on the number of articles).
4. View the results in the different tabs:
   - Summary: See sentiment analysis, topics, and summaries of each article
   - Translated Text: View the Hindi translation of all articles
   - Audio: Listen to or download the audio files in Hindi
"""

# Set page config (must be the first Streamlit command of the script)
st.set_page_config(
    page_title="Company News Summarization",
    page_icon="📰",
    layout="wide"
)

# Create necessary folders
os.makedirs(COMPANY_DIR, exist_ok=True)
os.makedirs(AUDIO_DIR, exist_ok=True)


def _split_oversize(sentence, max_length):
    """Break *sentence* into pieces of at most *max_length* characters.

    Prefers cutting at the last space inside the limit and only cuts
    mid-word when the window contains no usable space.
    """
    pieces = []
    while len(sentence) > max_length:
        cut = sentence.rfind(" ", 0, max_length + 1)
        if cut <= 0:
            # No space (or only a leading one): hard cut at the limit.
            cut = max_length
        pieces.append(sentence[:cut])
        sentence = sentence[cut:].lstrip()
    if sentence:
        pieces.append(sentence)
    return pieces


def split_text(text, max_length=4500):
    """Split *text* into chunks of at most *max_length* characters.

    The text is split after sentence-ending punctuation (``.``, ``!``, ``?``)
    and consecutive sentences are packed, joined by a single space, into
    chunks that stay within the limit. A sentence that is itself longer than
    *max_length* is broken up (at a space when possible) instead of being
    emitted as an oversize chunk, and empty chunks are never produced.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk; must be >= 1.

    Returns:
        A list of non-empty strings, each at most *max_length* long. Empty
        input yields an empty list.

    Raises:
        ValueError: If *max_length* is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    chunks = []
    current_chunk = ""
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        for piece in _split_oversize(sentence, max_length):
            if not current_chunk:
                current_chunk = piece
            elif len(current_chunk) + len(piece) + 1 <= max_length:
                current_chunk += " " + piece
            else:
                chunks.append(current_chunk)
                current_chunk = piece
    if current_chunk:
        chunks.append(current_chunk)
    return chunks


def _article_index(filename):
    """Return the number after the last ``_`` in *filename*, or 0 if absent.

    Used as a sort key so ``article_10.mp3`` sorts after ``article_2.mp3``.
    """
    stem = filename.split('_')[-1].split('.')[0]
    return int(stem) if stem.isdigit() else 0


def _release_memory():
    """Run the garbage collector and free cached CUDA memory if CUDA is available."""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


def combine_audio_files(audio_folder, output_file):
    """Concatenate the MP3 files in *audio_folder* into *output_file*.

    Files are joined in ascending order of the number after the last
    underscore in their name. A file that cannot be decoded is reported in
    the UI and skipped. Progress and errors are shown through Streamlit.

    Args:
        audio_folder: Directory containing the ``.mp3`` files to merge.
        output_file: Path of the MP3 to write. A file with the same base
            name inside *audio_folder* is excluded from the inputs.

    Returns:
        True if the combined file was exported; False if there was nothing
        to combine, no input could be read, or any other error occurred.
    """
    try:
        st.info(f"Combining audio files from {audio_folder}...")
        skip_name = os.path.basename(output_file)
        audio_files = sorted(
            (
                name for name in os.listdir(audio_folder)
                if name.endswith('.mp3') and name != skip_name
            ),
            key=_article_index,
        )

        if not audio_files:
            st.warning("No audio files found to combine.")
            return False

        st.info(f"Found {len(audio_files)} audio files to combine.")

        combined = AudioSegment.empty()
        loaded = 0
        for name in audio_files:
            try:
                combined += AudioSegment.from_mp3(os.path.join(audio_folder, name))
                loaded += 1
            except Exception as e:
                st.error(f"Error processing {name}: {str(e)}")
            gc.collect()

        if not loaded:
            # Every input failed to decode: do not export an empty track and
            # report it as a success.
            st.warning("None of the audio files could be read; nothing to combine.")
            return False

        combined.export(output_file, format="mp3")
        st.success(f"Successfully combined audio files into {output_file}")
        del combined
        gc.collect()
        return True
    except Exception as e:
        st.error(f"Error combining audio files: {str(e)}")
        return False


def _company_json_path(company_name):
    """Return the path of the JSON file holding *company_name*'s articles."""
    # NOTE(review): company_name is raw user input used in a file path;
    # consider validating it (e.g. rejecting path separators) — confirm how
    # utils.save_company_news builds its own path before changing this.
    return f"{COMPANY_DIR}/{company_name}.json"


def _load_json(path):
    """Load and return the JSON document stored at *path* (UTF-8)."""
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)


def _analyze_articles(company_name):
    """Add sentiment, summary and topics to each stored article.

    Reads ``Company/<company_name>.json``, enriches every article in place,
    writes the result back to the same file and returns the article list.
    """
    json_path = _company_json_path(company_name)
    progress_bar = st.progress(0)
    data = _load_json(json_path)
    total_articles = len(data)

    for i, article in enumerate(data):
        topics = Topic_finder(article['title'])
        sentiment = sentiment_analysis_model(article['content'])
        article["sentiment"] = sentiment['sentiment']
        del sentiment
        gc.collect()

        article["summary"] = news_summarization(article["content"])
        article["topics"] = topics

        # Free memory between articles before the next model call.
        _release_memory()
        progress_bar.progress((i + 1) / total_articles)

    with open(json_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)
    return data


def _show_sentiment_counts(articles):
    """Display how many articles are Positive, Negative and Neutral."""
    sentiment_counts = Counter(article["sentiment"] for article in articles)
    st.write("### Sentiment Analysis")
    col1, col2, col3 = st.columns(3)
    col1.metric("Positive", sentiment_counts.get("Positive", 0))
    col2.metric("Negative", sentiment_counts.get("Negative", 0))
    col3.metric("Neutral", sentiment_counts.get("Neutral", 0))


def _translate_or_placeholder(translator, text, label, pause=0.0):
    """Translate *text*; on failure report it and return a placeholder.

    Args:
        translator: Object exposing ``translate(text)``.
        text: Text to translate.
        label: What is being translated (used in the error message).
        pause: Seconds to sleep after a successful call, to space out
            consecutive requests.

    Returns:
        The translated text, or ``"Translation error: <reason>"`` when the
        translator raised.
    """
    try:
        translated = translator.translate(text)
    except Exception as e:
        st.error(f"Error translating {label}: {str(e)}")
        return f"Translation error: {str(e)}"
    if pause:
        time.sleep(pause)
    return translated


def _clear_directory(folder):
    """Delete every regular file directly inside *folder*."""
    for name in os.listdir(folder):
        path = os.path.join(folder, name)
        if os.path.isfile(path):
            os.remove(path)


def _translate_and_narrate(articles, company_name):
    """Translate each article to Hindi and synthesise one MP3 per article.

    Clears ``audio/``, writes ``audio/article_<i>.mp3`` for every article via
    ``utils.audio_output`` and stores the whole narration text in
    ``Company/<company_name>_translated.txt``.
    """
    translator = GoogleTranslator(source="en", target="hi")
    os.makedirs(AUDIO_DIR, exist_ok=True)

    # Clear previous audio files
    _clear_directory(AUDIO_DIR)

    narration_parts = []
    progress_bar = st.progress(0)
    total_articles = len(articles)

    for i, article in enumerate(articles, start=1):
        # Guarded like the content chunks so that one failed title does not
        # abort the whole run.
        title_translated = _translate_or_placeholder(
            translator, article['title'], "title"
        )
        translated_chunks = [
            _translate_or_placeholder(translator, chunk, "chunk", pause=0.5)
            for chunk in split_text(article['content'])
        ]
        content_translated = " ".join(translated_chunks)

        article_text = (f"अब, आप लेख संख्या {i} सुन रहे हैं जिसका शीर्षक है: {title_translated}\n"
                        f"अब, आप लेख संख्या {i} की सामग्री सुन रहे हैं।\n"
                        f"सामग्री: {content_translated}\n\n")
        narration_parts.append(article_text)

        audio_file = f"{AUDIO_DIR}/article_{i}.mp3"
        audio_output(article_text, audio_file)

        del article_text, content_translated, translated_chunks
        _release_memory()

        progress_bar.progress(i / total_articles)
        time.sleep(1)

    output_file = f"{COMPANY_DIR}/{company_name}_translated.txt"
    with open(output_file, "w", encoding="utf-8") as file:
        file.write("".join(narration_parts))


def process_company_news(company_name):
    """Run the full news pipeline for *company_name* and report it in the UI.

    Steps: fetch the articles, preview them, analyse sentiment/topics/
    summaries, show sentiment counts, translate and narrate in Hindi, and
    combine the per-article audio into ``combined_news.mp3``.

    Args:
        company_name: Company whose news should be processed.

    Returns:
        False when the news could not be fetched; True once the pipeline has
        run (a failure of the final audio-combining step is reported in the
        UI but still returns True).
    """
    with st.spinner("Fetching company news..."):
        file_path = save_company_news(company_name)

        # Also covers a helper that returns None/"" instead of a path.
        if not file_path or not os.path.exists(file_path):
            st.error("Failed to fetch news. Try again.")
            return False

        articles = _load_json(file_path)
        st.success(f"Found {len(articles)} articles for {company_name}")

        # Display a preview of the articles
        with st.expander("Preview Articles"):
            for article in articles:
                st.subheader(article['title'])
                st.write(f"{article['content'][:100]}...")
                st.write(f"[Read more]({article['url']})")

        del articles
        gc.collect()

    with st.spinner("Analyzing sentiment, extracting topics, and generating summaries..."):
        data = _analyze_articles(company_name)

    with st.spinner("Counting sentiment..."):
        _show_sentiment_counts(data)

    with st.spinner("Translating content and generating audio..."):
        _translate_and_narrate(data, company_name)
        del data
        gc.collect()

    with st.spinner("Combining audio files..."):
        if combine_audio_files(AUDIO_DIR, COMBINED_AUDIO_FILE):
            st.success("Audio combining process completed!")
        else:
            st.error("Failed to combine audio files.")

    _release_memory()
    return True


# Main app interface
st.title("Company News Summarization and Audio Generation")

with st.sidebar:
    st.header("Enter Company Details")
    # Stripped so a whitespace-only entry counts as "no company name".
    company_name = st.text_input("Company Name").strip()
    process_button = st.button("Process Company News", type="primary")

# Process data when button is clicked
if process_button and company_name:
    success = process_company_news(company_name)
    if success:
        st.session_state.processing_complete = True
        st.session_state.company_name = company_name
elif process_button and not company_name:
    st.error("Please enter a company name.")

# Show results after processing
if 'processing_complete' in st.session_state and st.session_state.processing_complete:
    company_name = st.session_state.company_name

    st.header(f"Results for {company_name}")

    # Create tabs for different outputs
    tab1, tab2, tab3 = st.tabs(["Summary", "Translated Text", "Audio"])

    with tab1:
        st.subheader("News Summary")
        try:
            articles = _load_json(_company_json_path(company_name))

            for i, article in enumerate(articles, 1):
                with st.expander(f"Article {i}: {article['title']}"):
                    st.write(f"**Summary:** {article['summary']}")
                    st.write(f"**Sentiment:** {article['sentiment']}")
                    st.write(f"**Topics:** {', '.join(article['topics'])}")
                    st.write(f"**URL:** {article['url']}")
        except Exception as e:
            st.error(f"Error loading summary data: {str(e)}")

    with tab2:
        st.subheader("Translated Text (Hindi)")
        try:
            with open(f"{COMPANY_DIR}/{company_name}_translated.txt", "r", encoding="utf-8") as file:
                text_content = file.read()

            st.download_button(
                label="Download Translated Text",
                data=text_content,
                file_name=f"{company_name}_translated.txt",
                mime="text/plain"
            )

            st.text_area("Content", text_content, height=400)
        except Exception as e:
            st.error(f"Error loading translated text: {str(e)}")

    with tab3:
        st.subheader("Audio Files")

        st.write("### Combined Audio")
        try:
            with open(COMBINED_AUDIO_FILE, "rb") as file:
                combined_audio_bytes = file.read()

            st.audio(combined_audio_bytes, format="audio/mp3")
            st.download_button(
                label="Download Combined Audio",
                data=combined_audio_bytes,
                file_name="combined_news.mp3",
                mime="audio/mp3"
            )
        except Exception as e:
            st.error(f"Error loading combined audio: {str(e)}")

        st.write("### Individual Article Audio Files")
        try:
            audio_files = sorted(
                (name for name in os.listdir(AUDIO_DIR) if name.endswith('.mp3')),
                key=_article_index,
            )

            for audio_file in audio_files:
                with st.expander(f"{audio_file}"):
                    with open(f"{AUDIO_DIR}/{audio_file}", "rb") as file:
                        audio_bytes = file.read()

                    st.audio(audio_bytes, format="audio/mp3")
                    st.download_button(
                        label=f"Download {audio_file}",
                        data=audio_bytes,
                        file_name=audio_file,
                        mime="audio/mp3"
                    )
        except Exception as e:
            st.error(f"Error loading individual audio files: {str(e)}")

# Instructions at the bottom
with st.expander("How to use this app"):
    st.write(USAGE_INSTRUCTIONS)