binuser007 committed on
Commit
02d7e31
·
verified ·
1 Parent(s): c991f22

Upload 5 files

Browse files
Files changed (5) hide show
  1. .env +1 -0
  2. .gitignore +6 -0
  3. README.md +70 -12
  4. app.py +185 -0
  5. requirements.txt +4 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ API_KEY = <REDACTED_GROQ_API_KEY>
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ __pycache__/
3
+ *.pyc
4
+ .env.local
5
+ .env.*.local
6
+ .DS_Store
README.md CHANGED
@@ -1,12 +1,70 @@
1
- ---
2
- title: News Summarize And Translation
3
- emoji: 🐨
4
- colorFrom: blue
5
- colorTo: gray
6
- sdk: streamlit
7
- sdk_version: 1.40.2
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # News Translator & Summarizer
2
+
3
+ A Streamlit-based web application that translates and summarizes news articles using the Groq API. This application allows users to input news article URLs and get translations and summaries in their preferred language.
4
+
5
+ ## Features
6
+
7
+ - Article content extraction from URLs
8
+ - Article translation to multiple languages
9
+ - Article summarization
10
+ - Clean and intuitive user interface
11
+ - Secure API key management
12
+
13
+ ## Project Structure
14
+
15
+ ```
16
+ translation/
17
+ ├── .env # Environment variables file (contains API keys)
18
+ ├── .gitignore # Git ignore file for excluding sensitive data
19
+ ├── .streamlit/ # Streamlit configuration directory
20
+ ├── app.py # Main application file
21
+ ├── requirements.txt # Project dependencies
22
+ └── url/ # Directory containing URL-related data
23
+ ```
24
+
25
+ ## Files Description
26
+
27
+ - `app.py`: The main application file containing the Streamlit web interface and core functionality for article processing, translation, and summarization.
28
+ - `.env`: Contains sensitive environment variables like API keys (not tracked in git)
29
+ - `.gitignore`: Specifies which files Git should ignore (includes .env and other sensitive files)
30
+ - `requirements.txt`: Lists all Python dependencies required to run the project
31
+
32
+ ## Setup and Installation
33
+
34
+ 1. Clone the repository
35
+ 2. Install dependencies:
36
+ ```bash
37
+ pip install -r requirements.txt
38
+ ```
39
+ 3. Create a `.env` file in the root directory and add your API key:
40
+ ```
41
+ API_KEY=your_groq_api_key_here
42
+ ```
43
+ 4. Run the application:
44
+ ```bash
45
+ streamlit run app.py
46
+ ```
47
+
48
+ ## Dependencies
49
+
50
+ - streamlit (>=1.30.0): Web application framework
51
+ - requests (>=2.31.0): HTTP library for making API requests
52
+ - python-dotenv (>=1.0.0): Environment variable management
53
+ - beautifulsoup4 (>=4.12.0): Web scraping and HTML parsing
54
+
55
+ ## Security
56
+
57
+ - API keys are stored in the `.env` file
58
+ - The `.gitignore` file prevents sensitive data from being committed to the repository
59
+ - Environment variables are loaded securely using python-dotenv
60
+
61
+ ## Usage
62
+
63
+ 1. Start the application using `streamlit run app.py`
64
+ 2. Enter a news article URL in the input field
65
+ 3. Select your desired target language
66
+ 4. Click the process button to get the translated and summarized content
67
+
68
+ ## Note
69
+
70
+ Make sure to keep your API keys confidential and never commit them to version control. Always use the `.env` file for storing sensitive information.
app.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import os
4
+ from typing import Dict, Any
5
+ from bs4 import BeautifulSoup
6
+ from urllib.parse import urlparse
7
+ from dotenv import load_dotenv
8
+
9
# Load environment variables (python-dotenv reads a local .env file if present).
load_dotenv()

# Configure page settings
st.set_page_config(
    page_title="News Translator & Summarizer",
    page_icon="📰",
    layout="wide",
)

# Constants
# The API key comes from the API_KEY environment variable. Surrounding
# whitespace is removed and a blank value is normalised to None so that a
# missing key can be detected with a simple truthiness check.
GROQ_API_KEY = (os.getenv("API_KEY") or "").strip() or None
GROQ_API_ENDPOINT = "https://api.groq.com/openai/v1/chat/completions"
# The model can be switched without a code change by setting GROQ_MODEL.
# NOTE(review): hosted model IDs are retired over time -- confirm that the
# default below is still served by the API before deploying.
MODEL = os.getenv("GROQ_MODEL") or "llama-3.1-70b-versatile"

# Available languages for translation (shown in the UI in this order)
LANGUAGES = [
    "Telugu", "Hindi", "Tamil", "Bengali", "Marathi", "French",
    "Spanish", "German", "Chinese", "Japanese", "Korean",
]
29
+
30
def fetch_article_content(url: str, timeout: float = 10.0) -> str:
    """Download a web page and return the text of its paragraphs.

    The page is parsed with BeautifulSoup. Script, style and page-chrome
    elements are removed, then the text of every ``<p>`` inside the first
    ``<article>`` element (or the first element whose class is ``article``,
    ``content`` or ``post``) is joined with single spaces. If no such
    container exists, or it holds no paragraphs, all ``<p>`` elements of the
    page are used instead.

    Args:
        url: Address of the article. Only ``http`` and ``https`` URLs with a
            host part are accepted, because the value comes straight from
            user input.
        timeout: Seconds to wait for the server before giving up, so a slow
            or silent host cannot block the app indefinitely.

    Returns:
        The extracted article text; an empty string when the page contains
        no paragraph text.

    Raises:
        Exception: For any failure (invalid URL, network error, HTTP error
            status, parsing problem). The message starts with
            ``"Failed to fetch article content: "`` and the original error
            is attached as ``__cause__``.
    """
    try:
        parsed = urlparse(url)
        if parsed.scheme not in ("http", "https") or not parsed.netloc:
            raise ValueError(
                f"unsupported or malformed URL {url!r} "
                "(only http:// and https:// URLs are accepted)"
            )

        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove unwanted elements
        for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'ads']):
            tag.decompose()

        # Get main content (adjust selectors based on target websites)
        main_content = soup.find('article') or soup.find(class_=['article', 'content', 'post'])
        paragraphs = main_content.find_all('p') if main_content else []
        if not paragraphs:
            # Fallback to all paragraphs when there is no article container
            # or the container itself holds no <p> elements.
            paragraphs = soup.find_all('p')

        return ' '.join(p.get_text().strip() for p in paragraphs).strip()
    except Exception as e:
        # Callers rely on a single generic exception type with this prefix;
        # chain the cause so the original traceback is not lost.
        raise Exception(f"Failed to fetch article content: {e}") from e
56
+
57
def process_article(text: str, url: str, target_language: str,
                    timeout: float = 30.0) -> Dict[Any, Any]:
    """Summarize an article and translate the summary via the Groq API.

    When ``text`` is empty and ``url`` is given, the article text is first
    fetched with ``fetch_article_content``. The text is then sent to the
    chat-completions endpoint with a prompt asking for an English summary
    and its translation.

    Args:
        text: Article text; may be empty when ``url`` is supplied.
        url: Article URL, used only when ``text`` is empty.
        target_language: Language name inserted into the prompt.
        timeout: Seconds to wait for the API response before giving up.

    Returns:
        ``{"success": True, "content": <model reply>}`` on success, or
        ``{"success": False, "error": <message>}`` on any failure. This
        function does not raise; every error is reported in the dict.
    """
    # Fetch content if URL is provided
    if url and not text:
        try:
            text = fetch_article_content(url)
        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }

    # Do not spend an API call on an empty prompt (no input at all, or a
    # page from which no paragraph text could be extracted).
    if not text or not text.strip():
        return {
            "success": False,
            "error": "No article text was provided or could be extracted from the URL."
        }

    # Without a key the request can only fail with an opaque 401; report
    # the real cause instead.
    if not GROQ_API_KEY:
        return {
            "success": False,
            "error": "API key is not configured. Set API_KEY in the environment or the .env file."
        }

    messages = [
        {
            "role": "system",
            "content": "You are a professional translator and summarizer. Provide concise summaries and accurate translations."
        },
        {
            "role": "user",
            "content": f"""Please analyze this article and provide:
1. A concise summary in English (100-150 words)
2. An accurate translation of the summary in {target_language}

Article: {text}

Format your response exactly as:
SUMMARY: <english_summary>
TRANSLATION: <translated_summary>"""
        }
    ]

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": MODEL,
        "messages": messages,
        "temperature": 0.3,  # Lower temperature for more focused outputs
        "max_tokens": 1024,
        "top_p": 1
    }

    try:
        response = requests.post(
            GROQ_API_ENDPOINT, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        result = response.json()
        return {
            "success": True,
            "content": result["choices"][0]["message"]["content"]
        }
    except requests.exceptions.RequestException as e:
        # Network problems, timeouts and HTTP error statuses.
        return {
            "success": False,
            "error": f"API request failed: {str(e)}"
        }
    except Exception as e:
        # Anything else, e.g. a reply that lacks the expected JSON fields.
        return {
            "success": False,
            "error": f"Processing error: {str(e)}"
        }
120
+
121
def _split_summary_translation(content: str) -> tuple:
    """Split a model reply into its ``(summary, translation)`` parts.

    The reply is expected to look like ``SUMMARY: ... TRANSLATION: ...``.
    Returns two empty strings when the ``TRANSLATION:`` marker is missing.
    """
    head, marker, tail = content.partition("TRANSLATION:")
    if not marker:
        return "", ""

    # Keep only what follows the SUMMARY: label, if the label is present.
    _, summary_marker, after_label = head.partition("SUMMARY:")
    summary = after_label if summary_marker else head

    # Models sometimes wrap the labels in Markdown bold ("**SUMMARY:**");
    # drop the stray asterisks left around the text in that case.
    summary = summary.strip().strip("*").strip()
    translation = tail.strip().strip("*").strip()
    return summary, translation


def main():
    """Render the Streamlit page and handle one user interaction.

    The page lets the user supply an article as a URL or as pasted text,
    pick a target language, and press a button. The article is then passed
    to ``process_article`` and the English summary and its translation are
    shown side by side. If the reply cannot be split into the two parts, it
    is shown unmodified instead of leaving the columns blank.
    """
    st.title("📰 News Translator & Summarizer")
    st.write("Get summaries and translations of news articles using Groq's LLM")

    # Input method selection
    input_method = st.radio(
        "Choose input method:",
        ["URL", "Text"],
        horizontal=True
    )

    url = ""
    text = ""

    if input_method == "URL":
        url = st.text_input(
            "Enter article URL:",
            placeholder="https://example.com/news-article"
        )
    else:
        text = st.text_area(
            "Enter article text:",
            height=200
        )

    # Ignore accidental surrounding whitespace (for example from copy/paste)
    # so that blank-only input is treated as missing.
    url = (url or "").strip()
    text = (text or "").strip()

    # Language selection
    target_language = st.selectbox(
        "Select target language for translation:",
        LANGUAGES
    )

    if not st.button("Process Article", type="primary"):
        return

    if not ((input_method == "URL" and url) or (input_method == "Text" and text)):
        st.warning("Please provide either a URL or text content.")
        return

    with st.spinner("Processing article..."):
        result = process_article(text, url, target_language)

    if not result["success"]:
        st.error(f"Error processing article: {result['error']}")
        return

    # Parse the response
    content = result["content"]
    summary, translation = _split_summary_translation(content)

    if not summary and not translation:
        # The model did not follow the requested format; show what it
        # returned rather than two empty columns.
        st.warning("The response was not in the expected format; showing it unmodified.")
        st.write(content)
        return

    # Display results in columns
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("English Summary")
        st.write(summary)

    with col2:
        st.subheader(f"{target_language} Translation")
        st.write(translation)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit>=1.30.0
2
+ requests>=2.31.0
3
+ python-dotenv>=1.0.0
4
+ beautifulsoup4>=4.12.0