Upload 5 files
Browse files- .env +1 -0
- .gitignore +6 -0
- README.md +70 -12
- app.py +185 -0
- requirements.txt +4 -0
.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
API_KEY=your_groq_api_key_here
|
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
__pycache__/
|
3 |
+
*.pyc
|
4 |
+
.env.local
|
5 |
+
.env.*.local
|
6 |
+
.DS_Store
|
README.md
CHANGED
@@ -1,12 +1,70 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# News Translator & Summarizer
|
2 |
+
|
3 |
+
A Streamlit-based web application that translates and summarizes news articles using the Groq API. This application allows users to input news article URLs and get translations and summaries in their preferred language.
|
4 |
+
|
5 |
+
## Features
|
6 |
+
|
7 |
+
- Article content extraction from URLs
|
8 |
+
- Article translation to multiple languages
|
9 |
+
- Article summarization
|
10 |
+
- Clean and intuitive user interface
|
11 |
+
- Secure API key management
|
12 |
+
|
13 |
+
## Project Structure
|
14 |
+
|
15 |
+
```
|
16 |
+
translation/
|
17 |
+
├── .env # Environment variables file (contains API keys)
|
18 |
+
├── .gitignore # Git ignore file for excluding sensitive data
|
19 |
+
├── .streamlit/ # Streamlit configuration directory
|
20 |
+
├── app.py # Main application file
|
21 |
+
├── requirements.txt # Project dependencies
|
22 |
+
└── url/ # Directory containing URL-related data
|
23 |
+
```
|
24 |
+
|
25 |
+
## Files Description
|
26 |
+
|
27 |
+
- `app.py`: The main application file containing the Streamlit web interface and core functionality for article processing, translation, and summarization.
|
28 |
+
- `.env`: Contains sensitive environment variables like API keys (not tracked in git)
|
29 |
+
- `.gitignore`: Specifies which files Git should ignore (includes .env and other sensitive files)
|
30 |
+
- `requirements.txt`: Lists all Python dependencies required to run the project
|
31 |
+
|
32 |
+
## Setup and Installation
|
33 |
+
|
34 |
+
1. Clone the repository
|
35 |
+
2. Install dependencies:
|
36 |
+
```bash
|
37 |
+
pip install -r requirements.txt
|
38 |
+
```
|
39 |
+
3. Create a `.env` file in the root directory and add your API key:
|
40 |
+
```
|
41 |
+
API_KEY=your_groq_api_key_here
|
42 |
+
```
|
43 |
+
4. Run the application:
|
44 |
+
```bash
|
45 |
+
streamlit run app.py
|
46 |
+
```
|
47 |
+
|
48 |
+
## Dependencies
|
49 |
+
|
50 |
+
- streamlit (>=1.30.0): Web application framework
|
51 |
+
- requests (>=2.31.0): HTTP library for making API requests
|
52 |
+
- python-dotenv (>=1.0.0): Environment variable management
|
53 |
+
- beautifulsoup4 (>=4.12.0): Web scraping and HTML parsing
|
54 |
+
|
55 |
+
## Security
|
56 |
+
|
57 |
+
- API keys are stored in the `.env` file
|
58 |
+
- The `.gitignore` file prevents sensitive data from being committed to the repository
|
59 |
+
- Environment variables are loaded securely using python-dotenv
|
60 |
+
|
61 |
+
## Usage
|
62 |
+
|
63 |
+
1. Start the application using `streamlit run app.py`
|
64 |
+
2. Enter a news article URL in the input field
|
65 |
+
3. Select your desired target language
|
66 |
+
4. Click the process button to get the translated and summarized content
|
67 |
+
|
68 |
+
## Note
|
69 |
+
|
70 |
+
Make sure to keep your API keys confidential and never commit them to version control. Always use the `.env` file for storing sensitive information.
|
app.py
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
import os
|
4 |
+
from typing import Dict, Any
|
5 |
+
from bs4 import BeautifulSoup
|
6 |
+
from urllib.parse import urlparse
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
|
9 |
+
# Load environment variables (python-dotenv reads a local .env file if present)
load_dotenv()

# Configure page settings
st.set_page_config(
    page_title="News Translator & Summarizer",
    page_icon="📰",
    layout="wide",
)

# Constants
GROQ_API_KEY = os.getenv("API_KEY")  # None when API_KEY is not set
GROQ_API_ENDPOINT = "https://api.groq.com/openai/v1/chat/completions"
# NOTE(review): the previous default, "llama-3.1-70b-versatile", appears on
# Groq's deprecation list with "llama-3.3-70b-versatile" as its replacement --
# confirm against the current model list. GROQ_MODEL lets a deployment switch
# models without a code change.
MODEL = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")

# Available languages for translation
LANGUAGES = [
    "Telugu", "Hindi", "Tamil", "Bengali", "Marathi", "French",
    "Spanish", "German", "Chinese", "Japanese", "Korean",
]
|
29 |
+
|
30 |
+
def fetch_article_content(url: str, timeout: float = 15.0) -> str:
    """Download a web page and return the text of its paragraphs.

    The page is parsed with BeautifulSoup. Paragraphs are taken from the
    first ``<article>`` element, or else from the first element whose class
    is ``article``, ``content`` or ``post``; when neither exists, every
    ``<p>`` on the page is used.

    Args:
        url: Absolute ``http`` or ``https`` URL of the article.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The stripped paragraph texts joined by single spaces (may be empty
        when the page has no ``<p>`` elements).

    Raises:
        ValueError: If ``url`` is not an absolute http(s) URL.
        Exception: If the download or the parsing fails; the original error
            is attached as ``__cause__``.
    """
    # Reject anything that is not a plain web URL before touching the network
    # (e.g. "file://" paths, or input with no scheme at all).
    parsed = urlparse(url.strip())
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise ValueError(
            "Failed to fetch article content: "
            f"{url!r} is not a valid http(s) URL"
        )

    try:
        # Without a timeout, requests would wait forever on a stalled server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove unwanted elements
        for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'ads']):
            tag.decompose()

        # Get main content (adjust selectors based on target websites);
        # fall back to the whole document if no article container is found.
        main_content = soup.find('article') or soup.find(class_=['article', 'content', 'post'])
        container = main_content if main_content else soup

        paragraphs = container.find_all('p')
        article_text = ' '.join(p.get_text().strip() for p in paragraphs)
        return article_text.strip()
    except Exception as e:
        # Keep the single-exception contract callers rely on, but chain the
        # cause so the real failure stays visible in tracebacks.
        raise Exception(f"Failed to fetch article content: {str(e)}") from e
|
56 |
+
|
57 |
+
def process_article(
    text: str,
    url: str,
    target_language: str,
    timeout: float = 60.0,
) -> Dict[Any, Any]:
    """Summarize an article and translate the summary using the Groq API.

    Args:
        text: Article text. When empty and ``url`` is given, the text is
            fetched from ``url`` instead.
        url: Article URL; only used when ``text`` is empty.
        target_language: Language name inserted into the prompt for the
            translated summary.
        timeout: Seconds to wait for the Groq API before giving up.

    Returns:
        ``{"success": True, "content": <model reply>}`` on success, otherwise
        ``{"success": False, "error": <message>}``. Failures are reported
        through the dict rather than raised.
    """
    # Fetch content if URL is provided
    if url and not text:
        try:
            text = fetch_article_content(url)
        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }

    # Nothing to summarize (no input, or a page without paragraph text):
    # fail here rather than sending an empty article to the model.
    if not text or not text.strip():
        return {
            "success": False,
            "error": "No article text was provided or could be extracted."
        }

    # Without a key the request would go out as "Bearer None" and come back
    # as an opaque 401; report the configuration problem directly.
    if not GROQ_API_KEY:
        return {
            "success": False,
            "error": "Groq API key is not configured. Set API_KEY in the environment or .env file."
        }

    messages = [
        {
            "role": "system",
            "content": "You are a professional translator and summarizer. Provide concise summaries and accurate translations."
        },
        {
            "role": "user",
            "content": f"""Please analyze this article and provide:
1. A concise summary in English (100-150 words)
2. An accurate translation of the summary in {target_language}

Article: {text}

Format your response exactly as:
SUMMARY: <english_summary>
TRANSLATION: <translated_summary>"""
        }
    ]

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": MODEL,
        "messages": messages,
        "temperature": 0.3,  # Lower temperature for more focused outputs
        "max_tokens": 1024,
        "top_p": 1
    }

    try:
        # Without a timeout, requests would wait forever on a stalled API.
        response = requests.post(
            GROQ_API_ENDPOINT,
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        result = response.json()
        return {
            "success": True,
            "content": result["choices"][0]["message"]["content"]
        }
    except requests.exceptions.RequestException as e:
        return {
            "success": False,
            "error": f"API request failed: {str(e)}"
        }
    except Exception as e:
        # Boundary for the UI: e.g. a reply missing the expected
        # "choices"/"message" keys ends up here instead of crashing the page.
        return {
            "success": False,
            "error": f"Processing error: {str(e)}"
        }
|
120 |
+
|
121 |
+
def _split_summary_and_translation(content: str) -> tuple:
    """Split a model reply into ``(summary, translation)``.

    The reply is cut at the first ``TRANSLATION:`` marker. ``translation`` is
    an empty string when the marker is absent.
    """
    head, marker, tail = content.partition("TRANSLATION:")
    summary = head.replace("SUMMARY:", "").strip()
    translation = tail.strip() if marker else ""
    return summary, translation


def main():
    """Render the Streamlit page: collect input, process it, show the result."""
    st.title("📰 News Translator & Summarizer")
    st.write("Get summaries and translations of news articles using Groq's LLM")

    # Input method selection
    input_method = st.radio(
        "Choose input method:",
        ["URL", "Text"],
        horizontal=True
    )

    url = ""
    text = ""

    if input_method == "URL":
        url = st.text_input(
            "Enter article URL:",
            placeholder="https://example.com/news-article"
        )
    else:
        text = st.text_area(
            "Enter article text:",
            height=200
        )

    # Whitespace-only input counts as no input.
    url = url.strip()
    text = text.strip()

    # Language selection
    target_language = st.selectbox(
        "Select target language for translation:",
        LANGUAGES
    )

    if st.button("Process Article", type="primary"):
        if not url and not text:
            st.warning("Please provide either a URL or text content.")
            return

        with st.spinner("Processing article..."):
            result = process_article(text, url, target_language)

        if not result["success"]:
            st.error(f"Error processing article: {result['error']}")
            return

        content = result["content"]
        summary, translation = _split_summary_and_translation(content)

        if not translation:
            # The model ignored the requested format; show its reply as-is
            # instead of rendering two empty columns.
            st.warning("The response was not in the expected format; showing it unparsed.")
            st.write(content)
            return

        # Display results in columns
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("English Summary")
            st.write(summary)

        with col2:
            st.subheader(f"{target_language} Translation")
            st.write(translation)


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit>=1.30.0
|
2 |
+
requests>=2.31.0
|
3 |
+
python-dotenv>=1.0.0
|
4 |
+
beautifulsoup4>=4.12.0
|