ProfessorLeVesseur committed on
Commit
ed6a185
·
verified ·
1 Parent(s): ae49dc9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +235 -0
app.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import os
4
+ import PyPDF2
5
+ import docx
6
+ import time
7
+
8
#------------------------------------------------------------------------
# Configurations
#------------------------------------------------------------------------
# Streamlit page setup.
# st.set_page_config() is documented as having to be the first Streamlit
# command executed on a page, so it runs before the title and description
# are rendered.
# NOTE(review): the e-mail address in 'Get Help' looks redacted in this copy
# of the file - confirm the real address before deploying.
st.set_page_config(
    page_title="Text Translator",
    page_icon=":speech_balloon:",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'mailto:[email protected]',
        'About': "This app is built to support translation tasks"
    }
)

# Set the title of the app
st.title("Galactic Babel Fish Translator")

# Description
st.write("""
Choose a target language, enter your text or upload a document, and click **Translate** to get the translated text.
""")
30
+
31
+ #------------------------------------------------------------------------
32
+ # Sidebar
33
+ #------------------------------------------------------------------------
34
with st.sidebar:
    # Password input field
    # password = st.text_input("Enter Password:", type="password")

    # Set the desired width in pixels
    image_width = 300
    # Define the path to the image
    image_path = "/Users/cheynelevesseur/Desktop/Manual Library/Python_Code/LLM_Projects_1/intervention_analysis_app/mimtss_logo.png"
    # Display the image only when the file is actually present: the path is an
    # absolute location on one developer machine, and st.image() fails for a
    # missing file, which would otherwise break the whole page elsewhere.
    if os.path.isfile(image_path):
        st.image(image_path, width=image_width)

    # Toggle for Help and Report a Bug
    with st.expander("Need help and report a bug"):
        st.write("""
**Contact**: Cheyne LeVesseur, PhD
**Email**: [email protected]
""")
    st.divider()
    st.subheader('User Instructions')

    # Principles text with Markdown formatting
    User_Instructions = """

- **Step 1**: Provide either text input or upload a document for translation.
- **Step 2**: Click Translate.
- **Step 3**: Sit back, relax, and let the magic happen!

"""
    st.markdown(User_Instructions)
63
+
64
+ #------------------------------------------------------------------------
65
+ # Functions
66
+ #------------------------------------------------------------------------
67
+
68
+ # Language to model mapping
69
# Every supported target language uses an English-source OPUS-MT checkpoint,
# so the model ids differ only in their trailing language code.
_OPUS_MT_EN_PREFIX = "Helsinki-NLP/opus-mt-en-"

# (display name, model-id suffix) pairs; the order here is the order in which
# the languages are offered in the dropdown.
_TARGET_LANGUAGE_CODES = (
    ("Spanish", "es"),
    ("Arabic", "ar"),
    ("Chinese", "zh"),
    ("Albanian", "sq"),
    ("French", "fr"),
    ("German", "de"),
    ("Japanese", "jap"),
    ("Italian", "it"),
    ("Dutch", "nl"),
    ("Hindi", "hi"),
    ("Russian", "ru"),
    ("Indonesian", "id"),
    ("Greek", "el"),
    ("Danish", "da"),
    ("Swedish", "sv"),
    ("Czech", "cs"),
    ("Catalan", "ca"),
    ("Bulgarian", "bg"),
    ("Estonian", "et"),
    ("Basque", "eu"),
    ("Vietnamese", "vi"),
    ("Finnish", "fi"),
    ("Hebrew", "he"),
    ("Azerbaijani", "az"),
    ("Afrikaans", "af"),
    ("Armenian", "hy"),
    ("Hungarian", "hu"),
)

# Maps the language name shown to the user to its Hugging Face model id.
language_model_mapping = {
    display_name: _OPUS_MT_EN_PREFIX + code
    for display_name, code in _TARGET_LANGUAGE_CODES
}
98
+
99
+ # Dropdown for language selection
100
# Target-language dropdown; the options are the display names of the mapping.
language = st.selectbox("Select target language", list(language_model_mapping))

# Input method selection
input_option = st.radio("Select input method:", ("Text Input", "Upload Document"))

# Text to translate; stays empty until the user types or uploads something.
input_text = ""
109
+
110
+ # Functions to extract text from files
111
def extract_text_from_pdf(pdf_file):
    """Return the text of every page in ``pdf_file``, one page per line block.

    Each page that yields any text contributes that text followed by a
    newline; pages with no extractable text are skipped. If reading or
    extraction raises, the error is shown in the UI and "" is returned.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        page_texts = (page.extract_text() for page in reader.pages)
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""
124
+
125
def extract_text_from_docx(docx_file):
    """Return the paragraphs of a Word document, each followed by a newline.

    If opening or reading the document raises, the error is shown in the UI
    and "" is returned.
    """
    try:
        document = docx.Document(docx_file)
        return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
    except Exception as e:
        st.error(f"Error extracting text from Word document: {e}")
        return ""
135
+
136
+ # Text area or file uploader based on input method
137
if input_option == "Text Input":
    input_text = st.text_area("Enter text to translate", height=200)
elif input_option == "Upload Document":
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "docx"])
    if uploaded_file is not None:
        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
        # Lower-cased extension -> (spinner message, extraction function).
        extractors_by_extension = {
            ".pdf": ("Extracting text from PDF...", extract_text_from_pdf),
            ".docx": ("Extracting text from Word document...", extract_text_from_docx),
        }
        extractor_entry = extractors_by_extension.get(file_extension)
        if extractor_entry is None:
            st.error("Unsupported file type.")
            input_text = ""
        else:
            spinner_message, extract_text = extractor_entry
            with st.spinner(spinner_message):
                input_text = extract_text(uploaded_file)
152
+
153
+ # Function to split text into chunks
154
def split_text_into_chunks(text, max_chunk_size):
    """Split ``text`` into consecutive chunks of at most ``max_chunk_size`` characters.

    A chunk boundary is placed on whitespace whenever the current window
    contains any, so words are not cut in half; a run of characters longer
    than ``max_chunk_size`` with no whitespace is still split at the size
    limit. No characters are dropped: ``"".join(chunks) == text``.

    Args:
        text: The string to split. An empty string yields an empty list.
        max_chunk_size: Maximum length of each chunk; must be positive.

    Returns:
        A list of non-empty strings.

    Raises:
        ValueError: If ``max_chunk_size`` is not positive.
    """
    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = []
    text_length = len(text)
    start = 0
    while start < text_length:
        end = min(start + max_chunk_size, text_length)
        # Cutting here would split a word only if more text follows and the
        # next character is not whitespace; in that case back up to just after
        # the last whitespace character inside the window.
        if end < text_length and not text[end].isspace():
            for index in range(end - 1, start, -1):
                if text[index].isspace():
                    end = index + 1
                    break
        chunks.append(text[start:end])
        start = end
    return chunks
156
+
157
+ # Function to perform the translation with retry mechanism
158
def _extract_translation(result):
    """Return the ``translation_text`` string from an API payload, or None if absent."""
    if isinstance(result, list) and result:
        result = result[0]
    if isinstance(result, dict):
        translation = result.get("translation_text")
        if isinstance(translation, str):
            return translation
    return None


def translate_text(text, target_lang, max_retries=5, backoff_factor=2):
    """Translate ``text`` into ``target_lang`` through the Hugging Face Inference API.

    The text is split into chunks, each chunk is posted to the model mapped to
    ``target_lang`` in ``language_model_mapping``, and the translated chunks
    are joined with single spaces.

    Args:
        text: The text to translate.
        target_lang: A key of ``language_model_mapping``.
        max_retries: Maximum number of requests made per chunk.
        backoff_factor: Base of the exponential delay; the wait after failed
            attempt ``k`` is ``backoff_factor ** k`` seconds.

    Returns:
        The translated text, or None on failure. Every failure is also
        reported to the user with ``st.error``.
    """
    model = language_model_mapping.get(target_lang)
    if not model:
        st.error("Unsupported language selected.")
        return None

    # Retrieve Hugging Face API key from environment variables
    hf_api_key = os.getenv('HF_API_KEY')
    if not hf_api_key:
        st.error("Hugging Face API key not set in environment variables.")
        return None

    api_url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {hf_api_key}"}
    # Without a timeout requests waits indefinitely on a stalled connection.
    request_timeout = 60  # seconds

    # Split the text into manageable chunks
    max_chunk_size = 500  # Adjust based on API limitations
    text_chunks = split_text_into_chunks(text, max_chunk_size)
    translated_chunks = []

    for chunk_index, chunk in enumerate(text_chunks):
        translated_text = None
        for attempt in range(1, max_retries + 1):
            try:
                response = requests.post(
                    api_url,
                    headers=headers,
                    json={"inputs": chunk},
                    timeout=request_timeout,
                )
            except requests.exceptions.RequestException:
                # Connection problems and timeouts may be transient: retry.
                response = None

            if response is not None:
                status = response.status_code
                if 400 <= status < 500 and status != 429:
                    # Client errors (bad key, unknown model, bad payload) will
                    # not succeed on a retry, so report them immediately.
                    st.error(f"Error from translation API (HTTP {status}): {response.text[:200]}")
                    return None
                if status < 400:
                    try:
                        result = response.json()
                    except ValueError:
                        st.error("Unexpected response format from the API.")
                        return None

                    # Handle possible errors from the API
                    if isinstance(result, dict) and result.get("error"):
                        st.error(f"Error from translation API: {result['error']}")
                        return None

                    translated_text = _extract_translation(result)
                    if translated_text is None:
                        # Fail instead of splicing an error message into the
                        # translated output.
                        st.error("Unexpected response format from the API.")
                        return None
                    break
                # Remaining statuses (429 and 5xx, e.g. 503 while the model is
                # loading) fall through to the backoff below.

            # Back off before the next attempt; waiting after the final
            # attempt would only delay the failure message.
            if attempt < max_retries:
                time.sleep(backoff_factor ** attempt)

        if translated_text is None:
            # All retry attempts failed for this chunk
            st.error(f"Failed to translate chunk {chunk_index + 1} after {max_retries} attempts.")
            return None
        translated_chunks.append(translated_text)

    return " ".join(translated_chunks)
223
+
224
+ # Translate button
225
# Run a translation when the button is pressed.
if st.button("Translate"):
    if input_text.strip():
        with st.spinner("Translation service loading..."):
            translated = translate_text(input_text, language)
            if not translated:
                # None or an empty result both count as a failed translation.
                st.error("Translation failed. Please try again later.")
            else:
                st.subheader("Translated Text:")
                st.write(translated)
    else:
        # Nothing typed, or nothing could be extracted from the upload.
        st.warning("Please enter some text to translate.")