Translator

Sleeping

App Files Files Community

ProfessorLeVesseur committed on Oct 18, 2024

Commit

ed6a185

verified ·

1 Parent(s): ae49dc9

Create app.py

Browse files

Files changed (1) hide show

app.py +235 -0

app.py ADDED Viewed

	@@ -0,0 +1,235 @@

+import streamlit as st
+import requests
+import os
+import PyPDF2
+import docx
+import time
+# Set the title of the app
+st.title("Galactic Babel Fish Translator")
+# Description
+st.write("""
+Choose a target language, enter your text or upload a document, and click **Translate** to get the translated text.
+""")
+#------------------------------------------------------------------------
+# Configurations
+#------------------------------------------------------------------------
+# Streamlit page setup
+st.set_page_config(
+    page_title="Text Translator",
+    page_icon=":speech_balloon:",
+    layout="centered",
+    initial_sidebar_state="auto",
+    menu_items={
+        'Get Help': 'mailto:[email protected]',
+        'About': "This app is built to support translation tasks"
+    }
+)
+#------------------------------------------------------------------------
+# Sidebar
+#------------------------------------------------------------------------
+with st.sidebar:
+    # Password input field
+    # password = st.text_input("Enter Password:", type="password")
+    # Set the desired width in pixels
+    image_width = 300
+    # Define the path to the image
+    image_path = "/Users/cheynelevesseur/Desktop/Manual Library/Python_Code/LLM_Projects_1/intervention_analysis_app/mimtss_logo.png"
+    # Display the image
+    st.image(image_path, width=image_width)
+    # Toggle for Help and Report a Bug
+    with st.expander("Need help and report a bug"):
+        st.write("""
+        **Contact**: Cheyne LeVesseur, PhD
+        **Email**: [email protected]
+        """)
+    st.divider()
+    st.subheader('User Instructions')
+    # Principles text with Markdown formatting
+    User_Instructions = """
+    - **Step 1**: Provide either text input or upload a document for translation.
+    - **Step 2**: Click Translate.
+    - **Step 3**: Sit back, relax, and let the magic happen!
+    """
+    st.markdown(User_Instructions)
+#------------------------------------------------------------------------
+# Functions
+#------------------------------------------------------------------------
+# Maps each target-language name shown in the dropdown to the Hugging Face model id ("Helsinki-NLP/opus-mt-en-*") that translate_text() requests; dict order is the dropdown order
+language_model_mapping = {
+    "Spanish": "Helsinki-NLP/opus-mt-en-es",
+    "Arabic": "Helsinki-NLP/opus-mt-en-ar",
+    "Chinese": "Helsinki-NLP/opus-mt-en-zh",
+    "Albanian": "Helsinki-NLP/opus-mt-en-sq",
+    "French": "Helsinki-NLP/opus-mt-en-fr",
+    "German": "Helsinki-NLP/opus-mt-en-de",
+    "Japanese": "Helsinki-NLP/opus-mt-en-jap",
+    "Italian": "Helsinki-NLP/opus-mt-en-it",
+    "Dutch": "Helsinki-NLP/opus-mt-en-nl",
+    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
+    "Russian": "Helsinki-NLP/opus-mt-en-ru",
+    "Indonesian": "Helsinki-NLP/opus-mt-en-id",
+    "Greek": "Helsinki-NLP/opus-mt-en-el",
+    "Danish": "Helsinki-NLP/opus-mt-en-da",
+    "Swedish": "Helsinki-NLP/opus-mt-en-sv",
+    "Czech": "Helsinki-NLP/opus-mt-en-cs",
+    "Catalan": "Helsinki-NLP/opus-mt-en-ca",
+    "Bulgarian": "Helsinki-NLP/opus-mt-en-bg",
+    "Estonian": "Helsinki-NLP/opus-mt-en-et",
+    "Basque": "Helsinki-NLP/opus-mt-en-eu",
+    "Vietnamese": "Helsinki-NLP/opus-mt-en-vi",
+    "Finnish": "Helsinki-NLP/opus-mt-en-fi",
+    "Hebrew": "Helsinki-NLP/opus-mt-en-he",
+    "Azerbaijani": "Helsinki-NLP/opus-mt-en-az",
+    "Afrikaans": "Helsinki-NLP/opus-mt-en-af",
+    "Armenian": "Helsinki-NLP/opus-mt-en-hy",
+    "Hungarian": "Helsinki-NLP/opus-mt-en-hu"
+}
+# Dropdown for language selection
+language = st.selectbox(
+    "Select target language",
+    list(language_model_mapping.keys())
+)
+# Input method selection
+input_option = st.radio("Select input method:", ("Text Input", "Upload Document"))
+input_text = ""
+# Functions to extract text from files
+def extract_text_from_pdf(pdf_file):
+    try:
+        pdf_reader = PyPDF2.PdfReader(pdf_file)
+        text = ""
+        for page_num in range(len(pdf_reader.pages)):
+            page = pdf_reader.pages[page_num]
+            extracted_text = page.extract_text()
+            if extracted_text:
+                text += extracted_text + "\n"
+        return text
+    except Exception as e:
+        st.error(f"Error extracting text from PDF: {e}")
+        return ""
+def extract_text_from_docx(docx_file):
+    try:
+        doc = docx.Document(docx_file)
+        text = ""
+        for para in doc.paragraphs:
+            text += para.text + "\n"
+        return text
+    except Exception as e:
+        st.error(f"Error extracting text from Word document: {e}")
+        return ""
+# Text area or file uploader based on input method
+if input_option == "Text Input":
+    input_text = st.text_area("Enter text to translate", height=200)
+elif input_option == "Upload Document":
+    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "docx"])
+    if uploaded_file is not None:
+        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
+        if file_extension == ".pdf":
+            with st.spinner("Extracting text from PDF..."):
+                input_text = extract_text_from_pdf(uploaded_file)
+        elif file_extension == ".docx":
+            with st.spinner("Extracting text from Word document..."):
+                input_text = extract_text_from_docx(uploaded_file)
+        else:
+            st.error("Unsupported file type.")
+            input_text = ""
+# Function to split text into chunks
+def split_text_into_chunks(text, max_chunk_size):
+    return [text[i:i+max_chunk_size] for i in range(0, len(text), max_chunk_size)]
+# Function to perform the translation with retry mechanism
+def translate_text(text, target_lang, max_retries=5, backoff_factor=2):
+    model = language_model_mapping.get(target_lang)
+    if not model:
+        st.error("Unsupported language selected.")
+        return None
+    # Retrieve Hugging Face API key from environment variables
+    hf_api_key = os.getenv('HF_API_KEY')
+    if not hf_api_key:
+        st.error("Hugging Face API key not set in environment variables.")
+        return None
+    API_URL = f"https://api-inference.huggingface.co/models/{model}"
+    headers = {
+        "Authorization": f"Bearer {hf_api_key}"  # Use the API key from environment variables
+    }
+    # Split the text into manageable chunks
+    max_chunk_size = 500  # Adjust based on API limitations
+    text_chunks = split_text_into_chunks(text, max_chunk_size)
+    translated_chunks = []
+    for chunk_index, chunk in enumerate(text_chunks):
+        attempt = 0
+        while attempt < max_retries:
+            payload = {
+                "inputs": chunk,
+            }
+            try:
+                response = requests.post(API_URL, headers=headers, json=payload)
+                if response.status_code == 503:
+                    # Service Unavailable, retry after delay
+                    attempt += 1
+                    wait_time = backoff_factor ** attempt
+                    time.sleep(wait_time)
+                    continue
+                response.raise_for_status()  # Raise an error for bad status codes
+                result = response.json()
+                # Handle possible errors from the API
+                if isinstance(result, dict) and result.get("error"):
+                    st.error(f"Error from translation API: {result['error']}")
+                    return None
+                # The API might return a list of translations
+                if isinstance(result, list) and len(result) > 0:
+                    translated_text = result[0].get("translation_text", "No translation found.")
+                elif isinstance(result, dict) and "translation_text" in result:
+                    translated_text = result["translation_text"]
+                else:
+                    translated_text = "Unexpected response format from the API."
+                translated_chunks.append(translated_text)
+                break  # Exit the retry loop if successful
+            except requests.exceptions.RequestException as e:
+                attempt += 1
+                wait_time = backoff_factor ** attempt
+                time.sleep(wait_time)
+        else:
+            # All retry attempts failed for this chunk
+            st.error(f"Failed to translate chunk {chunk_index + 1} after {max_retries} attempts.")
+            return None
+    return " ".join(translated_chunks)
+# Translate button
+if st.button("Translate"):
+    if not input_text.strip():
+        st.warning("Please enter some text to translate.")
+    else:
+        with st.spinner("Translation service loading..."):
+            translated = translate_text(input_text, language)
+            if translated:
+                st.subheader("Translated Text:")
+                st.write(translated)
+            else:
+                st.error("Translation failed. Please try again later.")