import streamlit as st
import os
from transformers import pipeline
from langdetect import detect
from groq import Groq
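# Note: Groq is imported but not used in this translation module; presumably the
# LLM/chat portion of the app uses it elsewhere.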

# Load the Hugging Face token from the environment (stored under the key 'homeo_doc')
HF_TOKEN = os.environ.get("homeo_doc")
if not HF_TOKEN:
    st.error("Missing Hugging Face API token. Set 'homeo_doc' in environment variables.")
    st.stop()  # Abort this Streamlit run so the pipeline below is not built without a token

# Initialize translation pipeline
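# Note: wrapping this call in a @st.cache_resource-decorated loader function would
# avoid reloading the model on every Streamlit rerun.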
translator = pipeline("translation", model="facebook/nllb-200-distilled-600M", token=HF_TOKEN)

# Language code mapping: langdetect ISO 639-1 codes -> NLLB-200 (FLORES-200) codes
LANG_CODE_MAP = {
    'en': 'eng_Latn',    # English
    'ur': 'urd_Arab',    # Urdu
    'ar': 'arb_Arab',    # Arabic
    'es': 'spa_Latn',    # Spanish
    'hi': 'hin_Deva',    # Hindi
    'fr': 'fra_Latn'     # French
}

def translate_text(text, target_lang='eng_Latn'):
    """Translate text using NLLB-200"""
    try:
        source_lang = detect(text)
        source_code = LANG_CODE_MAP.get(source_lang, 'eng_Latn')  # Detect source language

        translation = translator(
            text, 
            src_lang=source_code,   # Pass source language
            tgt_lang=target_lang    # Pass target language
        )
        
        return translation[0]['translation_text']
    
    except Exception as e:
        st.error(f"Translation error: {str(e)}")
        return text  # Fall back to the original, untranslated text

# Quick manual test when the module is run directly
if __name__ == "__main__":
    test_text = "یہ ایک آزمائشی جملہ ہے۔"  # Urdu sample text: "This is a test sentence."
    translated = translate_text(test_text, "eng_Latn")
    print("Translated:", translated)