Spaces:
Sleeping
Sleeping
File size: 1,485 Bytes
4241c9e 6841cb0 4241c9e 6841cb0 4241c9e 14e31fa 4241c9e 14e31fa 4241c9e 6841cb0 14e31fa 4241c9e 14e31fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import streamlit as st
import os
from transformers import pipeline
from langdetect import detect
from groq import Groq
# The Hugging Face access token is read from the "homeo_doc" environment
# variable. A missing or empty value is reported in the Streamlit UI, but
# execution continues and the pipeline below is still constructed.
HF_TOKEN = os.getenv("homeo_doc")
if not HF_TOKEN:
    st.error("Missing Hugging Face API token. Set 'homeo_doc' in environment variables.")

# Shared NLLB-200 translation pipeline, built once at import time and used by
# translate_text().
translator = pipeline(
    "translation",
    model="facebook/nllb-200-distilled-600M",
    token=HF_TOKEN,
)
# Maps the two-letter language codes looked up by translate_text() to the
# language codes passed to the NLLB-200 pipeline.
LANG_CODE_MAP = {
    "en": "eng_Latn",  # English
    "ur": "urd_Arab",  # Urdu
    "ar": "arb_Arab",  # Arabic
    "es": "spa_Latn",  # Spanish
    "hi": "hin_Deva",  # Hindi
    "fr": "fra_Latn",  # French
}
def translate_text(text, target_lang='eng_Latn'):
    """Translate ``text`` into ``target_lang`` using the NLLB-200 pipeline.

    The source language is guessed with ``detect`` and mapped to an NLLB code
    through ``LANG_CODE_MAP``. A detected language that has no entry in the
    map is treated as English (``'eng_Latn'``).

    Args:
        text: The text to translate.
        target_lang: NLLB-200 code of the language to translate into.

    Returns:
        The translated string. ``text`` is returned unchanged when it is
        empty or whitespace-only, or when detection or translation raises;
        in the latter case the error is also shown via ``st.error``.
    """
    # Blank input has nothing to translate. Language detection raises on text
    # without detectable features, which used to surface a spurious
    # "Translation error" in the UI for an empty text box.
    if not text or (isinstance(text, str) and not text.strip()):
        return text

    try:
        detected_lang = detect(text)  # Detect source language
        # Fall back to English when the detected code has no NLLB mapping.
        source_code = LANG_CODE_MAP.get(detected_lang, 'eng_Latn')
        result = translator(
            text,
            src_lang=source_code,  # Pass source language
            tgt_lang=target_lang,  # Pass target language
        )
        return result[0]['translation_text']
    except Exception as e:
        # Deliberately best-effort: report the failure to the user and hand
        # back the untranslated text instead of raising.
        st.error(f"Translation error: {str(e)}")
        return text
# Manual smoke test: when run as a script, translate one Urdu sentence to
# English and print the result.
if __name__ == "__main__":
    urdu_sample = "یہ ایک آزمائشی جملہ ہے۔"  # Urdu sample text
    print("Translated:", translate_text(urdu_sample, "eng_Latn"))
|