translation-tools-tamil

Sleeping

File size: 7,596 Bytes

import os
import re
import json
import time
import requests
import anthropic
import google.auth
import gradio as gr
from uuid import uuid4
from dotenv import load_dotenv
from google.auth.transport.requests import Request


# Gemini
def get_google_token():
    """Return a freshly refreshed OAuth2 access token for Google APIs.

    The service-account key is read as a JSON string from the
    ``GCP_FINETUNE_KEY`` environment variable, loaded with the
    cloud-platform and generative-language tuning scopes, and refreshed so
    that the credentials carry a token.

    Returns:
        The ``token`` attribute of the refreshed credentials.

    Raises:
        RuntimeError: If ``GCP_FINETUNE_KEY`` is unset or empty.
    """
    raw_key = os.environ.get("GCP_FINETUNE_KEY")
    if not raw_key:
        # Without this guard json.loads(None) fails with an opaque TypeError
        # that does not mention the missing configuration.
        raise RuntimeError(
            "GCP_FINETUNE_KEY environment variable is not set; it must "
            "contain the service-account key as a JSON string."
        )

    credentials, _project = google.auth.load_credentials_from_dict(
        json.loads(raw_key),
        scopes=[
            "https://www.googleapis.com/auth/cloud-platform",
            "https://www.googleapis.com/auth/generative-language.tuning",
        ],
    )
    # The token is only read after an explicit refresh.
    credentials.refresh(Request())
    return credentials.token


def clean(result):
    """Extract the translated line from a chat-completions response body.

    Note: this module defines ``clean`` again further down; the later
    definition is the one bound once the module has finished loading.

    Args:
        result: Parsed JSON response; the reply text is read from
            ``result["choices"][0]["message"]["content"]``.

    Returns:
        The last non-blank line of the reply after removing single-line
        ``(...)`` / ``[...]`` asides, leading/trailing single quotes and all
        double quotes. Returns ``""`` when no non-blank line remains.

    Raises:
        KeyError, IndexError: If ``result`` lacks the expected structure.
    """
    text = result["choices"][0]["message"]["content"]
    # Drop parenthesised or bracketed notes the model may add.
    text = re.sub(r"\(.*?\)|\[.*?\]", "", text)
    text = text.strip("'").replace('"', "")
    # Keep only the last non-blank line. Taking split("\n")[-1] directly
    # would yield "" whenever a multi-line reply ends with a newline.
    non_blank = [line for line in text.split("\n") if line.strip()]
    return non_blank[-1] if non_blank else ""


def dubpro_hindi_to_tamil(text):
    """Translate Hindi text into colloquial Tamil using GPT-4.

    Sends a single chat-completions request to the OpenAI API,
    authenticated with the ``OPENAI_API_KEY`` environment variable, and
    post-processes the reply with ``clean``.

    Args:
        text: Hindi text to translate.

    Returns:
        The cleaned translation string produced by ``clean``.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the API does not respond in time.
    """
    API_URL = "https://api.openai.com/v1/chat/completions"
    prompt = f"""Please convert the following Hindi text into Tamil,  where it is important to maintain context, so please translate it in a colloquial manner, like that of a YouTube video. Just print the translation.
___
{text}
___
"""

    messages = [
        {"role": "system", "content": "You are a language translation assistant."},
        {"role": "user", "content": prompt},
    ]

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
    }

    payload = {
        "model": "gpt-4",
        "messages": messages,
    }

    result = requests.post(
        url=API_URL,
        headers=headers,
        json=payload,
        # Fail instead of blocking the UI handler forever if the server
        # stops responding.
        timeout=120,
    )
    # Surface API failures as an HTTP error rather than a KeyError raised
    # later while parsing an error body.
    result.raise_for_status()
    return clean(result.json())


def gemini_hindi_to_tamil(text):
    """Translate text to Tamil through the Gemini endpoint.

    The endpoint URL is read from the ``GEMINI_API`` environment variable
    and the request is authorised with a bearer token obtained from
    ``get_google_token``. All four safety categories are set to
    ``BLOCK_NONE``.

    Args:
        text: Text to translate.

    Returns:
        The translated string with the ``translated:`` marker and backticks
        removed. If the first candidate carries no text part, the raw first
        candidate object from the response is returned instead.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If the endpoint does not respond in time.
        KeyError, IndexError: If the response contains no candidates.
    """
    API_URL = os.environ.get("GEMINI_API")
    BEARER_TOKEN = get_google_token()
    headers = {
        "Authorization": f"Bearer {BEARER_TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "contents": [
            {
                "parts": [{"text": f"Translate the following text to Tamil: `{text}` Output: "}],
                "role": "user",
            }
        ],
        "generationConfig": {
            "maxOutputTokens": 8192,
            "temperature": 0.85,
        },
        "safetySettings": [
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
        ],
    }
    result = requests.post(
        url=API_URL,
        headers=headers,
        json=payload,
        # Fail instead of hanging indefinitely on an unresponsive endpoint.
        timeout=120,
    )
    result.raise_for_status()
    response = result.json()
    try:
        response_content = (
            response['candidates'][0]['content']['parts'][0]['text']
            .replace("translated:", "")
            .replace("`", "")
            .strip()
        )
    except (KeyError, IndexError, TypeError):
        # Only lookup failures are expected here; a bare except would also
        # hide unrelated errors such as KeyboardInterrupt.
        # NOTE(review): presumably this covers candidates returned without
        # text (e.g. filtered output) -- confirm against the API behaviour.
        response_content = response['candidates'][0]
    return response_content


# GPT models
def clean(result):
    """Extract the translated line from a chat-completions response body.

    Note: ``clean`` is also defined earlier in this module; this later
    definition rebinds the name and is the one used at call time.

    Args:
        result: Parsed JSON response; the reply text is read from
            ``result["choices"][0]["message"]["content"]``.

    Returns:
        The last non-blank line of the reply after removing single-line
        ``(...)`` / ``[...]`` asides, leading/trailing single quotes and all
        double quotes. Returns ``""`` when no non-blank line remains.

    Raises:
        KeyError, IndexError: If ``result`` lacks the expected structure.
    """
    text = result["choices"][0]["message"]["content"]
    # Drop parenthesised or bracketed notes the model may add.
    text = re.sub(r"\(.*?\)|\[.*?\]", "", text)
    text = text.strip("'").replace('"', "")
    # Keep only the last non-blank line. Taking split("\n")[-1] directly
    # would yield "" whenever a multi-line reply ends with a newline.
    non_blank = [line for line in text.split("\n") if line.strip()]
    return non_blank[-1] if non_blank else ""


def openai_hindi_to_tamil(text, model):
    """Translate Hindi text into Tamil with an OpenAI chat model.

    The request is retried a bounded number of times when the API reports
    rate limiting (429) or a server error (5xx). Every non-200 response
    body is printed for diagnosis.

    Args:
        text: Hindi text to translate.
        model: OpenAI chat model name, e.g. ``"gpt-4"``.

    Returns:
        The cleaned translation string produced by ``clean``.

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status.
        requests.Timeout: If the API does not respond in time.
    """
    prompt = f"Translate the following Hindi text into Tamil such that the meaning in unchanged. Return only the translated text: `{text}`. Output: "

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
    }

    messages = [
        {"role": "system", "content": "You are a language translation assistant."},
        {"role": "user", "content": prompt},
    ]
    payload = {"model": model, "messages": messages}

    max_attempts = 3
    retry_delay_seconds = 0.5
    for attempt in range(1, max_attempts + 1):
        resp = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=120,
        )
        if resp.status_code == 200:
            break
        print(resp.text)
        transient = resp.status_code == 429 or resp.status_code >= 500
        if not transient or attempt == max_attempts:
            # Give up: report the HTTP failure instead of handing an error
            # body to clean(), which would only raise a KeyError.
            resp.raise_for_status()
            break
        time.sleep(retry_delay_seconds)

    return clean(resp.json())


# Azure translate
def azure_hindi_to_tamil(text):
    """Translate Hindi text into Tamil with the Azure Translator REST API.

    The subscription key and region are read from the
    ``AZURE_TRANSLATE_KEY`` and ``AZURE_TRANSLATE_REGION`` environment
    variables. Each call sends a fresh UUID as ``X-ClientTraceId``.

    Args:
        text: Hindi text to translate.

    Returns:
        The text of the first translation of the first result.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If the service does not respond in time.
    """
    ENDPOINT = "https://api.cognitive.microsofttranslator.com/translate"
    headers = {
        "Ocp-Apim-Subscription-Key": os.environ.get("AZURE_TRANSLATE_KEY"),
        "Ocp-Apim-Subscription-Region": os.environ.get("AZURE_TRANSLATE_REGION"),
        "Content-type": "application/json",
        "X-ClientTraceId": str(uuid4()),
    }
    params = {
        "api-version": "3.0",
        "from": "hi-IN",
        "to": "ta-IN",
    }
    body = [{"text": text}]
    response = requests.post(
        ENDPOINT,
        headers=headers,
        params=params,
        json=body,
        # Fail instead of hanging indefinitely on an unresponsive service.
        timeout=30,
    )
    # An error body is a dict, so indexing it with [0] would raise an
    # unhelpful KeyError; report the HTTP failure directly instead.
    response.raise_for_status()
    return response.json()[0]["translations"][0]["text"]


# Anthropic Claude 3 Haiku
def claude_hindi_to_tamil(text):
    """Translate Hindi text into Tamil using Claude 3 Haiku.

    Args:
        text: Hindi text to translate.

    Returns:
        The text of the first content block of the model's reply.
    """
    # The client is built without arguments.
    # NOTE(review): presumably the API key comes from the environment -- confirm.
    claude = anthropic.Anthropic()

    instruction = f"Translate the following Hindi text into Tamil such that the meaning in unchanged. Return only the translated text: `{text}`. Output: "
    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": instruction}],
    }

    reply = claude.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=1000,
        temperature=0.8,
        system="You are an expert language translator.",
        messages=[user_turn],
    )
    first_block = reply.content[0]
    return first_block.text


def render_translations(text):
    """Run the enabled translation backends and build the UI updates.

    Args:
        text: Hindi input text taken from the input textbox.

    Returns:
        A 3-tuple of ``gr.update`` objects carrying, in order, the GPT-4,
        Dubpro and Azure translations.
    """
    # gemini_hindi_to_tamil and claude_hindi_to_tamil are defined in this
    # module but are not called here.
    dubpro_out = dubpro_hindi_to_tamil(text)
    azure_out = azure_hindi_to_tamil(text)
    gpt4_out = openai_hindi_to_tamil(text, model="gpt-4")

    # The order of the updates differs from the order of the calls above.
    ordered_outputs = (gpt4_out, dubpro_out, azure_out)
    return tuple(gr.update(value=translated) for translated in ordered_outputs)


# UI layout: one input box, a submit button, and one read-only output row per
# enabled translation backend.
with gr.Blocks(title="Hindi to Tamil Translation Tools", theme="gradio/monochrome") as demo:
    gr.Markdown("# Hindi to Tamil Translation for Dubbing")
    input_textbox = gr.Textbox(label="Input Text", info="Text to translate", value="जब आपने यह किया, तो आपने अपनी कक्षाएं अच्छी तरह से अटेंड की होंगी या आपने अपना दैनिक रिवीजन किया होगा। अब तुम्हें डर लगने लगा है।")
    # A button's caption is its `value`; `label` does not set the text shown
    # on the button.
    submit = gr.Button(value="Submit")
    with gr.Row():
        gr.Label(value="Dubpro's Model", scale=1)
        dubpro_model_textbox = gr.Textbox(label="Translated Text", scale=2, interactive=False)
    with gr.Row():
        gr.Label(value="GPT 4", scale=1)
        gpt_4_textbox = gr.Textbox(label="Translated Text", scale=2, interactive=False)
    # Rows for the currently disabled backends:
    # with gr.Row():
    #     gr.Label(value="Google Gemini", scale=1)
    #     google_textbox = gr.Textbox(label="Translated Text", scale=2, interactive=False)
    # with gr.Row():
    #     gr.Label(value="Anthropic Claude 3", scale=1)
    #     claude_textbox = gr.Textbox(label="Translated Text", scale=2, interactive=False)
    with gr.Row():
        gr.Label(value="Azure Translate", scale=1)
        azure_textbox = gr.Textbox(label="Translated Text", scale=2, interactive=False)

    # The output order must match the tuple order returned by
    # render_translations: GPT-4, Dubpro, Azure.
    submit.click(render_translations, input_textbox, [gpt_4_textbox, dubpro_model_textbox, azure_textbox])


if __name__ == "__main__":
    # Login credentials for the app are taken from the environment; a missing
    # variable raises KeyError before the server starts.
    login_user = os.environ["USERNAME"]
    login_password = os.environ["PASSWORD"]
    demo.launch(auth=(login_user, login_password))