File size: 3,043 Bytes
a792f11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
import getpass
import os

# Chat model shared by every translation request made from this module.
# The settings are collected in one mapping so they can be read at a glance.
_MODEL_SETTINGS = {
    "model_name": "llama-3.1-70b-versatile",
    "temperature": 0.5,
    "max_tokens": None,
    "timeout": None,
    "max_retries": 2,
}
model = ChatGroq(**_MODEL_SETTINGS)

from PyPDF2 import PdfReader
import docx

def read_text_from_file(filepath):
    """Read a document and return its text split into paragraphs.

    The reader is chosen from the file extension, which is matched
    case-insensitively so that ``REPORT.PDF`` is treated like ``report.pdf``.

    Args:
        filepath: Path of the file to read.

    Returns:
        The value returned by the selected reader (``read_pdf``,
        ``read_docx`` or ``read_txt``).

    Raises:
        ValueError: If the extension is not .pdf, .docx or .txt.
    """
    # Only the comparison uses the lowercased copy; the readers receive the
    # path exactly as given so case-sensitive file systems still resolve it.
    lowered = filepath.lower()
    if lowered.endswith('.pdf'):
        return read_pdf(filepath)
    elif lowered.endswith('.docx'):
        return read_docx(filepath)
    elif lowered.endswith('.txt'):
        return read_txt(filepath)
    else:
        raise ValueError("Unsupported file format. Please provide a .pdf, .docx, or .txt file.")

def read_pdf(filepath):
    """Extract the text of a PDF file as a list of paragraphs.

    Each page's extracted text is split on double newlines. Empty and
    whitespace-only chunks are dropped, matching how ``read_docx`` skips
    blank paragraphs.

    Args:
        filepath: Path of the PDF file to read.

    Returns:
        A list of non-blank paragraph strings in page order.
    """
    paragraphs = []
    with open(filepath, 'rb') as file:
        reader = PdfReader(file)
        for page in reader.pages:
            text = page.extract_text()
            # Pages with no extractable text contribute nothing.
            if text:
                # Splitting by double newlines to get paragraphs; blank
                # chunks are filtered so they are never sent for translation.
                paragraphs.extend(
                    chunk for chunk in text.split('\n\n') if chunk.strip()
                )
    return paragraphs

def read_docx(filepath):
    """Return the non-blank paragraphs of a .docx file as a list of strings."""
    document = docx.Document(filepath)
    kept = []
    for para in document.paragraphs:
        content = para.text
        # Paragraphs that are empty or whitespace-only are skipped.
        if content.strip():
            kept.append(content)
    return kept

def read_txt(filepath):
    """Read a UTF-8 text file and return its paragraphs.

    The text is split on double newlines. Empty and whitespace-only chunks
    are dropped, matching how ``read_docx`` skips blank paragraphs, so an
    empty file yields an empty list.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of non-blank paragraph strings in file order.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        text = file.read()
    # Splitting by double newlines to get paragraphs; runs of blank lines and
    # trailing newlines would otherwise produce empty entries.
    return [chunk for chunk in text.split('\n\n') if chunk.strip()]

# Prompt text used for every paragraph; the {tone} and {paragraph}
# placeholders are filled in per request.
_TRANSLATION_PROMPT = """
###TASK###
You are a native Urdu speaker and an expert translator.
Your task is to translate the given text into Urdu, ensuring the use of vocabulary and expressions that a native speaker would use. No preamble and nothing else should be output.
No matter what the original tone was, make sure to translate in the given tone.

###TONE TO FOLLOW###
{tone}

###TEXT TO TRANSLATE###
{paragraph}
"""
template = ChatPromptTemplate.from_template(_TRANSLATION_PROMPT)

def get_translation(filepath, tone):
    """Translate a document paragraph by paragraph in the requested tone.

    The file is read with ``read_text_from_file``. Each non-blank paragraph
    is formatted with the module-level ``template`` and sent to ``model``.
    Every translation is also printed as it arrives.

    Args:
        filepath: Path of a .pdf, .docx or .txt file.
        tone: Tone description substituted into the prompt.

    Returns:
        The translated paragraphs in document order, each followed by a
        blank line.

    Raises:
        ValueError: Propagated from ``read_text_from_file`` when the file
            extension is not supported.
    """
    translated = []
    for paragraph in read_text_from_file(filepath):
        # Blank paragraphs carry nothing to translate; skipping them avoids a
        # pointless model call and stray output.
        if not paragraph.strip():
            continue
        prompt = template.invoke({"tone": tone, 'paragraph': paragraph})
        translation = model.invoke(prompt)
        print(translation.content)
        translated.append(translation.content)

    # Join once at the end instead of growing a string inside the loop.
    return "".join(chunk + '\n\n' for chunk in translated)

import gradio as gr

# Gradio interface function
def gradio_interface(file, tone):
    """Translate the uploaded document and return the translated text.

    Args:
        file: The value Gradio passes for the upload component. Both a plain
            path string and an object exposing the path as ``.name`` are
            accepted; ``None`` means nothing was uploaded.
        tone: Tone description entered by the user.

    Returns:
        The translated text produced by ``get_translation``.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file is None:
        # Show a readable message in the UI instead of failing on None.name.
        raise gr.Error("Please upload a .pdf, .docx, or .txt file.")
    # The upload is already stored on disk by Gradio; only its path is needed.
    # NOTE(review): the value's exact type depends on the Gradio version and
    # the component's `type` setting, so fall back to the value itself when it
    # has no `.name` attribute.
    filepath = getattr(file, "name", file)
    # Call the translation function with the file path and tone
    return get_translation(filepath, tone)

# Build the UI components first so the interface definition stays short.
document_input = gr.File(label="Upload a Text File")
tone_input = gr.Textbox(label="Enter Tone")
translation_output = gr.Textbox(label="Translated Text Output")

# Wire the components to the translation callback.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[document_input, tone_input],
    outputs=translation_output,
    title="Translate your documents",
)

# Start serving the interface.
iface.launch()