# Spaces:
# Sleeping
# Sleeping
from langchain.retrievers.multi_query import MultiQueryRetriever | |
from langchain_groq import ChatGroq | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate | |
from langchain_core.pydantic_v1 import BaseModel | |
from langchain_core.runnables import RunnableParallel, RunnablePassthrough | |
import getpass | |
import os | |
# Chat model shared by every translation request in this module.
# NOTE(review): confirm that "llama-3.1-70b-versatile" is still served by Groq,
# and that the API key is supplied by the environment -- neither is visible here.
_CHAT_MODEL_OPTIONS = {
    "model_name": "llama-3.1-70b-versatile",
    "temperature": 0.5,
    "max_tokens": None,
    "timeout": None,
    "max_retries": 2,
}
model = ChatGroq(**_CHAT_MODEL_OPTIONS)
from PyPDF2 import PdfReader | |
import docx | |
def read_text_from_file(filepath):
    """Load a document and return its paragraphs.

    The reader is picked from the file extension. The extension is compared
    case-insensitively, so names such as ``REPORT.PDF`` or ``Notes.Txt`` are
    accepted as well as their lower-case forms.

    Args:
        filepath: Path to a ``.pdf``, ``.docx`` or ``.txt`` file.

    Returns:
        The list of paragraphs produced by ``read_pdf``, ``read_docx`` or
        ``read_txt`` for that file.

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    # Only the comparison is normalised; the readers still receive the path
    # exactly as given so it resolves on case-sensitive file systems.
    lowered = filepath.lower()
    if lowered.endswith('.pdf'):
        return read_pdf(filepath)
    if lowered.endswith('.docx'):
        return read_docx(filepath)
    if lowered.endswith('.txt'):
        return read_txt(filepath)
    raise ValueError("Unsupported file format. Please provide a .pdf, .docx, or .txt file.")
def read_pdf(filepath):
    """Extract the paragraphs of a PDF file.

    The text extracted from each page is split on double newlines. Chunks that
    are empty or contain only whitespace are dropped, which matches the
    filtering ``read_docx`` applies, so callers never receive blank paragraphs.

    Args:
        filepath: Path to the PDF file.

    Returns:
        A list of non-blank paragraph strings in page order.
    """
    paragraphs = []
    with open(filepath, 'rb') as file:
        reader = PdfReader(file)
        for page in reader.pages:
            text = page.extract_text()
            # Skip pages for which no text was extracted.
            if not text:
                continue
            # Splitting by double newlines to get paragraphs; consecutive
            # blank lines would otherwise leave empty entries behind.
            paragraphs.extend(
                chunk for chunk in text.split('\n\n') if chunk.strip()
            )
    return paragraphs
def read_docx(filepath):
    """Return the text of every non-blank paragraph in a .docx file.

    Args:
        filepath: Path to the Word document.

    Returns:
        A list with the text of each paragraph whose content is not empty
        after stripping whitespace, in document order.
    """
    document = docx.Document(filepath)
    kept = []
    for para in document.paragraphs:
        content = para.text
        # Paragraphs that are empty or whitespace-only carry nothing to translate.
        if content.strip():
            kept.append(content)
    return kept
def read_txt(filepath):
    """Read a UTF-8 text file and return its paragraphs.

    The file content is split on double newlines. Chunks that are empty or
    contain only whitespace are dropped, which matches the filtering
    ``read_docx`` applies, so an empty file yields an empty list rather than
    a list holding one empty string.

    Args:
        filepath: Path to the text file.

    Returns:
        A list of non-blank paragraph strings in file order.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        text = file.read()
    # Splitting by double newlines to get paragraphs; runs of three or more
    # newlines and a trailing blank line would otherwise produce empty entries.
    return [chunk for chunk in text.split('\n\n') if chunk.strip()]
# Prompt used for each paragraph: it asks for an Urdu translation in the
# requested tone and expects the placeholders {tone} and {paragraph}.
_URDU_TRANSLATION_PROMPT = """
###TASK###
You are a native Urdu speaker and an expert translator.
Your task is to translate the given text into Urdu, ensuring the use of vocabulary and expressions that a native speaker would use. No preamble and nothing else should be output.
No matter what the original tone was, make sure to translate in the given tone.
###TONE TO FOLLOW###
{tone}
###TEXT TO TRANSLATE###
{paragraph}
"""
template = ChatPromptTemplate.from_template(_URDU_TRANSLATION_PROMPT)
def get_translation(filepath, tone):
    """Translate a document into Urdu, one paragraph at a time.

    The paragraphs are read with ``read_text_from_file``. Each one is rendered
    through the module-level ``template`` and sent to the module-level
    ``model``. Every translation is also printed as soon as it arrives.

    Args:
        filepath: Path to the ``.pdf``, ``.docx`` or ``.txt`` file to translate.
        tone: Tone the translation should follow; inserted into the prompt.

    Returns:
        The translated paragraphs in document order, each followed by a blank
        line. An input with no translatable paragraphs gives an empty string.

    Raises:
        ValueError: Propagated from ``read_text_from_file`` for unsupported
            file formats.
    """
    translated = []
    for paragraph in read_text_from_file(filepath):
        # A blank chunk has nothing to translate; sending it would spend a
        # model call on an empty prompt.
        if not paragraph.strip():
            continue
        prompt = template.invoke({"tone": tone, 'paragraph': paragraph})
        translation = model.invoke(prompt)
        print(translation.content)
        translated.append(translation.content)
    # Join once instead of growing a string with += inside the loop.
    return "".join(text + '\n\n' for text in translated)
import gradio as gr | |
# Gradio interface function
def gradio_interface(file, tone):
    """Translate the uploaded document in the requested tone.

    This is the callback wired into the Gradio interface.

    Args:
        file: Value supplied by the file input. It may be an object exposing
            the path as ``.name``, a plain path string, or ``None`` when
            nothing was uploaded.
        tone: Tone text entered by the user.

    Returns:
        The translated text produced by ``get_translation``.

    Raises:
        gr.Error: If no file was uploaded, or if translation raised a
            ``ValueError`` (for example an unsupported file format). The
            message is shown to the user.
    """
    if file is None:
        raise gr.Error("Please upload a .pdf, .docx, or .txt file before translating.")
    # Nothing is saved here: the upload is expected to be on disk already and
    # only its path is needed. Accept a bare path string as well as an object
    # carrying the path in ``.name``.
    filepath = getattr(file, "name", file)
    try:
        # Call the translation function with the file path and tone
        return get_translation(filepath, tone)
    except ValueError as exc:
        # Surface the explanation in the UI instead of a generic failure.
        raise gr.Error(str(exc)) from exc
# Build the web UI: a file upload and a tone textbox go in, the translated
# text comes out in a single textbox.
_upload_input = gr.File(label="Upload a Text File")
_tone_input = gr.Textbox(label="Enter Tone")
_translation_output = gr.Textbox(label="Translated Text Output")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[_upload_input, _tone_input],
    outputs=_translation_output,
    title="Translate your documents",
)

# Start serving the interface as soon as this module is executed.
iface.launch()