Spaces:

tehreemfarooqi
/

doc_urdu_translation

Runtime error

App Files Files Community

doc_urdu_translation / app.py

tehreemfarooqi

Create app.py

a792f11 verified about 1 year ago

raw

history blame

3.04 kB

	from langchain.retrievers.multi_query import MultiQueryRetriever
	from langchain_groq import ChatGroq
	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
	from langchain_core.pydantic_v1 import BaseModel
	from langchain_core.runnables import RunnableParallel, RunnablePassthrough
	import getpass
	import os

	# Shared Groq chat model used for every translation request.
	# The model id can be overridden with the GROQ_MODEL_NAME environment
	# variable so a different id can be selected without editing this file;
	# when the variable is unset the original hard-coded id is used.
	# NOTE(review): hosted model ids get retired over time - confirm the
	# default below is still served by Groq.
	model = ChatGroq(
	    model_name=os.environ.get("GROQ_MODEL_NAME", "llama-3.1-70b-versatile"),
	    temperature=0.5,
	    max_tokens=None,
	    timeout=None,
	    max_retries=2,
	)

	from PyPDF2 import PdfReader
	import docx

	def read_text_from_file(filepath):
	    """Load a document and return its paragraphs.

	    The reader is picked from the file extension. The comparison is
	    case-insensitive, so names such as ``REPORT.PDF`` are accepted.

	    Args:
	        filepath: Path to a ``.pdf``, ``.docx`` or ``.txt`` file.

	    Returns:
	        Whatever the matching reader (``read_pdf``, ``read_docx`` or
	        ``read_txt``) returns for the file.

	    Raises:
	        ValueError: If the extension is none of the three supported ones.
	    """
	    # Lower-case only for the comparison; the original path is what gets
	    # handed to the reader so the file is still found on disk.
	    lowered = filepath.lower()
	    if lowered.endswith('.pdf'):
	        return read_pdf(filepath)
	    if lowered.endswith('.docx'):
	        return read_docx(filepath)
	    if lowered.endswith('.txt'):
	        return read_txt(filepath)
	    raise ValueError("Unsupported file format. Please provide a .pdf, .docx, or .txt file.")

	def read_pdf(filepath):
	    """Extract the non-blank paragraphs of a PDF file.

	    The text of each page is split on double newlines. Pages for which no
	    text is extracted are skipped, and so are chunks that contain only
	    whitespace, so callers never receive empty paragraphs.

	    Args:
	        filepath: Path to the PDF file.

	    Returns:
	        A list of paragraph strings in page order.
	    """
	    paragraphs = []
	    with open(filepath, 'rb') as file:
	        reader = PdfReader(file)
	        for page in reader.pages:
	            text = page.extract_text()
	            if not text:
	                continue
	            # Splitting by double newlines to get paragraphs; blank
	            # chunks (e.g. from runs of three or more newlines) are dropped.
	            paragraphs.extend(
	                chunk for chunk in text.split('\n\n') if chunk.strip()
	            )
	    return paragraphs

	def read_docx(filepath):
	    """Return the text of every non-blank paragraph in a .docx file.

	    Args:
	        filepath: Path to the Word document.

	    Returns:
	        A list with the text of each paragraph whose text is not empty
	        after stripping whitespace, in document order.
	    """
	    document = docx.Document(filepath)
	    kept = []
	    for para in document.paragraphs:
	        # Whitespace-only paragraphs carry nothing to translate.
	        if para.text.strip():
	            kept.append(para.text)
	    return kept

	def read_txt(filepath):
	    """Read a UTF-8 text file and return its non-blank paragraphs.

	    Paragraphs are the chunks separated by double newlines. Chunks that
	    are empty or whitespace-only (produced by trailing newlines, runs of
	    three or more newlines, or an empty file) are dropped.

	    Args:
	        filepath: Path to the text file.

	    Returns:
	        A list of paragraph strings in file order; empty for a blank file.
	    """
	    # 'utf-8-sig' reads plain UTF-8 and also discards a leading byte-order
	    # mark, which would otherwise end up inside the first paragraph.
	    with open(filepath, 'r', encoding='utf-8-sig') as file:
	        text = file.read()
	    # Splitting by double newlines to get paragraphs
	    return [chunk for chunk in text.split('\n\n') if chunk.strip()]

	# Prompt used for every paragraph: get_translation fills {tone} with the
	# user-supplied tone and {paragraph} with the text to translate. The text
	# inside the triple quotes is sent to the model, so edit it with care.
	template = ChatPromptTemplate.from_template("""
	###TASK###
	You are a native Urdu speaker and an expert translator.
	Your task is to translate the given text into Urdu, ensuring the use of vocabulary and expressions that a native speaker would use. No preamble and nothing else should be output.
	No matter what the original tone was, make sure to translate in the given tone.

	###TONE TO FOLLOW###
	{tone}

	###TEXT TO TRANSLATE###
	{paragraph}
	""")

	def get_translation(filepath, tone):
	    """Translate a document into Urdu, one paragraph at a time.

	    Args:
	        filepath: Path of the document; passed to read_text_from_file.
	        tone: Tone description substituted into the prompt's {tone} slot.

	    Returns:
	        The translated paragraphs in document order, each one followed by
	        a blank line. An empty string if there was nothing to translate.
	    """
	    paragraphs = read_text_from_file(filepath)
	    translations = []

	    for paragraph in paragraphs:
	        # A blank chunk has nothing to translate; sending it would only
	        # spend a model call on an empty prompt.
	        if not paragraph.strip():
	            continue
	        prompt = template.invoke({"tone": tone, 'paragraph': paragraph})
	        translation = model.invoke(prompt)
	        translations.append(translation.content)
	        # Echo each result to stdout as it arrives (progress in the logs).
	        print(translation.content)

	    # Build the output once instead of growing a string inside the loop.
	    return ''.join(text + '\n\n' for text in translations)

	import gradio as gr

	# Gradio interface function
	def gradio_interface(file, tone):
	    """Gradio callback: translate the uploaded file in the requested tone.

	    Args:
	        file: Value delivered by the ``gr.File`` input. Either an object
	            exposing the upload's path as ``.name`` or a plain path string.
	        tone: Text entered in the tone textbox.

	    Returns:
	        The translated text produced by get_translation.

	    Raises:
	        gr.Error: If the form was submitted without a file.
	    """
	    # Without this guard a missing upload fails with an AttributeError on
	    # ``None.name``; gr.Error lets the UI show a readable message instead.
	    if file is None:
	        raise gr.Error("Please upload a .pdf, .docx, or .txt file.")
	    # The upload is already on disk; only its path is needed here.
	    # NOTE(review): the value's type depends on the Gradio version and the
	    # File component's ``type`` setting - both shapes are accepted.
	    filepath = getattr(file, "name", file)
	    # Call the translation function with the file path and tone
	    return get_translation(filepath, tone)

	# Assemble the web UI: a file upload plus a free-text tone field go in,
	# the translated text comes out. Components are created in the same
	# order as before (file input, tone input, output box).
	upload_input = gr.File(label="Upload a Text File")
	tone_input = gr.Textbox(label="Enter Tone")
	translation_output = gr.Textbox(label="Translated Text Output")

	iface = gr.Interface(
	    fn=gradio_interface,
	    inputs=[upload_input, tone_input],
	    outputs=translation_output,
	    title="Translate your documents",
	)

	# Start serving the app
	iface.launch()