Spaces:

tehreemfarooqi
/

doc_urdu_translation

Sleeping

App Files Files Community

tehreemfarooqi committed on Sep 5, 2024

Commit

a792f11

verified ·

1 Parent(s): a8e508c

Create app.py

Browse files

Files changed (1) hide show

app.py +96 -0

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+from langchain.retrievers.multi_query import MultiQueryRetriever
+from langchain_groq import ChatGroq
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
+from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.runnables import RunnableParallel, RunnablePassthrough
+import getpass
+import os
# Shared chat model used for every translation request.
# The model id keeps its original default but can be overridden through the
# GROQ_MODEL_NAME environment variable, so a different hosted model can be
# selected without editing this file.
model = ChatGroq(
    model_name=os.environ.get("GROQ_MODEL_NAME", "llama-3.1-70b-versatile"),
    temperature=0.5,
    max_tokens=None,  # no explicit completion-length cap is passed
    timeout=None,  # no explicit request timeout is passed
    max_retries=2,
)
+from PyPDF2 import PdfReader
+import docx
def read_text_from_file(filepath):
    """Read a document and return its text as a list of paragraphs.

    Dispatches on the file extension to ``read_pdf``, ``read_docx`` or
    ``read_txt``. The extension is matched case-insensitively, so
    ``REPORT.PDF`` is handled the same way as ``report.pdf``.

    Args:
        filepath: Path of the file to read, as a string.

    Returns:
        The list of paragraph strings produced by the matching reader.

    Raises:
        ValueError: If the path does not end in ``.pdf``, ``.docx`` or
            ``.txt`` (in any letter case).
    """
    # Only the comparison uses the lower-cased form; the readers still get
    # the path exactly as it was passed in.
    lowered = filepath.lower()
    if lowered.endswith('.pdf'):
        return read_pdf(filepath)
    if lowered.endswith('.docx'):
        return read_docx(filepath)
    if lowered.endswith('.txt'):
        return read_txt(filepath)
    raise ValueError("Unsupported file format. Please provide a .pdf, .docx, or .txt file.")
def read_pdf(filepath):
    """Extract the text of a PDF file as a list of paragraphs.

    The text extracted from each page is split on blank lines (two
    consecutive newlines). Pages that yield no text and chunks that contain
    only whitespace are skipped, in the same way ``read_docx`` drops empty
    paragraphs.

    Args:
        filepath: Path to the ``.pdf`` file.

    Returns:
        A list of non-blank paragraph strings, in page order.
    """
    paragraphs = []
    with open(filepath, 'rb') as file:
        reader = PdfReader(file)
        for page in reader.pages:
            text = page.extract_text()
            if not text:
                continue
            # Keep each chunk's text untouched; only discard blank ones.
            paragraphs.extend(
                chunk for chunk in text.split('\n\n') if chunk.strip()
            )
    return paragraphs
def read_docx(filepath):
    """Return the non-empty paragraphs of a Word document.

    Args:
        filepath: Path to the ``.docx`` file.

    Returns:
        A list with the text of every paragraph whose text is not blank
        after stripping whitespace, in document order.
    """
    document = docx.Document(filepath)
    kept = []
    for para in document.paragraphs:
        content = para.text
        if content.strip() != "":
            kept.append(content)
    return kept
def read_txt(filepath):
    """Read a UTF-8 text file and return its paragraphs.

    The file content is split on blank lines (two consecutive newlines).
    Chunks that are empty or contain only whitespace are dropped, so an
    empty file yields an empty list. The text of the remaining chunks is
    returned unmodified.

    Args:
        filepath: Path to the ``.txt`` file.

    Returns:
        A list of non-blank paragraph strings, in file order.
    """
    # "utf-8-sig" decodes ordinary UTF-8 and also removes a leading byte
    # order mark, which would otherwise end up inside the first paragraph.
    with open(filepath, 'r', encoding='utf-8-sig') as file:
        text = file.read()
    return [chunk for chunk in text.split('\n\n') if chunk.strip()]
# Prompt text used for each paragraph. It has two placeholders: {tone} for the
# tone the translation should follow and {paragraph} for the text to translate.
_TRANSLATION_PROMPT = """
###TASK###
You are a native Urdu speaker and an expert translator.
Your task is to translate the given text into Urdu, ensuring the use of vocabulary and expressions that a native speaker would use. No preamble and nothing else should be output.
No matter what the original tone was, make sure to translate in the given tone.
###TONE TO FOLLOW###
{tone}
###TEXT TO TRANSLATE###
{paragraph}
"""

# Chat prompt template built from the text above.
template = ChatPromptTemplate.from_template(_TRANSLATION_PROMPT)
def get_translation(filepath, tone):
    """Translate a document paragraph by paragraph in the requested tone.

    The file is read with ``read_text_from_file``. Each non-blank paragraph
    is rendered into the module-level prompt ``template`` together with
    ``tone`` and sent to the module-level ``model``. Every translation is
    also printed to stdout as it arrives.

    Args:
        filepath: Path to the document to translate.
        tone: Tone description inserted into the prompt.

    Returns:
        One string in which every translated paragraph is followed by two
        newline characters; an empty string if there was nothing to
        translate.

    Raises:
        ValueError: Propagated from ``read_text_from_file`` for an
            unsupported file extension.
    """
    translations = []
    for paragraph in read_text_from_file(filepath):
        # A blank chunk has nothing to translate; skip it instead of
        # spending a model call on it.
        if not paragraph.strip():
            continue
        prompt = template.invoke({"tone": tone, 'paragraph': paragraph})
        translation = model.invoke(prompt)
        translations.append(translation.content)
        print(translation.content)
    # Same layout as before: each translation is terminated by a blank line.
    return ''.join(text + '\n\n' for text in translations)
+import gradio as gr
+# Gradio interface function
def gradio_interface(file, tone):
    """Gradio callback: translate the uploaded document in the given tone.

    Args:
        file: Value supplied by the file-upload input. Either an object
            whose ``name`` attribute holds the path of the uploaded file, or
            the path itself; ``None`` when nothing was uploaded.
        tone: Text entered in the tone textbox.

    Returns:
        The translated text returned by ``get_translation``.

    Raises:
        gr.Error: If no file was uploaded, or if translating raised a
            ``ValueError`` (for example an unsupported file type). Raising
            ``gr.Error`` lets the message be displayed to the user.
    """
    if file is None:
        raise gr.Error("Please upload a .pdf, .docx, or .txt file.")
    # The upload is already on disk; all that is needed here is its path.
    # Fall back to the value itself when it has no ``name`` attribute.
    filepath = getattr(file, "name", file)
    try:
        return get_translation(filepath, tone)
    except ValueError as err:
        raise gr.Error(str(err)) from err
# Input and output widgets of the web UI.
upload_input = gr.File(label="Upload a Text File")
tone_input = gr.Textbox(label="Enter Tone")
translation_output = gr.Textbox(label="Translated Text Output")

# Wire the widgets to the translation callback.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[upload_input, tone_input],
    outputs=translation_output,
    title="Translate your documents",
)

# Start serving the interface.
iface.launch()