tehreemfarooqi committed on
Commit
a792f11
·
verified ·
1 Parent(s): a8e508c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.retrievers.multi_query import MultiQueryRetriever
2
+ from langchain_groq import ChatGroq
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
5
+ from langchain_core.pydantic_v1 import BaseModel
6
+ from langchain_core.runnables import RunnableParallel, RunnablePassthrough
7
+ import getpass
8
+ import os
9
+
10
# Settings for the shared Groq chat model that performs every translation.
# max_tokens and timeout are passed as None, i.e. no explicit value is set here.
_GROQ_SETTINGS = {
    "model_name": "llama-3.1-70b-versatile",
    "temperature": 0.5,
    "max_tokens": None,
    "timeout": None,
    "max_retries": 2,
}

model = ChatGroq(**_GROQ_SETTINGS)
17
+
18
+ from PyPDF2 import PdfReader
19
+ import docx
20
+
21
def read_text_from_file(filepath):
    """Return the paragraphs of a .pdf, .docx or .txt file as a list of strings.

    The reader is selected from the file extension. The comparison is
    case-insensitive, so names such as ``REPORT.PDF`` or ``notes.TXT`` are
    accepted as well.

    Args:
        filepath: Path (string) of the document to read.

    Returns:
        The list of paragraph strings produced by the matching reader.

    Raises:
        ValueError: If the extension is not .pdf, .docx or .txt.
    """
    # Normalise case once; the original path is still what gets opened.
    lowered = filepath.lower()
    if lowered.endswith('.pdf'):
        return read_pdf(filepath)
    if lowered.endswith('.docx'):
        return read_docx(filepath)
    if lowered.endswith('.txt'):
        return read_txt(filepath)
    raise ValueError("Unsupported file format. Please provide a .pdf, .docx, or .txt file.")
30
+
31
def read_pdf(filepath):
    """Extract the text of a PDF file and return it as a list of paragraphs.

    The text extracted from each page is split on double newlines. Pages
    that yield no text are skipped, and empty or whitespace-only chunks are
    dropped so that callers never receive blank paragraphs (matching what
    ``read_docx`` does for Word documents).

    Args:
        filepath: Path of the PDF file to read.

    Returns:
        A list of non-blank paragraph strings, in page order.
    """
    paragraphs = []
    with open(filepath, 'rb') as file:
        reader = PdfReader(file)
        for page in reader.pages:
            text = page.extract_text()
            if not text:
                # Nothing extractable on this page (e.g. an image-only page).
                continue
            # Splitting by double newlines to get paragraphs; skip blank chunks.
            paragraphs.extend(
                chunk for chunk in text.split('\n\n') if chunk.strip()
            )
    return paragraphs
40
+
41
def read_docx(filepath):
    """Return the text of every non-blank paragraph in a .docx document.

    Args:
        filepath: Path of the Word document to read.

    Returns:
        A list with the text of each paragraph whose text is not empty or
        whitespace-only, in document order.
    """
    document = docx.Document(filepath)
    non_blank = []
    for para in document.paragraphs:
        content = para.text
        if content.strip():
            non_blank.append(content)
    return non_blank
45
+
46
def read_txt(filepath):
    """Read a UTF-8 text file and return its paragraphs.

    The file content is split on double newlines. Empty and whitespace-only
    chunks (produced by runs of blank lines, a trailing blank line, or an
    empty file) are dropped so that callers never receive blank paragraphs,
    matching what ``read_docx`` does for Word documents.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of non-blank paragraph strings, in file order. An empty file
        yields an empty list.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        text = file.read()
    # Splitting by double newlines to get paragraphs; skip blank chunks.
    return [chunk for chunk in text.split('\n\n') if chunk.strip()]
51
+
52
# Prompt text used for every paragraph. It has two placeholders:
# {tone} - the tone the translation must follow, and
# {paragraph} - the text to translate.
_TRANSLATION_PROMPT = """
###TASK###
You are a native Urdu speaker and an expert translator.
Your task is to translate the given text into Urdu, ensuring the use of vocabulary and expressions that a native speaker would use. No preamble and nothing else should be output.
No matter what the original tone was, make sure to translate in the given tone.

###TONE TO FOLLOW###
{tone}

###TEXT TO TRANSLATE###
{paragraph}
"""

template = ChatPromptTemplate.from_template(_TRANSLATION_PROMPT)
64
+
65
def get_translation(filepath, tone):
    """Translate a document paragraph by paragraph in the requested tone.

    The document is read with ``read_text_from_file``; each paragraph is
    formatted into the module-level prompt ``template`` and sent to the
    module-level ``model``.

    Args:
        filepath: Path of the .pdf, .docx or .txt file to translate.
        tone: Tone description inserted into the prompt's ``{tone}`` slot.

    Returns:
        The translated paragraphs concatenated into one string, each one
        followed by a blank line. Returns an empty string when the document
        contains no non-blank paragraphs.

    Raises:
        ValueError: Propagated from ``read_text_from_file`` for unsupported
            file extensions.
    """
    translations = []
    for paragraph in read_text_from_file(filepath):
        # A whitespace-only paragraph has nothing to translate; skipping it
        # avoids a wasted model call and stray output for empty input.
        if not paragraph.strip():
            continue
        prompt = template.invoke({"tone": tone, 'paragraph': paragraph})
        translation = model.invoke(prompt)
        # Echo each translated paragraph to stdout as it is produced.
        print(translation.content)
        translations.append(translation.content)

    # Join once instead of growing a string with += inside the loop.
    return "".join(text + '\n\n' for text in translations)
76
+
77
+ import gradio as gr
78
+
79
+ # Gradio interface function
80
def gradio_interface(file, tone):
    """Gradio callback: translate the uploaded document in the given tone.

    Args:
        file: Value supplied by the ``gr.File`` input. ``None`` when the
            user submitted without uploading anything.
        tone: Text entered in the "Enter Tone" textbox.

    Returns:
        The translated text returned by ``get_translation``.

    Raises:
        gr.Error: If no file was uploaded, so the UI shows a readable
            message instead of an opaque AttributeError.
    """
    if file is None:
        raise gr.Error("Please upload a .pdf, .docx, or .txt file to translate.")
    # The upload already lives on disk; nothing is saved here. Use the
    # object's ``.name`` path when present, otherwise treat the value itself
    # as the path (NOTE(review): some Gradio versions appear to pass a plain
    # path string - confirm against the installed version).
    filepath = getattr(file, "name", file)
    # Call the translation function with the file path and tone
    return get_translation(filepath, tone)
86
+
87
+ # Define the Gradio interface
88
# UI components, created in the order gradio_interface receives its arguments
# (uploaded file first, then tone), followed by the output box.
_document_input = gr.File(label="Upload a Text File")
_tone_input = gr.Textbox(label="Enter Tone")
_translation_output = gr.Textbox(label="Translated Text Output")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[_document_input, _tone_input],
    outputs=_translation_output,
    title="Translate your documents",
)

# Start serving the interface.
iface.launch()