richardkimsm89 committed
Commit ff084ce · verified · 1 Parent(s): a2d8b16

Create development/app

Files changed (1)
  1. development/app +151 -0
development/app ADDED
@@ -0,0 +1,151 @@
+ import gradio as gr
+ from huggingface_hub import InferenceClient
+ import pytesseract
+ from PIL import Image
+ from pypdf import PdfReader
+ import ocrmypdf
+ import os
+
+ # Image to Text
+
+ def fn_image_to_text(input_image):
+     # Run Tesseract OCR on the uploaded image and return the extracted text.
+     return pytesseract.image_to_string(Image.open(input_image))
+
+ # PDF to Text
+
+ def fn_pdf_to_text(input_pdf):
+     reader = PdfReader(input_pdf)
+
+     output_pdf = ""
+     for page in reader.pages:
+         output_pdf += page.extract_text()
+
+     image_count = 0
+     for page in reader.pages:
+         image_count += len(page.images)
+
+     # If the PDF contains images but yields little extractable text, fall back to OCR.
+     if image_count > 0 and len(output_pdf) < 1000:
+         input_pdf_ocr = input_pdf.replace(".pdf", " - OCR.pdf")
+         ocrmypdf.ocr(input_pdf, input_pdf_ocr, force_ocr=True)
+
+         reader = PdfReader(input_pdf_ocr)
+         output_pdf = ""
+         for page in reader.pages:
+             output_pdf += page.extract_text()
+
+         os.remove(input_pdf_ocr)
+
+     return output_pdf
+
+ # Inference
+
+ model_text = "meta-llama/Llama-3.2-3B-Instruct"
+ model_vision = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+
+ client = InferenceClient()
+
+ def fn_text(
+     prompt,
+     history,
+     input,
+     #system_prompt,
+     max_tokens,
+     temperature,
+     top_p,
+ ):
+     # Extract text from an attached image or PDF; otherwise use an empty context.
+     if input:
+         extension = os.path.splitext(input)[1].lower()
+         if extension in [".png", ".jpg", ".jpeg"]:
+             output = fn_image_to_text(input)
+         elif extension == ".pdf":
+             output = fn_pdf_to_text(input)
+         else:
+             output = ""
+     else:
+         output = ""
+
+     messages = [{"role": "system", "content": [{"type": "text", "text": output}]}]
+     #messages = [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]
+     history.append(messages[0])
+
+     messages.append({"role": "user", "content": [{"type": "text", "text": prompt}]})
+     history.append(messages[1])
+
+     stream = client.chat.completions.create(
+         model = model_text,
+         messages = history,
+         max_tokens = max_tokens,
+         temperature = temperature,
+         top_p = top_p,
+         stream = True,
+     )
+
+     # Stream the reply incrementally, yielding the accumulated text so far.
+     chunks = []
+     for chunk in stream:
+         chunks.append(chunk.choices[0].delta.content or "")
+         yield "".join(chunks)
+
+ app_text = gr.ChatInterface(
+     fn = fn_text,
+     type = "messages",
+     additional_inputs = [
+         gr.File(type="filepath", label="Input"),
+         #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+     ],
+     title = "Meta Llama",
+     description = model_text,
+ )
+
+ def fn_vision(
+     prompt,
+     image_url,
+     #system_prompt,
+     max_tokens,
+     temperature,
+     top_p,
+ ):
+     messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
+
+     if image_url:
+         messages[0]["content"].append({"type": "image_url", "image_url": {"url": image_url}})
+
+     stream = client.chat.completions.create(
+         model = model_vision,
+         messages = messages,
+         max_tokens = max_tokens,
+         temperature = temperature,
+         top_p = top_p,
+         stream = True,
+     )
+
+     chunks = []
+     for chunk in stream:
+         chunks.append(chunk.choices[0].delta.content or "")
+         yield "".join(chunks)
+
+ app_vision = gr.Interface(
+     fn = fn_vision,
+     inputs = [
+         gr.Textbox(label="Prompt"),
+         gr.Textbox(label="Image URL")
+     ],
+     outputs = [
+         gr.Textbox(label="Output")
+     ],
+     additional_inputs = [
+         #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+     ],
+     title = "Meta Llama",
+     description = model_vision,
+ )
+
+ app = gr.TabbedInterface(
+     [app_text, app_vision],
+     ["Text", "Vision"]
+ )
+
+ app.launch()