richardkimsm89 committed
Commit f64354b · verified · 1 Parent(s): ea757dc

Create development/app

Files changed (1)
  1. development/app +153 -0
development/app ADDED
@@ -0,0 +1,153 @@
+ import gradio as gr
+ from huggingface_hub import InferenceClient
+ import pytesseract
+ from pypdf import PdfReader
+ import ocrmypdf
+ from PIL import Image
+ import os
+
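+ # Gradio chat app: text extracted from an uploaded image or PDF is appended to the
+ # user prompt and sent to a Gemma model through the Hugging Face Inference API.
+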
+ # Image to Text
+
+ def fn_image_to_text(input_image):
+     return pytesseract.image_to_string(Image.open(input_image))
+
+ # PDF to Text
+
+ def fn_pdf_to_text(input_pdf):
+     reader = PdfReader(input_pdf)
+
+     # Extract the embedded text layer
+     output_pdf = ""
+     for page in reader.pages:
+         output_pdf += page.extract_text()
+
+     # Count embedded images to detect scanned (image-only) PDFs
+     image_count = 0
+     for page in reader.pages:
+         image_count += len(page.images)
+
+     # If the PDF contains images but little extractable text, OCR it and re-read
+     if image_count > 0 and len(output_pdf) < 1000:
+         input_pdf_ocr = input_pdf.replace(".pdf", " - OCR.pdf")
+         ocrmypdf.ocr(input_pdf, input_pdf_ocr, force_ocr=True)
+
+         reader = PdfReader(input_pdf_ocr)
+         output_pdf = ""
+         for page in reader.pages:
+             output_pdf += page.extract_text()
+
+         os.remove(input_pdf_ocr)
+
+     return output_pdf
+
+ # Inference
+
+ model_text = "google/gemma-3-27b-it"
+ #model_text = "google/gemma-2-27b-it"
+ #model_vision = "google/paligemma2-3b-pt-224"
+
+ client = InferenceClient()
+
+ def fn_text(
+     prompt,
+     history,
+     input,
+     #system_prompt,
+     max_tokens,
+     temperature,
+     top_p,
+ ):
+     # Convert an attached image or PDF to text so it can be appended to the prompt
+     output = ""
+     if input:
+         extension = os.path.splitext(input)[1].lower()
+         if extension in [".png", ".jpg", ".jpeg"]:
+             output = fn_image_to_text(input)
+         elif extension == ".pdf":
+             output = fn_pdf_to_text(input)
+
+     #messages = [{"role": "system", "content": system_prompt}]
+     #history.append(messages[0])
+     #messages.append({"role": "user", "content": prompt})
+     #history.append(messages[1])
+
+     messages = [{"role": "user", "content": prompt + " " + output}]
+     history.append(messages[0])
+
+     stream = client.chat.completions.create(
+         model = model_text,
+         messages = history,
+         max_tokens = max_tokens,
+         temperature = temperature,
+         top_p = top_p,
+         stream = True,
+     )
+
+     # Stream the reply, yielding the accumulated text so the UI updates incrementally
+     chunks = []
+     for chunk in stream:
+         chunks.append(chunk.choices[0].delta.content or "")
+         yield "".join(chunks)
+
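+ # Chat UI: additional_inputs are rendered below the chatbox and passed to fn_text
+ # after the prompt and history, in the order listed.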
+ app_text = gr.ChatInterface(
+     fn = fn_text,
+     type = "messages",
+     additional_inputs = [
+         gr.File(type="filepath", label="Input"),
+         #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+     ],
+     title = "Google Gemma",
+     description = model_text,
+ )
+ """
+ def fn_vision(
+     prompt,
+     image_url,
+     #system_prompt,
+     max_tokens,
+     temperature,
+     top_p,
+ ):
+     messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
+
+     if image_url:
+         messages[0]["content"].append({"type": "image_url", "image_url": {"url": image_url}})
+
+     stream = client.chat.completions.create(
+         model = model_vision,
+         messages = messages,
+         max_tokens = max_tokens,
+         temperature = temperature,
+         top_p = top_p,
+         stream = True,
+     )
+
+     chunks = []
+     for chunk in stream:
+         chunks.append(chunk.choices[0].delta.content or "")
+         yield "".join(chunks)
+
+ app_vision = gr.Interface(
+     fn = fn_vision,
+     inputs = [
+         gr.Textbox(label="Prompt"),
+         gr.Textbox(label="Image URL")
+     ],
+     outputs = [
+         gr.Textbox(label="Output")
+     ],
+     additional_inputs = [
+         #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+     ],
+     title = "Google Gemma",
+     description = model_vision,
+ )
+ """
+ app = gr.TabbedInterface(
+     [app_text],
+     ["Text"]
+ ).launch()
+
+ #if __name__ == "__main__":
+ #    app.launch()