richardkimsm89 committed on
Commit
e174db6
·
verified ·
1 Parent(s): 96281e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -5
app.py CHANGED
@@ -1,7 +1,43 @@
1
- # Inference
2
-
3
  import gradio as gr
4
  from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  model_text = "meta-llama/Llama-3.2-3B-Instruct"
7
  model_vision = "meta-llama/Llama-3.2-11B-Vision-Instruct"
@@ -11,12 +47,22 @@ client = InferenceClient()
11
  def fn_text(
12
  prompt,
13
  history,
14
- system_prompt,
 
15
  max_tokens,
16
  temperature,
17
  top_p,
18
  ):
19
- messages = [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]
 
 
 
 
 
 
 
 
 
20
  history.append(messages[0])
21
 
22
  messages.append({"role": "user", "content": [{"type": "text", "text": prompt}]})
@@ -40,7 +86,8 @@ app_text = gr.ChatInterface(
40
  fn = fn_text,
41
  type = "messages",
42
  additional_inputs = [
43
- gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
 
44
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
45
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
46
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
 
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ import pytesseract
4
+ from PIL import Image
5
+ from pypdf import PdfReader
6
+ import ocrmypdf
7
+ import os
8
+
9
+ # Image to Text
10
+
11
+ def fn_image_to_text(input_image):
12
+ return pytesseract.image_to_string(Image.open(input_image))
13
+
14
+ # PDF to Text
15
+
16
+ def fn_pdf_to_text(input_pdf):
17
+ reader = PdfReader(input_pdf)
18
+
19
+ output_pdf = ""
20
+ for page in reader.pages:
21
+ output_pdf+=page.extract_text()
22
+
23
+ image_count = 0
24
+ for page in reader.pages:
25
+ image_count += len(page.images)
26
+
27
+ if image_count > 0 and len(output_pdf) < 1000:
28
+ input_pdf_ocr = input_pdf.replace(".pdf", " - OCR.pdf")
29
+ ocrmypdf.ocr(input_pdf, input_pdf_ocr, force_ocr=True)
30
+
31
+ reader = PdfReader(input_pdf_ocr)
32
+ output_pdf = ""
33
+ for page in reader.pages:
34
+ output_pdf+=page.extract_text()
35
+
36
+ os.remove(input_pdf_ocr)
37
+
38
+ return output_pdf
39
+
40
+ # Inference
41
 
42
  model_text = "meta-llama/Llama-3.2-3B-Instruct"
43
  model_vision = "meta-llama/Llama-3.2-11B-Vision-Instruct"
 
47
  def fn_text(
48
  prompt,
49
  history,
50
+ input,
51
+ #system_prompt,
52
  max_tokens,
53
  temperature,
54
  top_p,
55
  ):
56
+ if input:
57
+ if os.path.splitext(input)[1].lower() in [".png", ".jpg", ".jpeg"]:
58
+ output = fn_image_to_text(input)
59
+ if os.path.splitext(input)[1].lower() == ".pdf":
60
+ output = fn_pdf_to_text(input)
61
+ else:
62
+ output = ""
63
+
64
+ messages = [{"role": "system", "content": [{"type": "text", "text": output}]}]
65
+ #messages = [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]
66
  history.append(messages[0])
67
 
68
  messages.append({"role": "user", "content": [{"type": "text", "text": prompt}]})
 
86
  fn = fn_text,
87
  type = "messages",
88
  additional_inputs = [
89
+ gr.File(type="filepath", label="Input"),
90
+ #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
91
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
92
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
93
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),