richardkimsm89 committed on
Commit
8ae4bcc
·
verified ·
1 Parent(s): 7fdc8ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -49
app.py CHANGED
@@ -1,41 +1,5 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- import pytesseract
4
- from PIL import Image
5
- from pypdf import PdfReader
6
- import ocrmypdf
7
- import os
8
-
9
- # Image to Text
10
-
11
- def fn_image_to_text(input_image):
12
- return pytesseract.image_to_string(Image.open(input_image))
13
-
14
- # PDF to Text
15
-
16
- def fn_pdf_to_text(input_pdf):
17
- reader = PdfReader(input_pdf)
18
-
19
- output_pdf = ""
20
- for page in reader.pages:
21
- output_pdf+=page.extract_text()
22
-
23
- image_count = 0
24
- for page in reader.pages:
25
- image_count += len(page.images)
26
-
27
- if image_count > 0 and len(output_pdf) < 1000:
28
- input_pdf_ocr = input_pdf.replace(".pdf", " - OCR.pdf")
29
- ocrmypdf.ocr(input_pdf, input_pdf_ocr, force_ocr=True)
30
-
31
- reader = PdfReader(input_pdf_ocr)
32
- output_pdf = ""
33
- for page in reader.pages:
34
- output_pdf+=page.extract_text()
35
-
36
- os.remove(input_pdf_ocr)
37
-
38
- return output_pdf
39
 
40
  # Inference
41
 
@@ -47,22 +11,13 @@ client = InferenceClient()
47
  def fn_text(
48
  prompt,
49
  history,
50
- input,
51
- #system_prompt,
52
  max_tokens,
53
  temperature,
54
  top_p,
55
  ):
56
- if input:
57
- if os.path.splitext(input)[1].lower() in [".png", ".jpg", ".jpeg"]:
58
- output = fn_image_to_text(input)
59
- if os.path.splitext(input)[1].lower() == ".pdf":
60
- output = fn_pdf_to_text(input)
61
- else:
62
- output = ""
63
 
64
- messages = [{"role": "system", "content": [{"type": "text", "text": output}]}]
65
- #messages = [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]
66
  history.append(messages[0])
67
 
68
  messages.append({"role": "user", "content": [{"type": "text", "text": prompt}]})
@@ -86,8 +41,7 @@ app_text = gr.ChatInterface(
86
  fn = fn_text,
87
  type = "messages",
88
  additional_inputs = [
89
- gr.File(type="filepath", label="Input"),
90
- #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
91
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
92
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
93
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  # Inference
5
 
 
11
  def fn_text(
12
  prompt,
13
  history,
14
+ system_prompt,
 
15
  max_tokens,
16
  temperature,
17
  top_p,
18
  ):
 
 
 
 
 
 
 
19
 
20
+ messages = [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]
 
21
  history.append(messages[0])
22
 
23
  messages.append({"role": "user", "content": [{"type": "text", "text": prompt}]})
 
41
  fn = fn_text,
42
  type = "messages",
43
  additional_inputs = [
44
+ gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
 
45
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
46
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
47
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),