JabriA committed on
Commit
b01b23a
·
1 Parent(s): 84f2a1e

Initial commit

Browse files
Files changed (3) hide show
  1. app.py +42 -4
  2. app.py.bak +42 -0
  3. requirements.txt +7 -0
app.py CHANGED
@@ -1,7 +1,45 @@
1
  import gradio as gr
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoModel, AutoTokenizer
3
+ from PIL import Image
4
+ import torch
5
 
6
# Load the GOT-OCR2.0 tokenizer and model once, at import time, so that every
# request handled by the app reuses the same objects.
# NOTE(review): trust_remote_code=True lets the checkpoint repository supply
# its own Python code — confirm the repository is trusted before deploying.
tokenizer = AutoTokenizer.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    # Place the weights on the GPU when one is visible, otherwise on the CPU.
    device_map='cuda' if torch.cuda.is_available() else 'cpu',
    use_safetensors=True,
    # Reuse the end-of-sequence token id as the padding token id.
    pad_token_id=tokenizer.eos_token_id,
)
# Switch to inference mode.
model = model.eval()
if torch.cuda.is_available():
    model = model.cuda()
19
 
20
# OCR function
def ocr_from_image(image, ocr_type):
    """Run GOT-OCR2.0 on an uploaded image and return the model's output.

    The UI wires this function to a ``gr.File`` input, which hands over a
    file path (or a temp-file wrapper exposing ``.name``) rather than an
    in-memory image, so those forms are passed to the model directly.
    Objects that expose ``.save()`` (e.g. PIL images) are still accepted
    and are written to a private temporary file first.

    Args:
        image: The uploaded image: a path string / ``os.PathLike``, an
            object with a ``.name`` path attribute, an object with a
            ``.save(path)`` method, or ``None`` when nothing was uploaded.
        ocr_type: OCR mode forwarded to ``model.chat`` (the UI offers
            ``"ocr"`` and ``"format"``).

    Returns:
        Whatever ``model.chat`` returns, or a prompt asking for an upload
        when ``image`` is ``None``.

    Raises:
        TypeError: If ``image`` is none of the supported forms.
    """
    import os
    import tempfile

    if image is None:
        return "Please upload an image."

    if not hasattr(image, "save"):
        # File-style upload: resolve it to a path on disk and use it as-is,
        # which avoids re-encoding the image before recognition.
        if isinstance(image, (str, os.PathLike)):
            upload_path = os.fspath(image)
        else:
            upload_path = getattr(image, "name", None)
        if not isinstance(upload_path, str):
            raise TypeError(
                f"Unsupported image input of type {type(image).__name__}"
            )
        return model.chat(tokenizer, upload_path, ocr_type=ocr_type)

    # In-memory image: normalise to RGB so alpha/palette images can be
    # written, and use a unique temp file so concurrent requests do not
    # overwrite each other's upload.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")
    fd, image_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        image.save(image_path)
        return model.chat(tokenizer, image_path, ocr_type=ocr_type)
    finally:
        os.remove(image_path)
28
+
29
# Modes offered by the "OCR Type" selector.
ocr_types = ["ocr", "format"]

# Build the Gradio UI: a file upload plus a mode selector, with the result
# of ocr_from_image shown as text.
iface = gr.Interface(
    title="🧠 GOT-OCR2.0 Transformer OCR",
    description="Upload an image file and select the OCR type: plain text (`ocr`) or formatted (`format`).",
    fn=ocr_from_image,
    inputs=[
        gr.File(file_types=[".jpg", ".jpeg", ".png"], label="Upload Image"),
        gr.Radio(choices=ocr_types, value="ocr", label="OCR Type"),
    ],
    outputs="text",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
app.py.bak ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModel, AutoTokenizer
3
+ from PIL import Image
4
+ import torch
5
+
6
# Load the GOT-OCR2.0 tokenizer and model a single time when the module is
# imported; the OCR handler below reuses them for each call.
# NOTE(review): trust_remote_code=True allows the checkpoint repository to
# provide its own Python code — confirm the source is trusted.
tokenizer = AutoTokenizer.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    # Use the GPU when available, otherwise fall back to the CPU.
    device_map='cuda' if torch.cuda.is_available() else 'cpu',
    use_safetensors=True,
    # The end-of-sequence token id doubles as the padding token id.
    pad_token_id=tokenizer.eos_token_id,
)
# Put the model into inference mode.
model = model.eval()
if torch.cuda.is_available():
    model = model.cuda()
19
+
20
# OCR function
def ocr_from_image(image, ocr_type):
    """Run GOT-OCR2.0 on an in-memory image and return the model's output.

    ``model.chat`` is given a file path, so the image is written to a
    private temporary file that is removed once recognition finishes.

    Args:
        image: Image object exposing ``.save(path)`` (the UI supplies a PIL
            image via ``gr.Image(type="pil")``), or ``None`` when the user
            submitted without choosing an image.
        ocr_type: OCR mode forwarded to ``model.chat`` (the UI offers
            ``"ocr"`` and ``"format"``).

    Returns:
        Whatever ``model.chat`` returns, or a prompt asking for an upload
        when ``image`` is ``None``.
    """
    import os
    import tempfile

    # Submitting with no image selected passes None; answer with a prompt
    # instead of failing on None.save().
    if image is None:
        return "Please upload an image."

    # Normalise to RGB so images with an alpha channel or palette can be
    # written without a mode error.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")

    # A unique temp file keeps concurrent requests from overwriting each
    # other's image; PNG avoids lossy re-compression before recognition.
    fd, image_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        image.save(image_path)
        return model.chat(tokenizer, image_path, ocr_type=ocr_type)
    finally:
        os.remove(image_path)
26
+
27
# Gradio interface
# Modes offered by the "OCR Type" selector.
ocr_types = ["ocr", "format"]

# Image upload (delivered to the handler as a PIL image) plus a mode
# selector; the handler's result is displayed as text.
iface = gr.Interface(
    title="GOT-OCR2.0: OCR with Transformers",
    description="Upload an image and select OCR type (plain text or formatted).",
    fn=ocr_from_image,
    inputs=[
        gr.Image(label="Upload Image", type="pil"),
        gr.Radio(choices=ocr_types, value="ocr", label="OCR Type"),
    ],
    outputs="text",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch==2.0.1
3
+ torchvision==0.15.2
4
+ transformers==4.37.2
5
+ tiktoken==0.6.0
6
+ verovio==4.3.1
7
+ accelerate==0.28.0