gryan-galario committed on
Commit
ff3e8f8
·
1 Parent(s): 5bdac5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -3
app.py CHANGED
@@ -1,5 +1,41 @@
1
  import gradio as gr
2
- from transformers import pipeline
 
 
 
3
 
4
- pipe = pipeline(model="kha-white/manga-ocr-base")
5
- gr.Interface.from_pipeline(pipe, title="Manga OCR", description="Japanese Character Recognization from Mangas", allow_flagging="never").launch(inbrowser=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
import re

import jaconv
import requests
import torch
from PIL import Image
from transformers import (
    AutoFeatureExtractor,
    AutoModel,
    AutoTokenizer,
    VisionEncoderDecoderModel,
)
6
 
7
+
8
# Hub repository that supplies the tokenizer, the image preprocessor and the
# OCR weights; kept in one place so the three loaders cannot drift apart.
MODEL_ID = "kha-white/manga-ocr-base"

# Turns generated token ids back into text (see manga_ocr).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# manga_ocr() calls model.generate(), so the checkpoint has to be loaded with
# a generation-capable class. The generic AutoModel entry point does not
# provide that for an image-to-text encoder-decoder checkpoint, hence the
# explicit VisionEncoderDecoderModel.
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)

# Converts a PIL image into the `pixel_values` tensor the model consumes.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
13
+
14
def post_process(text):
    """Normalise raw decoder output into display-ready text.

    The steps, in order:

    1. Remove every whitespace character.
    2. Replace each ellipsis character with three ASCII dots.
    3. Rewrite any run of two or more "・" / "." characters as the same
       number of ASCII dots.
    4. Pass the result through ``jaconv.h2z`` with ``ascii=True`` and
       ``digit=True`` (half-width to full-width conversion).

    Args:
        text: Decoded string to clean up.

    Returns:
        The normalised string.
    """
    without_spaces = "".join(text.split())
    dotted = without_spaces.replace("…", "...")
    # Keep the run length; only the characters inside the run are unified.
    dotted = re.sub(
        r"[・.]{2,}",
        lambda match: "." * len(match.group()),
        dotted,
    )
    return jaconv.h2z(dotted, ascii=True, digit=True)
20
+
21
def manga_ocr(img):
    """Recognise the text in a single image.

    Args:
        img: Either a ``PIL.Image.Image`` (what the Gradio input declared
            with ``type="pil"`` delivers) or anything ``PIL.Image.open``
            accepts, such as a file path or a binary file object.

    Returns:
        The recognised text after ``post_process`` normalisation.
    """
    # Only open the input when it is not already a decoded image; calling
    # Image.open on a PIL image fails.
    if not isinstance(img, Image.Image):
        img = Image.open(img)

    # Round-trip through greyscale: colour is discarded while the image
    # still ends up with three channels.
    img = img.convert("L").convert("RGB")

    # Module-level objects: there is no `self` in a plain function.
    pixel_values = feature_extractor(img, return_tensors="pt").pixel_values
    # generate() returns a batch; a single image was passed, so take row 0.
    output = model.generate(pixel_values)[0]
    text = tokenizer.decode(output, skip_special_tokens=True)
    return post_process(text)
29
+
30
# Web UI: one image in, the recognised text out.
iface = gr.Interface(
    # The OCR entry point defined in this file is manga_ocr; there is no
    # function named `infer`.
    fn=manga_ocr,
    # type="pil" makes Gradio hand the callback a PIL image.
    inputs=[gr.inputs.Image(label="Input", type="pil")],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Manga OCR",
    description="Japanese Character Recognization from Mangas",
    allow_flagging='never',
)

# Start the server and open the UI in the default browser.
iface.launch(inbrowser=True)