import os
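# Startup bootstrap: the os.system calls below download the pretrained
# checkpoints with gdown (model_ctw.pth into ./output/ctw/ and workdir.zip,
# which holds the ABINet recognition checkpoint used later), install the
# pycocotools fork, and build the local detection package in development mode.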
os.system('pip install --upgrade --no-cache-dir gdown')
os.system('gdown -O ./output/ctw/model_ctw.pth 1Ajslu_9WisuZ2nJGzE6qbD87aK6_ozzA')
os.system('gdown -O ./workdir.zip 1mYM_26qHUom_5NU7iutHneB_KHlLjL5y')
os.system('unzip workdir.zip')
os.system('pip install "git+https://github.com/philferriere/cocoapi.git#egg=pycocotools&subdirectory=PythonAPI"')
os.system('python setup.py build develop --user')

import glob

import cv2
import torch
import gradio as gr

from demo import get_model, preprocess, postprocess, load
from utils import Config, Logger, CharsetMapper

def process_image(filepath):  # the Image input has type="filepath", so we receive a path string
  # Recognition model (ABINet): build from the training config and load the
  # best checkpoint from the downloaded workdir.
  config = Config('configs/rec/train_abinet.yaml')
  config.model_vision_checkpoint = None
  model = get_model(config)
  model = load(model, 'workdir/train-abinet/best-train-abinet.pth')
  charset = CharsetMapper(filename=config.dataset_charset_path, max_length=config.dataset_max_length + 1)

  # Detection model. NOTE: `cfg` and `DetDemo` are not imported in this file;
  # they are assumed to be provided by the detection package built by
  # `setup.py build develop` above.
  cfg.merge_from_file('./configs/det/r50_baseline.yaml')
  cfg.merge_from_list(["MODEL.DEVICE", "cpu"])
  det_demo = DetDemo(
      cfg,
      min_image_size=800,
      confidence_threshold=0.7,
      output_polygon=True
  )
  
  # Detect text regions in the input image.
  image = cv2.imread(filepath)
  result_polygons, result_masks, result_boxes = det_demo.run_on_opencv_image(image)

  # Cut a patch per detected box and batch them for recognition.
  patches = [image[box[1]:box[3], box[0]:box[2], :] for box in result_boxes]
  patches = [preprocess(patch, config.dataset_image_width, config.dataset_image_height) for patch in patches]
  patches = torch.stack(patches, dim=0)
  print(patches.shape)
  res = model(patches)
  rec_result = postprocess(res, charset, 'alignment')[0]
  print(rec_result)

  # Visualize the detections and return the rendered image plus the recognized text.
  visual_image = det_demo.visualization(image.copy(), result_polygons, result_masks, result_boxes)
  cv2.imwrite('result.jpg', visual_image)
  return 'result.jpg', rec_result
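
# The patch-cutting step in process_image indexes the image directly with the
# detected box corners. The helper below is a minimal, hypothetical sketch of
# how that step could be hardened (it is not part of the original demo and is
# not wired in), assuming boxes are [x1, y1, x2, y2] in pixel coordinates.
def clamp_box(box, height, width):
  # Clamp a detected [x1, y1, x2, y2] box to the image bounds so crops are valid.
  x1, y1, x2, y2 = (int(v) for v in box[:4])
  x1, y1 = max(0, x1), max(0, y1)
  x2, y2 = min(width, x2), min(height, y2)
  return x1, y1, x2, y2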

title = "张博强毕设中期展示(文本识别部分)"
description = "西北工业大学航海学院张博强毕设,目前识别部分进度为复现abinet,本网页为abinet复现的可视化web端展示"
#article = "<p style='text-align: center'><a href='https://arxiv.org/pdf/2103.06495.pdf'>Read Like Humans: Autonomous, Bidirectional and Iterative Language Modeling for Scene Text Recognition</a> | <a href='https://github.com/FangShancheng/ABINet'>Github Repo</a></p>"

iface = gr.Interface(fn=process_image, 
                     inputs=[gr.inputs.Image(label="image", type="filepath")], 
                     outputs=[gr.outputs.Image(), gr.outputs.Textbox()],
                     title=title,
                     description=description,
                     examples=glob.glob('figs/test/*.png'))
iface.launch(enable_queue=True)
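
# Usage note (assumed local run): `python app.py` performs the bootstrap above,
# then starts the Gradio server; open the printed URL and upload an image, or
# pick one of the bundled examples under figs/test/.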