nakamura196 committed
Commit 1981742 · Parent(s): f0d79a2

feat: initial commit

Files changed (8)
  1. .gitignore +5 -0
  2. .gitmodules +3 -0
  3. README.md +1 -1
  4. default.jpg +0 -0
  5. install.sh +4 -0
  6. ndlkotenocr-lite +1 -0
  7. reuirements.txt +14 -0
  8. src/app.py +95 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
+ .venv
+ .DS_Store
+ __pycache__
+ src/*
+ !src/app.py
.gitmodules ADDED
@@ -0,0 +1,3 @@
+ [submodule "ndlkotenocr-lite"]
+ path = ndlkotenocr-lite
+ url = https://github.com/ndl-lab/ndlkotenocr-lite.git
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: red
  colorTo: blue
  sdk: gradio
  sdk_version: 5.7.1
- app_file: app.py
+ app_file: src/app.py
  pinned: false
  ---

default.jpg ADDED
install.sh ADDED
@@ -0,0 +1,4 @@
+ #!/bin/bash
+ # Initialize and update the submodule
+ git submodule update --init --recursive
+ cp -rp ndlkotenocr-lite/src .
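install.sh pulls in the ndlkotenocr-lite submodule and copies its src/ directory into the repository root, which is where the rtmdet and parseq modules imported by src/app.py are expected to come from. As a rough pre-flight sketch only (the model and config paths below are the ones hard-coded in src/app.py; the module file names and the base directory they resolve from are assumptions, not part of this commit), the layout could be checked like this:

```python
import os

# Hypothetical layout check, not part of this commit. rtmdet.py / parseq.py are
# assumed to match the imports in app.py; the model/ and config/ paths are the
# ones referenced by get_detector / get_recognizer. Adjust the base directory if
# the app is launched from somewhere other than the repository root.
expected = [
    "src/app.py",
    "src/rtmdet.py",    # assumption: module name matches `from rtmdet import RTMDet`
    "src/parseq.py",    # assumption: module name matches `from parseq import PARSEQ`
    "model/rtmdet-s-1280x1280.onnx",
    "model/parseq-ndl-32x384-tiny-10.onnx",
    "config/ndl.yaml",
    "config/NDLmoji.yaml",
]

missing = [p for p in expected if not os.path.isfile(p)]
print("OK" if not missing else "Missing: " + ", ".join(missing))
```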
ndlkotenocr-lite ADDED
@@ -0,0 +1 @@
+ Subproject commit 268e3534d87cff119af67e6451082d6ab1de1e5e
reuirements.txt ADDED
@@ -0,0 +1,14 @@
+ # git+https://github.com/ndl-lab/ndlkotenocr-lite
+ dill
+ flet
+ lxml
+ networkx
+ numpy
+ onnxruntime
+ pillow
+ ordered-set
+ protobuf
+ pyparsing
+ PyYAML
+ tqdm
+ gradio
src/app.py ADDED
@@ -0,0 +1,95 @@
+ import gradio as gr
+ import numpy as np
+ from PIL import Image
+ import os
+ from rtmdet import RTMDet
+ from parseq import PARSEQ
+ from yaml import safe_load
+
+
+
+ # Model Heading and Description
+ model_heading = "YOLOv11x くずし字認識サービス(一文字)"
+ description = """YOLOv11x くずし字認識サービス(一文字) Gradio demo for classification. Upload an image or click an example image to use."""
+
+ article = "<p style='text-align: center'>YOLOv11x くずし字認識サービス(一文字) is a classification model trained on the <a href=\"https://lab.hi.u-tokyo.ac.jp/datasets/kuzushiji\">東京大学史料編纂所くずし字データセット</a>.</p>"
+
+ image_path = [
+     ['../default.jpg']
+ ]
+
+ # Functions to load models
+ def get_detector(weights_path, classes_path, device='cpu'):
+     assert os.path.isfile(weights_path), f"Weight file not found: {weights_path}"
+     assert os.path.isfile(classes_path), f"Classes file not found: {classes_path}"
+     return RTMDet(model_path=weights_path,
+                   class_mapping_path=classes_path,
+                   score_threshold=0.3,
+                   conf_thresold=0.3,
+                   iou_threshold=0.3,
+                   device=device)
+
+ def get_recognizer(weights_path, classes_path, device='cpu'):
+     assert os.path.isfile(weights_path), f"Weight file not found: {weights_path}"
+     assert os.path.isfile(classes_path), f"Classes file not found: {classes_path}"
+
+     with open(classes_path, encoding="utf-8") as f:
+         charlist = list(safe_load(f)["model"]["charset_train"])
+     return PARSEQ(model_path=weights_path, charlist=charlist, device=device)
+
+ # YOLO Inference Function
+ def YOLOv11x_img_inference(image_path: str):
+     try:
+         # Load the models
+         detector = get_detector(
+             weights_path="model/rtmdet-s-1280x1280.onnx",
+             classes_path="config/ndl.yaml",
+             device="cpu"
+         )
+         recognizer = get_recognizer(
+             weights_path="model/parseq-ndl-32x384-tiny-10.onnx",
+             classes_path="config/NDLmoji.yaml",
+             device="cpu"
+         )
+
+         # Load image
+         pil_image = Image.open(image_path).convert('RGB')
+         npimg = np.array(pil_image)
+
+         # Object detection
+         detections = detector.detect(npimg)
+         result_json = []
+
+         # Text recognition
+         for det in detections:
+             xmin, ymin, xmax, ymax = det["box"]
+             line_img = npimg[int(ymin):int(ymax), int(xmin):int(xmax)]
+             text = recognizer.read(line_img)
+             result_json.append({
+                 "boundingBox": [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]],
+                 "text": text,
+                 "confidence": det["confidence"]
+             })
+
+         # Return results in JSON format
+         return result_json
+     except Exception as e:
+         return {"error": str(e)}
+
+ # Gradio Inputs and Outputs
+ inputs_image = gr.Image(type="filepath", label="Input Image")
+ outputs_image = gr.JSON(label="Output JSON")
+
+ # Gradio Interface
+ demo = gr.Interface(
+     fn=YOLOv11x_img_inference,
+     inputs=inputs_image,
+     outputs=outputs_image,
+     title=model_heading,
+     description=description,
+     examples=image_path,
+     article=article,
+     cache_examples=False
+ )
+
+ demo.launch(share=False)
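For context, YOLOv11x_img_inference returns either a list of dicts with boundingBox, text, and confidence keys, or an {"error": ...} dict if anything fails. The following is a hypothetical consumer sketch, not part of this commit; note that importing app also executes demo.launch() at module level, so in practice that call would need to be guarded by if __name__ == "__main__":

```python
# Hypothetical consumer of the JSON produced by YOLOv11x_img_inference.
# Assumes src/ is on sys.path, the model/config paths in app.py resolve from
# the current working directory, and the launch() caveat above is handled.
from app import YOLOv11x_img_inference

result = YOLOv11x_img_inference("../default.jpg")
if isinstance(result, dict):  # {"error": "..."} on failure
    print("Inference failed:", result["error"])
else:
    # Sort detections top to bottom by the y coordinate of the box, then print
    # each recognized text line with its confidence and bounding box.
    for entry in sorted(result, key=lambda e: e["boundingBox"][0][1]):
        (xmin, ymin), _, (xmax, ymax), _ = entry["boundingBox"]
        print(f'{entry["text"]}\tconf={entry["confidence"]:.2f}\t'
              f'box=({xmin:.0f},{ymin:.0f})-({xmax:.0f},{ymax:.0f})')
```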