prithivMLmods committed on
Commit
c755133
·
verified ·
1 Parent(s): 329195a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -1
app.py CHANGED
@@ -15,6 +15,7 @@ import cv2
15
 
16
  from transformers import (
17
  Qwen2VLForConditionalGeneration,
 
18
  Qwen2_5_VLForConditionalGeneration,
19
  AutoModelForImageTextToText,
20
  AutoProcessor,
@@ -66,6 +67,15 @@ model_g = Qwen2_5_VLForConditionalGeneration.from_pretrained(
66
  ).to(device).eval()
67
  #--------------------------------------------------------------------------------------#
68
 
 
 
 
 
 
 
 
 
 
69
  def downsample_video(video_path):
70
  """
71
  Downsamples the video to evenly spaced frames.
@@ -106,6 +116,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
106
  elif model_name == "MonkeyOCR-Recognition":
107
  processor = processor_g
108
  model = model_g
 
 
 
109
  else:
110
  yield "Invalid model selected."
111
  return
@@ -160,6 +173,9 @@ def generate_video(model_name: str, text: str, video_path: str,
160
  elif model_name == "MonkeyOCR-Recognition":
161
  processor = processor_g
162
  model = model_g
 
 
 
163
  else:
164
  yield "Invalid model selected."
165
  return
@@ -259,7 +275,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
259
  with gr.Column():
260
  output = gr.Textbox(label="Output", interactive=False, lines=3, scale=2)
261
  model_choice = gr.Radio(
262
- choices=["docscopeOCR-7B-050425-exp", "MonkeyOCR-Recognition", "coreOCR-7B-050325-preview"],
263
  label="Select Model",
264
  value="docscopeOCR-7B-050425-exp"
265
  )
 
15
 
16
  from transformers import (
17
  Qwen2VLForConditionalGeneration,
18
+ Glm4vForConditionalGeneration,
19
  Qwen2_5_VLForConditionalGeneration,
20
  AutoModelForImageTextToText,
21
  AutoProcessor,
 
67
  ).to(device).eval()
68
  #--------------------------------------------------------------------------------------#
69
 
70
+ # Load GLM-4.1V-9B-Thinking
71
+ MODEL_ID_O = "THUDM/GLM-4.1V-9B-Thinking"
72
+ processor_o = AutoProcessor.from_pretrained(MODEL_ID_O, trust_remote_code=True)
73
+ model_o = Glm4vForConditionalGeneration.from_pretrained(
74
+ MODEL_ID_O,
75
+ trust_remote_code=True,
76
+ torch_dtype=torch.float16
77
+ ).to(device).eval()
78
+
79
  def downsample_video(video_path):
80
  """
81
  Downsamples the video to evenly spaced frames.
 
116
  elif model_name == "MonkeyOCR-Recognition":
117
  processor = processor_g
118
  model = model_g
119
+ elif model_name == "GLM-4.1V-9B-Thinking":
120
+ processor = processor_o
121
+ model = model_o
122
  else:
123
  yield "Invalid model selected."
124
  return
 
173
  elif model_name == "MonkeyOCR-Recognition":
174
  processor = processor_g
175
  model = model_g
176
+ elif model_name == "GLM-4.1V-9B-Thinking":
177
+ processor = processor_o
178
+ model = model_o
179
  else:
180
  yield "Invalid model selected."
181
  return
 
275
  with gr.Column():
276
  output = gr.Textbox(label="Output", interactive=False, lines=3, scale=2)
277
  model_choice = gr.Radio(
278
+ choices=["docscopeOCR-7B-050425-exp", "GLM-4.1V-9B-Thinking", "MonkeyOCR-Recognition", "coreOCR-7B-050325-preview"],
279
  label="Select Model",
280
  value="docscopeOCR-7B-050425-exp"
281
  )