prithivMLmods committed on
Commit
1590f58
·
verified ·
1 Parent(s): 304d9d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -7
app.py CHANGED
@@ -34,7 +34,7 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
34
  MODEL_ID_M,
35
  trust_remote_code=True,
36
  torch_dtype=torch.float16
37
- ).to("cuda").eval()
38
 
39
  # Load DocScope
40
  MODEL_ID_X = "prithivMLmods/docscopeOCR-7B-050425-exp"
@@ -43,7 +43,16 @@ model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
43
  MODEL_ID_X,
44
  trust_remote_code=True,
45
  torch_dtype=torch.float16
46
- ).to("cuda").eval()
 
 
 
 
 
 
 
 
 
47
 
48
  def downsample_video(video_path):
49
  """
@@ -82,6 +91,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
82
  elif model_name == "docscopeOCR-7B-050425-exp":
83
  processor = processor_x
84
  model = model_x
 
 
 
85
  else:
86
  yield "Invalid model selected."
87
  return
@@ -105,7 +117,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
105
  padding=True,
106
  truncation=False,
107
  max_length=MAX_INPUT_TOKEN_LENGTH
108
- ).to("cuda")
109
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
110
  generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
111
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
@@ -133,6 +145,9 @@ def generate_video(model_name: str, text: str, video_path: str,
133
  elif model_name == "docscopeOCR-7B-050425-exp":
134
  processor = processor_x
135
  model = model_x
 
 
 
136
  else:
137
  yield "Invalid model selected."
138
  return
@@ -158,7 +173,7 @@ def generate_video(model_name: str, text: str, video_path: str,
158
  return_tensors="pt",
159
  truncation=False,
160
  max_length=MAX_INPUT_TOKEN_LENGTH
161
- ).to("cuda")
162
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
163
  generation_kwargs = {
164
  **inputs,
@@ -222,7 +237,6 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
222
  examples=video_examples,
223
  inputs=[video_query, video_upload]
224
  )
225
-
226
  with gr.Accordion("Advanced options", open=False):
227
  max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
228
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
@@ -232,9 +246,10 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
232
  with gr.Column():
233
  output = gr.Textbox(label="Output", interactive=False)
234
  model_choice = gr.Radio(
235
- choices=["Cosmos-Reason1-7B", "docscopeOCR-7B-050425-exp"],
236
  label="Select Model",
237
- value="Cosmos-Reason1-7B")
 
238
 
239
  image_submit.click(
240
  fn=generate_image,
 
34
  MODEL_ID_M,
35
  trust_remote_code=True,
36
  torch_dtype=torch.float16
37
+ ).to(device).eval()
38
 
39
  # Load DocScope
40
  MODEL_ID_X = "prithivMLmods/docscopeOCR-7B-050425-exp"
 
43
  MODEL_ID_X,
44
  trust_remote_code=True,
45
  torch_dtype=torch.float16
46
+ ).to(device).eval()
47
+
48
+ # Load InkScope Relaxed
49
+ MODEL_ID_Z = "prithivMLmods/Inkscope-Captions-2B-0526"
50
+ processor_z = AutoProcessor.from_pretrained(MODEL_ID_Z, trust_remote_code=True)
51
+ model_z = Qwen2_5_VLForConditionalGeneration.from_pretrained(
52
+ MODEL_ID_Z,
53
+ trust_remote_code=True,
54
+ torch_dtype=torch.float16
55
+ ).to(device).eval()
56
 
57
  def downsample_video(video_path):
58
  """
 
91
  elif model_name == "docscopeOCR-7B-050425-exp":
92
  processor = processor_x
93
  model = model_x
94
+ elif model_name == "Captions-Mini":
95
+ processor = processor_z
96
+ model = model_z
97
  else:
98
  yield "Invalid model selected."
99
  return
 
117
  padding=True,
118
  truncation=False,
119
  max_length=MAX_INPUT_TOKEN_LENGTH
120
+ ).to(device)
121
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
122
  generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
123
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
 
145
  elif model_name == "docscopeOCR-7B-050425-exp":
146
  processor = processor_x
147
  model = model_x
148
+ elif model_name == "Captions-Mini":
149
+ processor = processor_z
150
+ model = model_z
151
  else:
152
  yield "Invalid model selected."
153
  return
 
173
  return_tensors="pt",
174
  truncation=False,
175
  max_length=MAX_INPUT_TOKEN_LENGTH
176
+ ).to(device)
177
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
178
  generation_kwargs = {
179
  **inputs,
 
237
  examples=video_examples,
238
  inputs=[video_query, video_upload]
239
  )
 
240
  with gr.Accordion("Advanced options", open=False):
241
  max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
242
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
 
246
  with gr.Column():
247
  output = gr.Textbox(label="Output", interactive=False)
248
  model_choice = gr.Radio(
249
+ choices=["Cosmos-Reason1-7B", "docscopeOCR-7B-050425-exp", "Captions-Mini"],
250
  label="Select Model",
251
+ value="Cosmos-Reason1-7B"
252
+ )
253
 
254
  image_submit.click(
255
  fn=generate_image,