prithivMLmods committed on
Commit
a218ba6
·
verified ·
1 Parent(s): 6182cb2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -11,7 +11,7 @@ from transformers import (
11
  Qwen2_5_VLForConditionalGeneration,
12
  Qwen2VLForConditionalGeneration,
13
  Glm4vForConditionalGeneration,
14
- AutoModelForVision2Seq,
15
  AutoProcessor,
16
  TextIteratorStreamer,
17
  )
@@ -62,10 +62,10 @@ model_s = Glm4vForConditionalGeneration.from_pretrained(
62
  torch_dtype=torch.float16
63
  ).to(device).eval()
64
 
65
- # Load kanana-1.5-v-3b-instruct
66
- MODEL_ID_F = "kakaocorp/kanana-1.5-v-3b-instruct"
67
  processor_f = AutoProcessor.from_pretrained(MODEL_ID_F, trust_remote_code=True)
68
- model_f = AutoModelForVision2Seq.from_pretrained(
69
  MODEL_ID_F,
70
  trust_remote_code=True,
71
  torch_dtype=torch.float16
@@ -113,7 +113,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
113
  elif model_name == "GLM-4.1V-9B-Thinking":
114
  processor = processor_s
115
  model = model_s
116
- elif model_name == "kanana-1.5-v-3b":
117
  processor = processor_f
118
  model = model_f
119
  else:
@@ -172,7 +172,7 @@ def generate_video(model_name: str, text: str, video_path: str,
172
  elif model_name == "GLM-4.1V-9B-Thinking":
173
  processor = processor_s
174
  model = model_s
175
- elif model_name == "kanana-1.5-v-3b":
176
  processor = processor_f
177
  model = model_f
178
  else:
@@ -290,7 +290,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
290
  markdown_output = gr.Markdown(label="(Result.md)")
291
 
292
  model_choice = gr.Radio(
293
- choices=["Camel-Doc-OCR-062825", "GLM-4.1V-9B-Thinking", "Megalodon-OCR-Sync-0713", "MonkeyOCR-pro-1.2B", "kanana-1.5-v-3b"],
294
  label="Select Model",
295
  value="Camel-Doc-OCR-062825"
296
  )
 
11
  Qwen2_5_VLForConditionalGeneration,
12
  Qwen2VLForConditionalGeneration,
13
  Glm4vForConditionalGeneration,
14
+ LlavaForConditionalGeneration,
15
  AutoProcessor,
16
  TextIteratorStreamer,
17
  )
 
62
  torch_dtype=torch.float16
63
  ).to(device).eval()
64
 
65
+ # Load llama-joycaption-beta-one-hf-llava
66
+ MODEL_ID_F = "fancyfeast/llama-joycaption-beta-one-hf-llava"
67
  processor_f = AutoProcessor.from_pretrained(MODEL_ID_F, trust_remote_code=True)
68
+ model_f = LlavaForConditionalGeneration.from_pretrained(
69
  MODEL_ID_F,
70
  trust_remote_code=True,
71
  torch_dtype=torch.float16
 
113
  elif model_name == "GLM-4.1V-9B-Thinking":
114
  processor = processor_s
115
  model = model_s
116
+ elif model_name == "joycaption-beta-one":
117
  processor = processor_f
118
  model = model_f
119
  else:
 
172
  elif model_name == "GLM-4.1V-9B-Thinking":
173
  processor = processor_s
174
  model = model_s
175
+ elif model_name == "joycaption-beta-one":
176
  processor = processor_f
177
  model = model_f
178
  else:
 
290
  markdown_output = gr.Markdown(label="(Result.md)")
291
 
292
  model_choice = gr.Radio(
293
+ choices=["Camel-Doc-OCR-062825", "GLM-4.1V-9B-Thinking", "Megalodon-OCR-Sync-0713", "MonkeyOCR-pro-1.2B", "joycaption-beta-one"],
294
  label="Select Model",
295
  value="Camel-Doc-OCR-062825"
296
  )