zamal committed (verified)
Commit: 64871c7
Parent(s): c3347e7

Update app.py

Files changed (1): app.py (+6, -11)
app.py CHANGED

@@ -2,17 +2,15 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
 from PIL import Image
 import torch
-import spaces
 
 # Define the repository for the quantized model
 repo_name = "cyan2k/molmo-7B-D-bnb-4bit"
 arguments = {"device_map": "auto", "torch_dtype": "auto", "trust_remote_code": True}
 
-# Load the processor and quantized model, ensure they are on the GPU
+# Load the processor and quantized model
 processor = AutoProcessor.from_pretrained(repo_name, **arguments)
 model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments).to("cuda")
 
-@spaces.GPU(duration=120)
 def process_image_and_text(image, text):
     # Process the image and text
     inputs = processor.process(
@@ -24,16 +22,13 @@ def process_image_and_text(image, text):
     inputs = {k: v.to("cuda").unsqueeze(0) for k, v in inputs.items()}
 
     # Generate output
-    output = model.generate_from_batch(
-        inputs,
-        GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
-        tokenizer=processor.tokenizer
+    output = model.generate(
+        **inputs,
+        max_new_tokens=200
     )
 
     # Only get generated tokens; decode them to text
-    generated_tokens = output[0, inputs['input_ids'].size(1):]
-    generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
-
+    generated_text = processor.decode(output, skip_special_tokens=True)
     return generated_text
 
 def chatbot(image, text, history):
@@ -46,7 +41,7 @@ def chatbot(image, text, history):
 
 # Define the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Image Chatbot with Molmo-7B-D-0924")
+    gr.Markdown("# Image Chatbot with Molmo-7B-4 Bit Quantized")
 
     with gr.Row():
         image_input = gr.Image(type="numpy")
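In sum, the commit drops the ZeroGPU `spaces` decorator and swaps Molmo's custom `generate_from_batch(...)`/`GenerationConfig` call for a plain `generate(**inputs, max_new_tokens=200)` followed by `processor.decode(...)`. Below is a minimal standalone sketch of the resulting inference path, not the Space's exact code: it assumes the checkpoint's remote code accepts the standard `generate()` API and that a CUDA device is available; the image path and prompt are placeholders, and the sketch decodes the first returned sequence (`output[0]`), the usual pattern for a batched `generate()` result.

```python
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

# Same repo and loading arguments as the Space's app.py
repo_name = "cyan2k/molmo-7B-D-bnb-4bit"
arguments = {"device_map": "auto", "torch_dtype": "auto", "trust_remote_code": True}

processor = AutoProcessor.from_pretrained(repo_name, **arguments)
model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments).to("cuda")

# Hypothetical inputs for illustration
image = Image.open("example.jpg")
inputs = processor.process(images=[image], text="Describe this image.")

# Move tensors to the GPU and add a batch dimension, as in app.py
inputs = {k: v.to("cuda").unsqueeze(0) for k, v in inputs.items()}

# Standard generate() call, assuming the remote code supports it
with torch.inference_mode():
    output = model.generate(**inputs, max_new_tokens=200)

# Decode the first sequence (prompt plus completion)
print(processor.decode(output[0], skip_special_tokens=True))
```

If the checkpoint's remote code only implements `generate_from_batch`, the pre-commit call shown in the diff remains the working path.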