Geraldine committed
Commit e61c1cd · verified · 1 Parent(s): 6770ac1

Update app.py

Files changed (1)
  1. app.py +12 -16
app.py CHANGED
@@ -11,10 +11,12 @@ import base64
 import os, stat, io
 
 # Load the model in half-precision on the available device(s)
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    "./Qwen2-VL-2B-Instruct-GPTQ-Int8", torch_dtype="auto", device_map="auto"
+model = AutoModelForVision2Seq.from_pretrained(
+    "./SmolVLM-500M-Instruct",
+    torch_dtype=torch.bfloat16,
+    _attn_implementation="eager"
 )
-processor = AutoProcessor.from_pretrained("./Qwen2-VL-2B-Instruct-GPTQ-Int8")
+processor = AutoProcessor.from_pretrained("./SmolVLM-500M-Instruct")
 
 def array_to_image(image_array):
     if image_array is None:
@@ -44,25 +46,19 @@ def describe_image(image_array):
         }
     ]
 
-    text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
-    # Excepted output: '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>\n<|im_start|>assistant\n'
+    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
 
-    inputs = processor(
-        text=[text_prompt], images=[image], padding=True, return_tensors="pt"
-    )
-    #inputs = inputs.to("cpu")
+    inputs = processor(text=prompt, images=[image], return_tensors="pt")
 
     # Inference: Generation of the output
-    output_ids = model.generate(**inputs, max_new_tokens=128)
-    generated_ids = [
-        output_ids[len(input_ids) :]
-        for input_ids, output_ids in zip(inputs.input_ids, output_ids)
+    generated_ids = model.generate(**inputs, max_new_tokens=500)
+    output_ids = [
+        generated_ids[len(input_ids) :]
+        for input_ids, generated_ids in zip(inputs.input_ids, generated_ids)
     ]
     output_text = processor.batch_decode(
-        generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
+        output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
     )
-    # remove image
-    os.remove(image_path)
     # Extract the detailed description from the response
     return output_text, generate_embeddings(output_text)
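
For context, here is a minimal, self-contained sketch of the inference path this commit switches to. The changed lines are reused verbatim; the `messages` payload, the `sample.jpg` path, and the imports are assumptions (the hunk only shows the closing brackets of `messages`), following the standard SmolVLM chat format:

import torch
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor

# Model/processor loading mirrors the new lines in this commit; the local
# directory is assumed to hold a SmolVLM-500M-Instruct checkpoint.
model = AutoModelForVision2Seq.from_pretrained(
    "./SmolVLM-500M-Instruct",
    torch_dtype=torch.bfloat16,
    _attn_implementation="eager",
)
processor = AutoProcessor.from_pretrained("./SmolVLM-500M-Instruct")

image = Image.open("sample.jpg")  # hypothetical input; app.py builds it from an array

# Assumed message structure (not visible in the hunk): one user turn with an
# image placeholder followed by the instruction text.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Describe this image."},
        ],
    }
]

prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=prompt, images=[image], return_tensors="pt")

generated_ids = model.generate(**inputs, max_new_tokens=500)
# Drop the prompt tokens from each sequence so only newly generated text is decoded.
output_ids = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
output_text = processor.batch_decode(
    output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
)
print(output_text[0])

Compared with the removed Qwen2-VL path, the new code drops padding=True and device_map="auto", fixes a bug by no longer calling os.remove(image_path) on an undefined variable, and raises max_new_tokens from 128 to 500.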