rynmurdock committed on
Commit
ce6d022
·
verified ·
1 Parent(s): 82ddb45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -132,8 +132,8 @@ processor = AutoProcessor.from_pretrained('google/paligemma-3b-pt-224')
132
 
133
  @spaces.GPU()
134
  def to_wanted_embs(image_outputs, input_ids, attention_mask, cache_position=None):
135
- inputs_embeds = pali.get_input_embeddings()(input_ids)
136
- selected_image_feature = image_outputs.to(dtype).to(device)
137
  image_features = pali.multi_modal_projector(selected_image_feature)
138
 
139
  if cache_position is None:
@@ -190,9 +190,9 @@ def generate_gpu(in_im_embs, prompt='the scene'):
190
  )
191
  im_emb = im_emb.detach().to('cpu').to(torch.float32)
192
  im = torchvision.transforms.ToTensor()(output.frames[0][len(output.frames[0])//2]).unsqueeze(0)
193
- im = torch.nn.functional.interpolate(im, (224, 224))
194
  im = (im - .5) * 2
195
- gemb = pali.vision_tower(im.to(device).to(dtype)).last_hidden_state.detach().to('cpu').to(torch.float32)
196
  return output, im_emb, gemb
197
 
198
 
 
132
 
133
  @spaces.GPU()
134
  def to_wanted_embs(image_outputs, input_ids, attention_mask, cache_position=None):
135
+ inputs_embeds = pali.get_input_embeddings()(input_ids.to('cuda'))
136
+ selected_image_feature = image_outputs.to(dtype).to('cuda')
137
  image_features = pali.multi_modal_projector(selected_image_feature)
138
 
139
  if cache_position is None:
 
190
  )
191
  im_emb = im_emb.detach().to('cpu').to(torch.float32)
192
  im = torchvision.transforms.ToTensor()(output.frames[0][len(output.frames[0])//2]).unsqueeze(0)
193
+ im = torch.nn.functional.interpolate(im, (224, 224)).to(dtype).to('cuda')
194
  im = (im - .5) * 2
195
+ gemb = pali.vision_tower(im).last_hidden_state.detach().to('cpu').to(torch.float32)
196
  return output, im_emb, gemb
197
 
198