Marco committed on
Commit e6f1577 · 1 Parent(s): 1165597

Removed the preprocess fn from the modeling

Files changed (1)
  1. modeling_ovis.py +0 -69
modeling_ovis.py CHANGED
@@ -480,75 +480,6 @@ class Ovis(OvisPreTrainedModel):
         pad_sequence = torch.nn.utils.rnn.pad_sequence([i.flip(dims=[0]) for i in sequences],batch_first=True, padding_value=padding_value).flip(dims=[1])
         return pad_sequence[:,-self.config.multimodal_max_length:]
 
-    def preprocess_inputs(
-        self,
-        text_or_conversations: Union[List[Dict], str],
-        images: Optional[List[PIL.Image.Image]],
-        max_partition=9,
-        generation_preface='',
-        return_labels=False,
-        propagate_exception=True,
-        frame_selector=None,
-        frame_selector_kwargs=None
-    ):
-        # convert text to conversations
-        if isinstance(text_or_conversations, str):
-            conversations = [{
-                "from": "human",
-                "value": text_or_conversations
-            }]
-        elif isinstance(text_or_conversations, list):
-            conversations = text_or_conversations
-        else:
-            raise ValueError(f'Invalid type of `text_or_conversations`, expected `List[Dict]` or `str`,'
-                             f' but got {type(text_or_conversations)}')
-
-        if frame_selector is not None:
-            frame_selector_kwargs = frame_selector_kwargs or {}
-            conversations, images = frame_selector(conversations=conversations, frames=images, **frame_selector_kwargs)
-
-        # format conversations
-        prompt, raw_input_ids, raw_labels = self.get_conversation_formatter().format(
-            conversations, generation_preface=generation_preface)
-
-        # place image placeholders
-        input_ids = []
-        labels = []
-        pixel_values = []
-        invalidate_label = False
-        image_token_indices = [i for i, v in enumerate(raw_input_ids) if v == IMAGE_TOKEN_ID]
-        last_image_token_index = -1
-        for i in range(len(image_token_indices)):
-            head = 0 if i == 0 else image_token_indices[i - 1] + 1
-            tail = image_token_indices[i]
-            last_image_token_index = tail
-            input_ids.extend(raw_input_ids[head:tail])
-            labels.extend(raw_labels[head:tail])
-            try:
-                image = images[i]
-                raw_pixel_values, image_placeholders = self.visual_tokenizer.preprocess_image(
-                    image, max_partition=max_partition)
-            except Exception as e:
-                if propagate_exception:
-                    raise e
-                logging.exception(e)
-                invalidate_label = True
-                raw_pixel_values, image_placeholders = self.visual_tokenizer.mock_input()
-            input_ids.extend(image_placeholders)
-            labels.extend([IGNORE_ID] * len(image_placeholders))
-            pixel_values.append(raw_pixel_values)
-        input_ids.extend(raw_input_ids[last_image_token_index + 1:])
-        labels.extend(raw_labels[last_image_token_index + 1:])
-
-        # return tensors
-        input_ids = torch.tensor(input_ids, dtype=torch.long)
-        labels = torch.tensor([IGNORE_ID] * len(labels) if invalidate_label else labels, dtype=torch.long)
-        pixel_values = torch.cat(pixel_values, dim=0) if len(pixel_values) > 0 else None
-
-        if return_labels:
-            return prompt, input_ids, pixel_values, labels
-        else:
-            return prompt, input_ids, pixel_values
 
     def save_pretrained(
         self,
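For reviewers tracking call sites: below is a minimal sketch of how the removed preprocess_inputs method was typically invoked before this commit, assuming an already-loaded Ovis model instance (here named `model`), a PIL image input, and an `<image>` placeholder token in the prompt; the file path and question text are illustrative only. Code that relied on this entry point will need a different preprocessing path after this change.

# Minimal sketch of the calling pattern removed by this commit (not part of the diff).
# Assumptions: `model` is a loaded Ovis instance, the prompt uses an `<image>`
# placeholder token, and the image path / question text are illustrative.
from PIL import Image

image = Image.open("example.jpg")
query = "<image>\nDescribe this picture."

# The removed method returned the formatted prompt, token ids, and pixel values
# (plus labels when return_labels=True).
prompt, input_ids, pixel_values = model.preprocess_inputs(
    query,            # str or List[Dict] conversation, per the removed signature
    [image],          # one image per image placeholder in the conversation
    max_partition=9,  # default value from the removed signature
)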
 