damerajee committed on
Commit
39f38b0
·
verified ·
1 Parent(s): 1ff614d

Update modeling_gpt2vision.py

Browse files
Files changed (1) hide show
  1. modeling_gpt2vision.py +18 -1
modeling_gpt2vision.py CHANGED
@@ -7,13 +7,30 @@ from .vision_encoder import VisionEncoder
7
  from .configuration_gpt2vision import GPT2VisionConfig
8
  from .modeling_gpt2 import GPT2LMHeadModel
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  class GPT2Vision(PreTrainedModel):
11
  config_class = GPT2VisionConfig
12
 
13
  def __init__(self, config):
14
  super().__init__(config)
15
  self.vision_encoder = VisionEncoder()
16
-
 
 
 
 
17
  if isinstance(config.gpt2_config, dict):
18
  gpt2_config = GPT2Config(**config.gpt2_config)
19
  else:
 
7
  from .configuration_gpt2vision import GPT2VisionConfig
8
  from .modeling_gpt2 import GPT2LMHeadModel
9
 
10
# Special token that resize_token_embeds() adds to the tokenizer's vocabulary.
IMAGE_TOKEN = "<image>"
# NOTE(review): not referenced in the visible portion of this file; the name
# suggests an end-of-answer marker — confirm against callers.
ANSWER_EOS = "<|endoftext|>"
12
+
13
def resize_token_embeds(model_name="openai-community/gpt2"):
    """Load a pretrained tokenizer and register ``IMAGE_TOKEN`` on it.

    Despite its name, this function does not resize any embedding matrix;
    it only prepares and returns the tokenizer. Callers use
    ``len(tokenizer)`` afterwards to resize the model's embeddings.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The loaded tokenizer with ``IMAGE_TOKEN`` added as an additional
        special token.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    # Register the image placeholder so it is treated as a special token.
    tok.add_special_tokens({"additional_special_tokens": [IMAGE_TOKEN]})
    return tok
20
+
21
# Module-level tokenizer, built once when this module is imported: the call
# below runs AutoTokenizer.from_pretrained via resize_token_embeds().
tokenizer = resize_token_embeds()
22
+
23
  class GPT2Vision(PreTrainedModel):
24
  config_class = GPT2VisionConfig
25
 
26
  def __init__(self, config):
27
  super().__init__(config)
28
  self.vision_encoder = VisionEncoder()
29
+ self.language_model.resize_token_embeddings(len(tokenizer))
30
+ self.tokenizer = tokenizer
31
+ tokenizer.pad_token = tokenizer.eos_token
32
+ self.image_token_id = self.tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
33
+
34
  if isinstance(config.gpt2_config, dict):
35
  gpt2_config = GPT2Config(**config.gpt2_config)
36
  else: