internlm
/

internlm-xcomposer-7b-4bit

Feature Extraction

Transformers

PyTorch

InternLMXComposer

custom_code

Model card Files Files and versions Community

myownskyW7 commited on Oct 12, 2023

Commit

eea2f56

1 Parent(s): 047819a

Upload modeling_InternLM_XComposer.py

Browse files

Files changed (1) hide show

modeling_InternLM_XComposer.py +30 -24

modeling_InternLM_XComposer.py CHANGED Viewed

@@ -26,6 +26,13 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
     config_class = InternLMXComposerConfig
     _auto_class = "AutoModelForCausalLM"
     gen_config = dict(
         num_beams=5,
         do_sample=False,
@@ -33,7 +40,7 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         repetition_penalty=1.5,
         length_penalty=1.0,
         temperature=1.0,
-        max_new_tokens=200,
     )
     def __init__(self, config):
@@ -74,13 +81,14 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
             # speed up init llm
             with torch.device('meta'):
                 self.internlm_model = InternLMForCausalLM._from_config(config)
-            self.internlm_model.to_empty(device=config.device).to(torch.float16)
         for n, m in self.internlm_model.named_modules():
             if 'lora' in n:
                 m.float()
         self.internlm_proj = nn.Linear(self.Qformer.config.hidden_size,
-                                    self.internlm_model.config.hidden_size)
         print('Done')
         self.vis_processor = transforms.Compose([
@@ -95,15 +103,13 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
     @property
     def eoh(self):
-        #return self.tokenizer.decode(torch.Tensor([103027]),
-        #                             skip_special_tokens=True)
-        return '<TOKENS_UNUSED_0>'
     @property
     def eoa(self):
-        #return self.tokenizer.decode(torch.Tensor([103028]),
-        #                             skip_special_tokens=True)
-        return '<TOKENS_UNUSED_1>'
     def maybe_autocast(self, dtype=torch.float16):
         # if on cpu, don't use autocast
@@ -121,16 +127,12 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
                      vision_width,
                      cross_attention_freq=2,
                      pretrain=True):
-        encoder_config = BertConfig.from_pretrained("bert-base-uncased")
         encoder_config.encoder_width = vision_width
         # insert cross-attention layer every other block
         encoder_config.add_cross_attention = True
         encoder_config.cross_attention_freq = cross_attention_freq
         encoder_config.query_length = num_query_token
-        # if pretrain:
-        #     Qformer = BertLMHeadModel.from_pretrained("bert-base-uncased",
-        #                                               config=encoder_config)
-        # else:
         Qformer = BertLMHeadModel(config=encoder_config)
         query_tokens = nn.Parameter(
             torch.zeros(1, num_query_token, encoder_config.hidden_size))
@@ -160,13 +162,14 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
                 encoder_attention_mask=image_atts,
                 return_dict=True,
             )
-            inputs_internlm = self.internlm_proj(query_output.last_hidden_state)
             inputs_internlm = torch.cat([
                 self.flag_image_start.expand(inputs_internlm.shape[0], -1, -1),
                 inputs_internlm,
                 self.flag_image_end.expand(inputs_internlm.shape[0], -1, -1)
             ],
-                                      dim=1)
         return inputs_internlm
     def encode_text(self, text, add_special_tokens=False):
@@ -196,16 +199,13 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         new_kargs = copy.deepcopy(self.gen_config)
         new_kargs.update(kwargs)
         return new_kargs
-    def forward(self, **kwargs):
-        return self.internlm_model(**kwargs)
     def generate(self, text, image=None, **kwargs):
         text_embeds = self.encode_text(text)
         img_embeds = self.encode_img(image)
         prompt_embeds = self.wrap_prompt(text_embeds, img_embeds)
-        out_embeds = self.internlm_model.generate(inputs_embeds=prompt_embeds,
-                                                **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         return out_text
@@ -215,8 +215,8 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         prompt_embeds = self.wrap_prompt(text_embeds,
                                          img_embeds,
                                          history=history)
-        out_embeds = self.internlm_model.generate(inputs_embeds=prompt_embeds,
-                                                **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         # trunc at eoh and eoa
@@ -240,7 +240,13 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
                     history=None,
                     add_special=True):
         if add_special:
-            prompt_segs = [' <|User|>:', f'\n{self.eoh} <|Bot|>:']
         else:
             prompt_segs = [' <|User|>:', ' <|Bot|>:']  # used in wrap history
         prompt_seg_embeds = []

     config_class = InternLMXComposerConfig
     _auto_class = "AutoModelForCausalLM"
+    meta_instruction = """meta instruction
+You are an AI assistant whose name is 浦语.
+- 浦语 is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
+- 浦语 can understand and communicate fluently in the language chosen by the user such as English and 中文.
+conversation
+"""
     gen_config = dict(
         num_beams=5,
         do_sample=False,
         repetition_penalty=1.5,
         length_penalty=1.0,
         temperature=1.0,
+        max_new_tokens=500,
     )
     def __init__(self, config):
             # speed up init llm
             with torch.device('meta'):
                 self.internlm_model = InternLMForCausalLM._from_config(config)
+            self.internlm_model.to_empty(device=config.device).to(
+                torch.float16)
         for n, m in self.internlm_model.named_modules():
             if 'lora' in n:
                 m.float()
         self.internlm_proj = nn.Linear(self.Qformer.config.hidden_size,
+                                       self.internlm_model.config.hidden_size)
         print('Done')
         self.vis_processor = transforms.Compose([
     @property
     def eoh(self):
+        return self.tokenizer.decode(torch.Tensor([103027]),
+                                     skip_special_tokens=True)
     @property
     def eoa(self):
+        return self.tokenizer.decode(torch.Tensor([103028]),
+                                     skip_special_tokens=True)
     def maybe_autocast(self, dtype=torch.float16):
         # if on cpu, don't use autocast
                      vision_width,
                      cross_attention_freq=2,
                      pretrain=True):
+        encoder_config = BertConfig()
         encoder_config.encoder_width = vision_width
         # insert cross-attention layer every other block
         encoder_config.add_cross_attention = True
         encoder_config.cross_attention_freq = cross_attention_freq
         encoder_config.query_length = num_query_token
         Qformer = BertLMHeadModel(config=encoder_config)
         query_tokens = nn.Parameter(
             torch.zeros(1, num_query_token, encoder_config.hidden_size))
                 encoder_attention_mask=image_atts,
                 return_dict=True,
             )
+            inputs_internlm = self.internlm_proj(
+                query_output.last_hidden_state)
             inputs_internlm = torch.cat([
                 self.flag_image_start.expand(inputs_internlm.shape[0], -1, -1),
                 inputs_internlm,
                 self.flag_image_end.expand(inputs_internlm.shape[0], -1, -1)
             ],
+                                        dim=1)
         return inputs_internlm
     def encode_text(self, text, add_special_tokens=False):
         new_kargs = copy.deepcopy(self.gen_config)
         new_kargs.update(kwargs)
         return new_kargs
     def generate(self, text, image=None, **kwargs):
         text_embeds = self.encode_text(text)
         img_embeds = self.encode_img(image)
         prompt_embeds = self.wrap_prompt(text_embeds, img_embeds)
+        out_embeds = self.internlm_model.generate(
+            inputs_embeds=prompt_embeds, **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         return out_text
         prompt_embeds = self.wrap_prompt(text_embeds,
                                          img_embeds,
                                          history=history)
+        out_embeds = self.internlm_model.generate(
+            inputs_embeds=prompt_embeds, **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         # trunc at eoh and eoa
                     history=None,
                     add_special=True):
         if add_special:
+            if history is None:
+                prompt_segs = [
+                    self.meta_instruction + ' <|User|>:',
+                    f'\n{self.eoh} <|Bot|>:'
+                ]
+            else:
+                prompt_segs = [' <|User|>:', f'\n{self.eoh} <|Bot|>:']
         else:
             prompt_segs = [' <|User|>:', ' <|Bot|>:']  # used in wrap history
         prompt_seg_embeds = []