update change_lora func
Haoxin Chen committed · Commit 62b6d65 · Parent(s): 3ab49e3

Files changed:
- app.py +1 -1
- lvdm/models/modules/lora.py +77 -0
- videocrafter_test.py +5 -4
app.py CHANGED
@@ -12,7 +12,7 @@ def videocrafter_demo(result_dir='./tmp/'):
     with gr.Tab(label="VideoCrafter"):
         input_text = gr.Text()
         model_choices=['origin','vangogh','frozen','yourname', 'coco']
-        trigger_word_list=['','Loving Vincent style', 'frozenmovie style', 'MakotoShinkaiYourName style', 'coco style']
+        trigger_word_list=[' ','Loving Vincent style', 'frozenmovie style', 'MakotoShinkaiYourName style', 'coco style']
 
         with gr.Row():
             model_index = gr.Dropdown(label='Models', elem_id=f"model", choices=model_choices, value=model_choices[0], type="index",interactive=True)
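The base-model entry in trigger_word_list changes from an empty string to a single space, and the matching comparison in videocrafter_test.py (below) is updated to check against ' '. A minimal sketch of how the selected entry feeds the prompt, mirroring get_prompt's logic; the build_prompt helper is hypothetical, not in the repo:

    trigger_word_list = [' ', 'Loving Vincent style', 'frozenmovie style',
                         'MakotoShinkaiYourName style', 'coco style']

    def build_prompt(input_text, model_index):
        # ' ' is the sentinel for the base 'origin' model: append nothing
        trigger_word = trigger_word_list[model_index]
        if trigger_word != ' ':
            input_text = input_text + ', ' + trigger_word
        return input_text

    print(build_prompt('a corgi running on the beach', 1))
    # a corgi running on the beach, Loving Vincent style
    print(build_prompt('a corgi running on the beach', 0))
    # a corgi running on the beach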
lvdm/models/modules/lora.py CHANGED
@@ -680,6 +680,83 @@ def change_lora(model, inject_lora=False, lora_scale=1.0, lora_path='', last_tim
     net_load_lora(model, lora_path, alpha=lora_scale)
 
 
+def net_load_lora_v2(net, checkpoint_path, alpha=1.0, remove=False, origin_weight=None):
+    visited = []
+    state_dict = torch.load(checkpoint_path)
+    for k, v in state_dict.items():
+        state_dict[k] = v.to(net.device)
+    # import pdb;pdb.set_trace()
+    for key in state_dict:
+        if ".alpha" in key or key in visited:
+            continue
+        layer_infos = key.split(".")[:-2]  # remove lora_up and down weight
+        curr_layer = net
+        # find the target layer
+        temp_name = layer_infos.pop(0)
+        while len(layer_infos) > -1:
+            curr_layer = curr_layer.__getattr__(temp_name)
+            if len(layer_infos) > 0:
+                temp_name = layer_infos.pop(0)
+            elif len(layer_infos) == 0:
+                break
+        if curr_layer.__class__ not in [nn.Linear, nn.Conv2d]:
+            print('missing param at:', key)
+            continue
+        pair_keys = []
+        if "lora_down" in key:
+            pair_keys.append(key.replace("lora_down", "lora_up"))
+            pair_keys.append(key)
+        else:
+            pair_keys.append(key)
+            pair_keys.append(key.replace("lora_up", "lora_down"))
+
+        # storage weight
+        if origin_weight is None:
+            origin_weight = dict()
+            storage_key = key.replace("lora_down", "lora").replace("lora_up", "lora")
+            origin_weight[storage_key] = curr_layer.weight.data.clone()
+        else:
+            storage_key = key.replace("lora_down", "lora").replace("lora_up", "lora")
+            if storage_key not in origin_weight.keys():
+                origin_weight[storage_key] = curr_layer.weight.data.clone()
+
+        # update
+        if len(state_dict[pair_keys[0]].shape) == 4:
+            # for conv
+            if remove:
+                curr_layer.weight.data = origin_weight[storage_key].clone()
+            else:
+                weight_up = state_dict[pair_keys[0]].squeeze(3).squeeze(2).to(torch.float32)
+                weight_down = state_dict[pair_keys[1]].squeeze(3).squeeze(2).to(torch.float32)
+                curr_layer.weight.data += alpha * torch.mm(weight_up, weight_down).unsqueeze(2).unsqueeze(3)
+        else:
+            # for linear
+            if remove:
+                curr_layer.weight.data = origin_weight[storage_key].clone()
+            else:
+                weight_up = state_dict[pair_keys[0]].to(torch.float32)
+                weight_down = state_dict[pair_keys[1]].to(torch.float32)
+                curr_layer.weight.data += alpha * torch.mm(weight_up, weight_down)
+
+        # update visited list
+        for item in pair_keys:
+            visited.append(item)
+    print('load_weight_num:', len(visited))
+    return origin_weight
+
+
+def change_lora_v2(model, inject_lora=False, lora_scale=1.0, lora_path='', last_time_lora='', last_time_lora_scale=1.0, origin_weight=None):
+    # remove lora
+    if last_time_lora != '':
+        origin_weight = net_load_lora_v2(model, last_time_lora, alpha=last_time_lora_scale, remove=True, origin_weight=origin_weight)
+    # add new lora
+    if inject_lora:
+        origin_weight = net_load_lora_v2(model, lora_path, alpha=lora_scale, origin_weight=origin_weight)
+    return origin_weight
+
+
 def load_safeloras(path, device="cpu"):
     safeloras = safe_open(path, framework="pt", device=device)
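What v2 adds over net_load_lora is the origin_weight cache: the first time a layer is touched, its pristine weight is cloned under a storage key shared by the lora_up/lora_down pair, so a later remove=True call restores the exact original tensor instead of arithmetically undoing the merge. A minimal sketch of that merge-and-restore cycle on a single nn.Linear; shapes are illustrative, not taken from any checkpoint:

    import torch
    import torch.nn as nn

    layer = nn.Linear(8, 8)
    weight_up = torch.randn(8, 4)    # stand-in for a '...lora_up.weight' tensor
    weight_down = torch.randn(4, 8)  # stand-in for a '...lora_down.weight' tensor

    # first touch: cache the pristine weight, like origin_weight[storage_key]
    origin = layer.weight.data.clone()

    # inject: W += alpha * up @ down (the linear branch above, with alpha=1.0)
    layer.weight.data += 1.0 * torch.mm(weight_up, weight_down)

    # remove: exact restore from the cache, no floating-point residue
    layer.weight.data = origin.clone()
    assert torch.equal(layer.weight.data, origin)

Conv weights get the same treatment with the 1x1 spatial dims squeezed out before the matrix product and unsqueezed back before the add; change_lora_v2 then threads the cache through a remove-then-inject pair so styles can be swapped repeatedly without drift.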
videocrafter_test.py CHANGED
@@ -13,7 +13,7 @@ from lvdm.utils.common_utils import str2bool
 from lvdm.utils.saving_utils import npz_to_video_grid, npz_to_imgsheet_5d
 from scripts.sample_text2video import sample_text2video
 from scripts.sample_utils import load_model, get_conditions, make_model_input_shape, torch_to_np
-from lvdm.models.modules.lora import change_lora
+from lvdm.models.modules.lora import change_lora, change_lora_v2
 
 from huggingface_hub import hf_hub_download
 
@@ -50,13 +50,14 @@ class Text2Video():
         self.result_dir = result_dir
         self.save_fps = 8
         self.ddim_sampler = DDIMSampler(model)
+        self.origin_weight = None
 
     def get_prompt(self, input_text, steps=50, model_index=0, eta=1.0, cfg_scale=15.0, lora_scale=1.0, trigger_word=''):
-        if trigger_word != '':
+        if trigger_word != ' ':
             input_text = input_text + ', ' + trigger_word
         inject_lora = model_index > 0
-        change_lora(self.model, inject_lora=inject_lora, lora_scale=lora_scale, lora_path=self.lora_path_list[model_index],
-                    last_time_lora=self.last_time_lora, last_time_lora_scale=self.last_time_lora_scale)
+        self.origin_weight = change_lora_v2(self.model, inject_lora=inject_lora, lora_scale=lora_scale, lora_path=self.lora_path_list[model_index],
+                                            last_time_lora=self.last_time_lora, last_time_lora_scale=self.last_time_lora_scale, origin_weight=self.origin_weight)
 
         all_videos = sample_text2video(self.model, input_text, n_samples=1, batch_size=1,
                                        sample_type='ddim', sampler=self.ddim_sampler,