add compose
- app.py +4 -2
- demo/demos.py +32 -0
- demo/model.py +106 -4
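
In short, this commit wires a new 'Multi-adapters (Depth & Keypose)' tab into the Space: app.py registers the tab, demo/demos.py adds the create_demo_depth_keypose UI builder, and demo/model.py adds Model_all.process_depth_keypose, which turns the two guidance inputs into depth and keypose feature pyramids and fuses them with per-adapter weights before DDIM sampling. A minimal sketch of that fusion step follows; fuse_adapter_features and the toy tensor shapes are illustrative assumptions, only the weighted-sum expression comes from the diff.

# Sketch of the weighted multi-adapter fusion added in process_depth_keypose.
# The helper name and shapes are assumptions; the zip/weighted-sum mirrors the diff.
import torch

def fuse_adapter_features(feats_depth, feats_keypose, w_depth=1.0, w_keypose=1.5):
    # One weighted sum per scale of the two adapters' feature pyramids.
    return [f_d * w_depth + f_k * w_keypose
            for f_d, f_k in zip(feats_depth, feats_keypose)]

# Toy pyramids with the channel counts the adapters are built with.
feats_depth = [torch.randn(1, c, 8, 8) for c in (320, 640, 1280, 1280)]
feats_keypose = [torch.randn(1, c, 8, 8) for c in (320, 640, 1280, 1280)]
fused = fuse_adapter_features(feats_depth, feats_keypose)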
app.py
CHANGED
@@ -8,7 +8,7 @@ os.system('mim install mmcv-full==1.7.0')
 
 from demo.model import Model_all
 import gradio as gr
-from demo.demos import create_demo_keypose, create_demo_sketch, create_demo_draw, create_demo_seg, create_demo_depth
+from demo.demos import create_demo_keypose, create_demo_sketch, create_demo_draw, create_demo_seg, create_demo_depth, create_demo_depth_keypose
 import torch
 import subprocess
 import shlex
@@ -44,7 +44,7 @@ for url in urls_mmpose:
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 model = Model_all(device)
 
-DESCRIPTION = '''# T2I-Adapter (Sketch & Keypose & Segmentation)
+DESCRIPTION = '''# T2I-Adapter (Sketch & Keypose & Segmentation & Depth)
 [Paper](https://arxiv.org/abs/2302.08453) [GitHub](https://github.com/TencentARC/T2I-Adapter)
 
 This gradio demo is for a simple experience of T2I-Adapter:
@@ -74,5 +74,7 @@ with gr.Blocks(css='style.css') as demo:
             create_demo_seg(model.process_seg)
         with gr.TabItem('Depth'):
            create_demo_depth(model.process_depth)
+        with gr.TabItem('Multi-adapters (Depth & Keypose)'):
+            create_demo_depth_keypose(model.process_depth_keypose)
 
 demo.queue().launch(debug=True, server_name='0.0.0.0')
demo/demos.py
CHANGED
@@ -120,6 +120,38 @@ def create_demo_depth(process):
         run_button.click(fn=process, inputs=ips, outputs=[result])
     return demo
 
+def create_demo_depth_keypose(process):
+    with gr.Blocks() as demo:
+        with gr.Row():
+            gr.Markdown('## T2I-Adapter (Depth & Keypose)')
+        with gr.Row():
+            with gr.Column():
+                with gr.Row():
+                    input_img_depth = gr.Image(source='upload', type="numpy", label='Depth guidance')
+                    input_img_keypose = gr.Image(source='upload', type="numpy", label='Keypose guidance')
+
+                prompt = gr.Textbox(label="Prompt")
+                neg_prompt = gr.Textbox(label="Negative Prompt",
+                                        value='ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face')
+                pos_prompt = gr.Textbox(label="Positive Prompt",
+                                        value='crafted, elegant, meticulous, magnificent, maximum details, extremely hyper aesthetic, intricately detailed')
+                with gr.Row():
+                    type_in_depth = gr.inputs.Radio(['Depth', 'Image'], type="value", default='Image', label='You can input an image or a depth map')
+                    type_in_keypose = gr.inputs.Radio(['Keypose', 'Image'], type="value", default='Image', label='You can input an image or a keypose map (mmpose style)')
+                with gr.Row():
+                    w_depth = gr.Slider(label="Depth guidance weight", minimum=0, maximum=2, value=1.0, step=0.1)
+                    w_keypose = gr.Slider(label="Keypose guidance weight", minimum=0, maximum=2, value=1.5, step=0.1)
+                run_button = gr.Button(label="Run")
+                con_strength = gr.Slider(label="Controlling Strength (the guidance strength of the multi-guidance on the result)", minimum=0, maximum=1, value=1, step=0.1)
+                scale = gr.Slider(label="Guidance Scale (classifier-free guidance)", minimum=0.1, maximum=30.0, value=7.5, step=0.1)
+                fix_sample = gr.inputs.Radio(['True', 'False'], type="value", default='False', label='Fix Sampling\n (Fix the random seed)')
+                base_model = gr.inputs.Radio(['sd-v1-4.ckpt', 'anything-v4.0-pruned.ckpt'], type="value", default='sd-v1-4.ckpt', label='The base model you want to use')
+            with gr.Column():
+                result = gr.Gallery(label='Output', show_label=False, elem_id="gallery").style(grid=3, height='auto')
+        ips = [input_img_depth, input_img_keypose, type_in_depth, type_in_keypose, w_depth, w_keypose, prompt, neg_prompt, pos_prompt, fix_sample, scale, con_strength, base_model]
+        run_button.click(fn=process, inputs=ips, outputs=[result])
+    return demo
+
 def create_demo_draw(process):
     with gr.Blocks() as demo:
         with gr.Row():
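
A reading aid for the hunk above: Gradio passes the `ips` components to `process` positionally, so their order has to match the signature of Model_all.process_depth_keypose added in demo/model.py below. The check below is illustrative only and not part of the commit.

# Illustrative check (not in the commit): ips order vs. the new method's parameters.
import inspect
from demo.model import Model_all

params = list(inspect.signature(Model_all.process_depth_keypose).parameters)[1:]  # drop `self`
assert params == ['input_img_depth', 'input_img_keypose', 'type_in_depth', 'type_in_keypose',
                  'w_depth', 'w_keypose', 'prompt', 'neg_prompt', 'pos_prompt',
                  'fix_sample', 'scale', 'con_strength', 'base_model']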
demo/model.py
CHANGED
@@ -135,8 +135,8 @@ class Model_all:
 
         # sketch part
         self.model_sketch = Adapter(channels=[320, 640, 1280, 1280][:4], nums_rb=2, ksize=1, sk=True,
-                                    use_conv=False)
-        self.model_sketch.load_state_dict(torch.load("models/t2iadapter_sketch_sd14v1.pth", map_location=device))
+                                    use_conv=False)#.to(device)
+        # self.model_sketch.load_state_dict(torch.load("models/t2iadapter_sketch_sd14v1.pth", map_location=device))
         self.model_edge = pidinet().to(device)
         self.model_edge.load_state_dict({k.replace('module.', ''): v for k, v in torch.load('models/table5_pidinet.pth', map_location=device)['state_dict'].items()})
 
@@ -144,8 +144,8 @@ class Model_all:
         self.model_seger = seger().to(device)
         self.model_seger.eval()
         self.coler = Colorize(n=182)
-        self.model_seg = Adapter(cin=int(3*64), channels=[320, 640, 1280, 1280][:4], nums_rb=2, ksize=1, sk=True, use_conv=False)
-        self.model_seg.load_state_dict(torch.load("models/t2iadapter_seg_sd14v1.pth", map_location=device))
+        self.model_seg = Adapter(cin=int(3*64), channels=[320, 640, 1280, 1280][:4], nums_rb=2, ksize=1, sk=True, use_conv=False)#.to(device)
+        # self.model_seg.load_state_dict(torch.load("models/t2iadapter_seg_sd14v1.pth", map_location=device))
         self.depth_model = MiDaSInference(model_type='dpt_hybrid').to(device)
 
         # depth part
@@ -311,6 +311,108 @@ class Model_all:
 
         return [im_depth, x_samples_ddim]
 
+    @torch.no_grad()
+    def process_depth_keypose(self, input_img_depth, input_img_keypose, type_in_depth, type_in_keypose, w_depth, w_keypose, prompt, neg_prompt, pos_prompt, fix_sample, scale, con_strength, base_model):
+        if self.current_base != base_model:
+            ckpt = os.path.join("models", base_model)
+            pl_sd = torch.load(ckpt, map_location="cuda")
+            if "state_dict" in pl_sd:
+                sd = pl_sd["state_dict"]
+            else:
+                sd = pl_sd
+            self.base_model.load_state_dict(sd, strict=False)
+            self.current_base = base_model
+            if 'anything' in base_model.lower():
+                self.load_vae()
+
+        if fix_sample == 'True':
+            seed_everything(42)
+        im_depth = cv2.resize(input_img_depth, (512, 512))
+        im_keypose = cv2.resize(input_img_keypose, (512, 512))
+
+        # get depth
+        if type_in_depth == 'Depth':
+            im_depth_out = im_depth.copy()
+            depth = img2tensor(im_depth).unsqueeze(0) / 255.
+        elif type_in_depth == 'Image':
+            im_depth = img2tensor(im_depth).unsqueeze(0) / 127.5 - 1.0
+            depth = self.depth_model(im_depth.to(self.device)).repeat(1, 3, 1, 1)
+            depth -= torch.min(depth)
+            depth /= torch.max(depth)
+            im_depth_out = tensor2img(depth)
+
+        # get keypose
+        if type_in_keypose == 'Keypose':
+            im_keypose_out = im_keypose.copy()
+            pose = img2tensor(im_keypose).unsqueeze(0) / 255.
+        elif type_in_keypose == 'Image':
+            image = im_keypose.copy()
+            im_keypose = img2tensor(im_keypose).unsqueeze(0) / 255.
+            mmdet_results = inference_detector(self.det_model, image)
+            # keep the person class bounding boxes.
+            person_results = process_mmdet_results(mmdet_results, self.det_cat_id)
+
+            # optional
+            return_heatmap = False
+            dataset = self.pose_model.cfg.data['test']['type']
+
+            # e.g. use ('backbone', ) to return backbone feature
+            output_layer_names = None
+            pose_results, _ = inference_top_down_pose_model(
+                self.pose_model,
+                image,
+                person_results,
+                bbox_thr=self.bbox_thr,
+                format='xyxy',
+                dataset=dataset,
+                dataset_info=None,
+                return_heatmap=return_heatmap,
+                outputs=output_layer_names)
+
+            # show the results
+            im_keypose_out = imshow_keypoints(
+                image,
+                pose_results,
+                skeleton=self.skeleton,
+                pose_kpt_color=self.pose_kpt_color,
+                pose_link_color=self.pose_link_color,
+                radius=2,
+                thickness=2)
+
+        # extract condition features
+        c = self.base_model.get_learned_conditioning([prompt + ', ' + pos_prompt])
+        nc = self.base_model.get_learned_conditioning([neg_prompt])
+        features_adapter_depth = self.model_depth(depth.to(self.device))
+        pose = img2tensor(im_keypose_out, bgr2rgb=True, float32=True) / 255.
+        pose = pose.unsqueeze(0)
+        features_adapter_keypose = self.model_pose(pose.to(self.device))
+        features_adapter = [f_d * w_depth + f_k * w_keypose for f_d, f_k in zip(features_adapter_depth, features_adapter_keypose)]
+        shape = [4, 64, 64]
+
+        # sampling
+        con_strength = int((1 - con_strength) * 50)
+        samples_ddim, _ = self.sampler.sample(S=50,
+                                              conditioning=c,
+                                              batch_size=1,
+                                              shape=shape,
+                                              verbose=False,
+                                              unconditional_guidance_scale=scale,
+                                              unconditional_conditioning=nc,
+                                              eta=0.0,
+                                              x_T=None,
+                                              features_adapter1=features_adapter,
+                                              mode='sketch',
+                                              con_strength=con_strength)
+
+        x_samples_ddim = self.base_model.decode_first_stage(samples_ddim)
+        x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
+        x_samples_ddim = x_samples_ddim.to('cpu')
+        x_samples_ddim = x_samples_ddim.permute(0, 2, 3, 1).numpy()[0]
+        x_samples_ddim = 255. * x_samples_ddim
+        x_samples_ddim = x_samples_ddim.astype(np.uint8)
+
+        return [im_depth_out, im_keypose_out, x_samples_ddim]
+
     @torch.no_grad()
     def process_seg(self, input_img, type_in, prompt, neg_prompt, pos_prompt, fix_sample, scale,
                     con_strength, base_model):
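
For orientation, a hedged sketch of calling the new entry point directly, outside the Gradio UI. The argument order follows the signature in the hunk above; the image paths, prompts and example values are placeholders, not part of the commit, and Model_all still expects the Space's checkpoints under models/.

# Sketch only: exercise Model_all.process_depth_keypose without the UI.
import cv2
import torch
from demo.model import Model_all

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Model_all(device)

# Gradio hands the callback RGB uint8 arrays, so convert after cv2.imread (BGR).
depth_guide = cv2.cvtColor(cv2.imread('examples/room.png'), cv2.COLOR_BGR2RGB)      # hypothetical path
keypose_guide = cv2.cvtColor(cv2.imread('examples/person.png'), cv2.COLOR_BGR2RGB)  # hypothetical path

im_depth_out, im_keypose_out, result = model.process_depth_keypose(
    depth_guide, keypose_guide,
    'Image', 'Image',   # derive the depth / keypose maps from plain images
    1.0, 1.5,           # w_depth, w_keypose
    'a person reading in a sunlit room',       # prompt (placeholder)
    'ugly, deformed, low contrast',            # negative prompt (placeholder)
    'maximum details, intricately detailed',   # positive prompt (placeholder)
    'False',            # fix_sample
    7.5,                # scale
    1.0,                # con_strength
    'sd-v1-4.ckpt')     # base_model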