Spaces: Runtime error
Commit 86d2837
Parent(s): 9c7e8e1
limit to 10 rows from 1 user for diversity.
app.py CHANGED
@@ -1,6 +1,6 @@
 
 
-
+# TODO unify/merge origin and this
 # TODO save & restart from (if it exists) dataframe parquet
 import torch
 
@@ -37,12 +37,9 @@ torch.set_grad_enabled(False)
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
 
-prevs_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'latest_user_to_rate'])
+prevs_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'latest_user_to_rate', 'from_user_id'])
 
 import spaces
-prompt_list = [p for p in list(set(
-    pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
-
 start_time = time.time()
 
 ####################### Setup Model
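The widened prevs_df schema above adds a 'from_user_id' column so every generated row records which user's embedding spawned it; later hunks use that column to cap and filter rows per user. A minimal sketch of the bookkeeping with toy values (the column names match the diff; the helper name and sample data are illustrative only):

import pandas as pd

prevs_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating',
                                 'latest_user_to_rate', 'from_user_id'])

def append_generated_row(df, path, emb, uid):
    # One row per generated clip; 'user:rating' starts as the {' ': ' '} placeholder used in app.py.
    row = pd.DataFrame({'paths': [path], 'embeddings': [emb],
                        'user:rating': [{' ': ' '}], 'from_user_id': [uid]})
    return pd.concat((df, row), ignore_index=True)

prevs_df = append_generated_row(prevs_df, './img_1.mp4', [0.1, 0.2], uid='user-a')
print(prevs_df[['paths', 'from_user_id']])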
@@ -55,13 +52,13 @@ from transformers import CLIPVisionModelWithProjection
 import uuid
 import av
 
-def write_video(file_name, images, fps=17):
+def write_video_av(file_name, images, fps=17):
     print('Saving')
     container = av.open(file_name, mode="w")
 
     stream = container.add_stream("h264", rate=fps)
     # stream.options = {'preset': 'faster'}
-    stream.thread_count =
+    stream.thread_count = -1
     stream.width = 512
     stream.height = 512
     stream.pix_fmt = "yuv420p"
@@ -79,8 +76,16 @@ def write_video(file_name, images, fps=17):
     container.close()
     print('Saved')
 
+def write_video(file_name, images, fps=15):
+    writer = imageio.get_writer(file_name, fps=fps)
+
+    for im in images:
+        writer.append_data(np.array(im))
+    writer.close()
+
 
-image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="sdxl_models/image_encoder", torch_dtype=dtype
+image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="sdxl_models/image_encoder", torch_dtype=dtype,
+                                                              device_map='cpu')
 #vae = AutoencoderTiny.from_pretrained("madebyollin/taesd", torch_dtype=dtype)
 
 # vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=dtype)
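The second write_video above drops the PyAV/h264 path in favor of imageio.get_writer, which takes numpy frames directly. A small self-contained usage sketch under the same assumptions (512x512 frames at fps=15; the random frames are placeholders, and writing .mp4 assumes the imageio-ffmpeg backend is installed):

import numpy as np
import imageio

def write_video(file_name, images, fps=15):
    writer = imageio.get_writer(file_name, fps=fps)
    for im in images:
        writer.append_data(np.array(im))  # accepts PIL images or ndarrays
    writer.close()

frames = [np.random.randint(0, 255, (512, 512, 3), dtype=np.uint8) for _ in range(30)]
write_video('demo.mp4', frames, fps=15)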
@@ -91,8 +96,9 @@ image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter",
 #text_encoder = CLIPTextModel.from_pretrained(finetune_path+'/text_encoder/').to(dtype)
 
 
-unet = UNet2DConditionModel.from_pretrained('rynmurdock/Sea_Claws', subfolder='unet').to(dtype)
-text_encoder = CLIPTextModel.from_pretrained('rynmurdock/Sea_Claws', subfolder='text_encoder'
+unet = UNet2DConditionModel.from_pretrained('rynmurdock/Sea_Claws', subfolder='unet',).to(dtype).to('cpu')
+text_encoder = CLIPTextModel.from_pretrained('rynmurdock/Sea_Claws', subfolder='text_encoder',
+                                             device_map='cpu').to(dtype)
 
 adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")
 pipe = AnimateDiffPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", motion_adapter=adapter, image_encoder=image_encoder, torch_dtype=dtype, unet=unet, text_encoder=text_encoder)
@@ -101,6 +107,7 @@ pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_
 pipe.set_adapters(["lcm-lora"], [.9])
 pipe.fuse_lora()
 
+
 #pipe = AnimateDiffPipeline.from_pretrained('emilianJR/epiCRealism', torch_dtype=dtype, image_encoder=image_encoder)
 #pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
 #repo = "ByteDance/AnimateDiff-Lightning"
@@ -116,8 +123,7 @@ pipe.unet.fuse_qkv_projections()
 pipe.to(device=DEVICE)
 #pipe.unet = torch.compile(pipe.unet)
 #pipe.vae = torch.compile(pipe.vae)
-
-
+# TODO cannot compile on Spaces or we time out; don't run leave_imb stuff either
 #im_embs = torch.zeros(1, 1, 1, 1280, device=DEVICE, dtype=dtype)
 #output = pipe(prompt='a person', guidance_scale=0, added_cond_kwargs={}, ip_adapter_image_embeds=[im_embs], num_inference_steps=STEPS)
 #leave_im_emb, _ = pipe.encode_image(
@@ -126,13 +132,13 @@ pipe.to(device=DEVICE)
 #assert len(output.frames[0]) == 16
 #leave_im_emb.detach().to('cpu')
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=10)
 def generate_gpu(in_im_embs):
     print('start gen')
     in_im_embs = in_im_embs.to('cuda').unsqueeze(0).unsqueeze(0)
     #im_embs = torch.cat((torch.zeros(1, 1280, device=DEVICE, dtype=dtype), in_im_embs), 0)
 
-    output = pipe(prompt='
+    output = pipe(prompt='', guidance_scale=0, added_cond_kwargs={}, ip_adapter_image_embeds=[in_im_embs], num_inference_steps=STEPS)
     print('image is made')
     im_emb, _ = pipe.encode_image(
         output.frames[0][len(output.frames[0])//2], 'cuda', 1, output_hidden_state
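@spaces.GPU(duration=10) above requests a short ZeroGPU allocation (roughly ten seconds) for each call to generate_gpu instead of the default window. A minimal sketch of the decorator's shape; the function body here is a stand-in, not the app's pipeline:

import spaces
import torch

@spaces.GPU(duration=10)  # ask ZeroGPU for ~10 s of GPU time per call
def embed_on_gpu(x: torch.Tensor) -> torch.Tensor:
    return (x.to('cuda') * 2).to('cpu')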
@@ -163,10 +169,6 @@ def generate(in_im_embs):
 
 #######################
 
-
-# TODO only generate ~5 new images ahead from a specific user embedding. Do this by tracking a column of who's embedding it was and
-# taking the intersection for unrated by that user and from that users' embedding. Then we keep styles less consistent for better variety.
-
 def get_user_emb(embs, ys):
     # handle case where every instance of calibration videos is 'Neither' or 'Like' or 'Dislike'
     if len(list(set(ys))) <= 1:
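The TODO removed above (track whose embedding spawned each clip, then take the intersection with that user's unrated rows) is what the new 'from_user_id' plumbing implements. A compact sketch of that intersection with toy data; the frame contents are illustrative:

import pandas as pd

prevs_df = pd.DataFrame({
    'paths': ['a.mp4', 'b.mp4', 'c.mp4'],
    'user:rating': [{'u1': 1}, {' ': ' '}, {'u1': 0}],
    'from_user_id': ['u1', 'u1', 'u2'],
})

uid = 'u1'
unrated_by_uid = [row['user:rating'].get(uid) is None for _, row in prevs_df.iterrows()]
spawned_by_uid = [row['from_user_id'] == uid for _, row in prevs_df.iterrows()]
both = [a and b for a, b in zip(unrated_by_uid, spawned_by_uid)]
print(prevs_df[both])  # rows generated for u1 that u1 has not rated yet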
@@ -245,7 +247,17 @@ def background_next_image():
     for uid in user_id_list:
         rated_rows = prevs_df[[i[1]['user:rating'].get(uid, None) is not None for i in prevs_df.iterrows()]]
         not_rated_rows = prevs_df[[i[1]['user:rating'].get(uid, None) is None for i in prevs_df.iterrows()]]
-
+
+        # we need to intersect not_rated_rows from this user's embed > 7. Just add a new column on which user_id spawned the
+        # media.
+
+        from_user = prevs_df[[i[1]['from_user_id'] == uid for i in prevs_df.iterrows()]]
+        if len(from_user) >= 10:
+            oldest = from_user.iloc[-1]['paths']
+            print(f'User has {len(from_user)} rows. Popping oldest: {oldest}')
+            prevs_df = prevs_df[prevs_df['paths'] != oldest]
+
+        if len(rated_rows) < 4:
             print(f'latest user {uid} has < 4 rows') # or > 7 unrated rows')
             continue
 
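The block above is the commit's headline change: once a user has spawned 10 or more rows, one of that user's clips is dropped so no single user dominates prevs_df. A standalone sketch of the same cap with toy data (the cap of 10 and the column names come from the diff; the helper name and sample frame are illustrative):

import pandas as pd

def cap_user_rows(df, uid, cap=10):
    from_user = df[df['from_user_id'] == uid]          # rows spawned by this user's embedding
    if len(from_user) >= cap:
        popped = from_user.iloc[-1]['paths']           # app.py pops the last row of the selection
        print(f'User has {len(from_user)} rows. Popping: {popped}')
        df = df[df['paths'] != popped]
    return df

df = pd.DataFrame({'paths': [f'{i}.mp4' for i in range(12)],
                   'from_user_id': ['u1'] * 12})
df = cap_user_rows(df, 'u1')
print(len(df))  # 11, one clip was dropped for exceeding the cap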
@@ -260,6 +272,7 @@ def background_next_image():
         tmp_df['paths'] = [img]
         tmp_df['embeddings'] = [embs]
         tmp_df['user:rating'] = [{' ': ' '}]
+        tmp_df['from_user_id'] = [uid]
         prevs_df = pd.concat((prevs_df, tmp_df))
         # we can free up storage by deleting the image
         if len(prevs_df) > 50:
@@ -345,7 +358,9 @@ def choose(img, choice, calibrate_prompts, user_id, request: gr.Request):
         choice = 0
 
     row_mask = [p.split('/')[-1] in img for p in prevs_df['paths'].to_list()]
-
+
+
+    if len(prevs_df.loc[row_mask, 'user:rating']) > 0:
         prevs_df.loc[row_mask, 'user:rating'][0][user_id] = choice
         prevs_df.loc[row_mask, 'latest_user_to_rate'] = [user_id]
     img, calibrate_prompts = next_image(calibrate_prompts, user_id)
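The added length check above guards the rating write: if row_mask matches nothing (for example, the clip was already evicted by the per-user cap), indexing the empty selection with [0] would fail, so the write is skipped instead. A tiny illustration of the case being avoided, with a toy frame:

import pandas as pd

prevs_df = pd.DataFrame({'paths': ['a.mp4'], 'user:rating': [{}]})
row_mask = [False]  # the rated clip is no longer present in the frame

selection = prevs_df.loc[row_mask, 'user:rating']
if len(selection) > 0:
    selection[0]['some_user'] = 1   # safe: only index when the selection is non-empty
else:
    print('row already evicted; skipping the rating write')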
@@ -411,6 +426,7 @@ Explore the latent space without text prompts based on your preferences. Learn m
     ''', elem_id="description")
     user_id = gr.State()
     print('USER_ID: ',user_id)
+    # calibration videos -- this is a misnomer now :D
     calibrate_prompts = gr.State([
         './first.mp4',
         './second.mp4',
@@ -429,7 +445,7 @@ Explore the latent space without text prompts based on your preferences. Learn m
         interactive=False,
         height=512,
         width=512,
-        include_audio=False,
+        #include_audio=False,
         elem_id="video_output"
     )
     img.play(l, js='''document.querySelector('[data-testid="Lightning-player"]').loop = true''')
@@ -471,12 +487,12 @@ log = logging.getLogger('log_here')
 log.setLevel(logging.ERROR)
 
 scheduler = BackgroundScheduler()
-scheduler.add_job(func=background_next_image, trigger="interval", seconds
+scheduler.add_job(func=background_next_image, trigger="interval", seconds=.1)
 scheduler.start()
 
 def encode_space(x):
     im_emb, _ = pipe.encode_image(
-        image,
+        image, DEVICE, 1, output_hidden_state
     )
     return im_emb.detach().to('cpu').to(torch.float32)
 
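The scheduler wiring above uses APScheduler's BackgroundScheduler to call background_next_image every 0.1 seconds on a daemon thread. A minimal standalone sketch of the same pattern; the tick job is a placeholder:

import time
from apscheduler.schedulers.background import BackgroundScheduler

def tick():
    print('generate the next clip here')

scheduler = BackgroundScheduler()
scheduler.add_job(func=tick, trigger="interval", seconds=.1)  # same interval as app.py
scheduler.start()

time.sleep(1)        # let a few intervals fire
scheduler.shutdown()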