RealVis_v5.0_BF16_IP_B

Running on Zero

App Files Files Community

1inkusFace committed on Jan 20

Commit

690a432

verified ·

1 Parent(s): 7d3c40d

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -64

app.py CHANGED Viewed

@@ -232,6 +232,101 @@ def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
         f.write(f"Model VAE: sdxl-vae to bfloat safetensor=false before cuda then attn_proc / scale factor 8 \n")
         f.write(f"Model UNET: ford442/RealVisXL_V5.0_BF16 \n")
     upload_to_ftp(filename)
 @spaces.GPU(duration=40)
 def generate_30(
@@ -266,39 +361,45 @@ def generate_30(
         sd_image_a = Image.open(latent_file.name).convert('RGB')
         sd_image_a.resize((height,width), Image.LANCZOS)
         caption=[]
         caption.append(captioner(sd_image_a))
         caption.append(captioner_2(sd_image_a))
         caption.append(captioner_3(sd_image_a))
         if latent_file_2 is not None:  # Check if a latent file is provided
             sd_image_b = Image.open(latent_file_2.name).convert('RGB')
             sd_image_b.resize((height,width), Image.LANCZOS)
-            caption.append(captioner(sd_image_a))
-            caption.append(captioner_2(sd_image_a))
-            caption.append(captioner_3(sd_image_a))
         else:
             sd_image_b = None
         if latent_file_3 is not None:  # Check if a latent file is provided
             sd_image_c = Image.open(latent_file_3.name).convert('RGB')
             sd_image_c.resize((height,width), Image.LANCZOS)
-            caption.append(captioner(sd_image_a))
-            caption.append(captioner_2(sd_image_a))
-            caption.append(captioner_3(sd_image_a))
         else:
             sd_image_c = None
         if latent_file_4 is not None:  # Check if a latent file is provided
             sd_image_d = Image.open(latent_file_4.name).convert('RGB')
             sd_image_d.resize((height,width), Image.LANCZOS)
-            caption.append(captioner(sd_image_a))
-            caption.append(captioner_2(sd_image_a))
-            caption.append(captioner_3(sd_image_a))
         else:
             sd_image_d = None
         if latent_file_5 is not None:  # Check if a latent file is provided
             sd_image_e = Image.open(latent_file_5.name).convert('RGB')
             sd_image_e.resize((height,width), Image.LANCZOS)
-            caption.append(captioner(sd_image_a))
-            caption.append(captioner_2(sd_image_a))
-            caption.append(captioner_3(sd_image_a))
         else:
             sd_image_e = None
         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -307,58 +408,9 @@ def generate_30(
         print(caption)
         print("-- generating further caption --")
-        system_prompt_rewrite = (
-            "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
-        )
-        user_prompt_rewrite = (
-            "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
-        )
-        user_prompt_rewrite_2 = (
-            "Rephrase this scene to have more elaborate details: "
-        )
-        input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
-        input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
-        print("-- got prompt --")
-        # Encode the input text and include the attention mask
-        encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
-        encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
-        # Ensure all values are on the correct device
-        input_ids = encoded_inputs["input_ids"].to(device)
-        input_ids_2 = encoded_inputs_2["input_ids"].to(device)
-        attention_mask = encoded_inputs["attention_mask"].to(device)
-        attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
-        print("-- tokenize prompt --")
-          # Google T5
-        #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
-        outputs = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            max_new_tokens=512,
-            temperature=0.2,
-            top_p=0.9,
-            do_sample=True,
-        )
-        outputs_2 = model.generate(
-            input_ids=input_ids_2,
-            attention_mask=attention_mask_2,
-            max_new_tokens=65,
-            temperature=0.2,
-            top_p=0.9,
-            do_sample=True,
-        )
-        # Use the encoded tensor 'text_inputs' here
-        enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
-        print('-- generated prompt --')
-        enhanced_prompt = filter_text(enhanced_prompt,prompt)
-        enhanced_prompt_2 = filter_text(enhanced_prompt_2,prompt)
-        print('-- filtered prompt --')
-        print(enhanced_prompt)
-        print('-- filtered prompt 2 --')
-        print(enhanced_prompt_2)
         print('-- generating image --')

         f.write(f"Model VAE: sdxl-vae to bfloat safetensor=false before cuda then attn_proc / scale factor 8 \n")
         f.write(f"Model UNET: ford442/RealVisXL_V5.0_BF16 \n")
     upload_to_ftp(filename)
def captioning(img):
    """Generate a list of short caption fragments describing ``img``.

    The image is captioned once without a text prompt (that caption is only
    printed), then once per entry in ``prompts_array`` with the entry used as
    the text prefix, and finally once more with the last generated text
    extended by "So therefore".  For every prompted run the prompt is removed
    from the decoded text so only the model's continuation is kept.

    Relies on the module-level ``processor5`` and ``model5`` objects and moves
    the processed inputs to ``'cuda'``.

    Args:
        img: Image handed to ``processor5`` as ``images=``.

    Returns:
        list[str]: one response per entry in ``prompts_array`` followed by the
        "So therefore" continuation.
    """
    prompts_array = [
        "Adjectives describing this scene are:",
        "The color scheme of this image is",
        "This scene could be described in detail as",
        "The characters in this scene are",
        "The larger details in this scene include",
        "The smaller details in this scene include",
        "The feeling this scene seems like",
        "The setting of this scene must be located",
        # Add more prompts here
    ]
    output_prompt = []

    def decode_first(generated_ids):
        # Only a single image is processed, so only the first sequence matters.
        decoded = processor5.batch_decode(generated_ids, skip_special_tokens=True)
        return decoded[0].strip()

    # Initial caption generation without a prompt (logged, not returned).
    inputsa = processor5(images=img, return_tensors="pt").to('cuda')
    generated_ids = model5.generate(**inputsa, min_length=42, max_length=42)
    generated_text = decode_first(generated_ids)
    print(generated_text)

    # One generation per prompt; keep only the text the model added.
    for prompt in prompts_array:
        inputs = processor5(images=img, text=prompt, return_tensors="pt").to('cuda')
        generated_ids = model5.generate(**inputs, min_length=32, max_length=42)  # Adjust max_length if needed
        generated_text = decode_first(generated_ids)
        response_text = generated_text.replace(prompt, "").strip()
        output_prompt.append(response_text)
        print(f"{response_text}\n")  # Print only the response text

    # Continue from the last generated text.  The continuation prompt is kept
    # in its own variable so it can be stripped from the decoded output
    # afterwards; replacing the decoded text with itself always gave "".
    # A separating space is added so the two sentences do not run together.
    continuation_prompt = f"{generated_text} So therefore"
    inputf = processor5(images=img, text=continuation_prompt, return_tensors="pt").to('cuda')
    # NOTE(review): if max_length counts the prompt tokens for this model, a
    # long continuation prompt leaves little room for new text -- confirm.
    generated_ids = model5.generate(**inputf, max_length=42)
    generated_text = decode_first(generated_ids)
    response_text = generated_text.replace(continuation_prompt, "").strip()
    print(response_text)
    output_prompt.append(response_text)

    print(output_prompt)
    return output_prompt
def _prompt_to_text(prompt):
    """Flatten a string or (nested) list/tuple of fragments into one string."""
    if isinstance(prompt, str):
        return prompt
    if isinstance(prompt, (list, tuple)):
        # Empty fragments are dropped so they do not leave dangling commas.
        parts = (_prompt_to_text(item) for item in prompt)
        return ", ".join(part for part in parts if part)
    return str(prompt)


def expand_prompt(prompt):
    """Produce two more detailed rewrites of ``prompt`` with the text model.

    Two instruction-prefixed inputs are built from ``prompt``, tokenized with
    the module-level ``txt_tokenizer``, moved to ``device`` and passed to
    ``model.generate`` with sampling enabled.  Each decoded output is then
    cleaned with ``filter_text`` and printed.

    Args:
        prompt: The text to expand.  A list/tuple of fragments (possibly
            nested) is also accepted and is joined with ", " first, so a list
            of captions is not embedded as a Python list repr.

    Returns:
        list[str]: ``[enhanced_prompt, enhanced_prompt_2]`` -- the long rewrite
        (up to 512 new tokens) and the short rephrase (up to 65 new tokens).
    """
    prompt = _prompt_to_text(prompt)

    system_prompt_rewrite = (
        "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
    )
    user_prompt_rewrite = (
        "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
    )
    user_prompt_rewrite_2 = (
        "Rephrase this scene to have more elaborate details: "
    )
    input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
    input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
    print("-- got prompt --")

    # Encode the input text and include the attention mask.
    encoded_inputs = txt_tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
    encoded_inputs_2 = txt_tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
    # Ensure all values are on the correct device.
    input_ids = encoded_inputs["input_ids"].to(device)
    input_ids_2 = encoded_inputs_2["input_ids"].to(device)
    attention_mask = encoded_inputs["attention_mask"].to(device)
    attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
    print("-- tokenize prompt --")

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=512,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
    )
    outputs_2 = model.generate(
        input_ids=input_ids_2,
        attention_mask=attention_mask_2,
        max_new_tokens=65,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
    )

    enhanced_prompt = txt_tokenizer.decode(outputs[0], skip_special_tokens=True)
    enhanced_prompt_2 = txt_tokenizer.decode(outputs_2[0], skip_special_tokens=True)
    print('-- generated prompt --')
    enhanced_prompt = filter_text(enhanced_prompt, prompt)
    enhanced_prompt_2 = filter_text(enhanced_prompt_2, prompt)
    print('-- filtered prompt --')
    print(enhanced_prompt)
    print('-- filtered prompt 2 --')
    print(enhanced_prompt_2)

    enh_prompt = [enhanced_prompt, enhanced_prompt_2]
    return enh_prompt
 @spaces.GPU(duration=40)
 def generate_30(
         sd_image_a = Image.open(latent_file.name).convert('RGB')
         sd_image_a.resize((height,width), Image.LANCZOS)
         caption=[]
+        caption_2=[]
         caption.append(captioner(sd_image_a))
         caption.append(captioner_2(sd_image_a))
         caption.append(captioner_3(sd_image_a))
+        caption_2.append(captioning(sd_image_a))
         if latent_file_2 is not None:  # Check if a latent file is provided
             sd_image_b = Image.open(latent_file_2.name).convert('RGB')
             sd_image_b.resize((height,width), Image.LANCZOS)
+            caption.append(captioner(sd_image_b))
+            caption.append(captioner_2(sd_image_b))
+            caption.append(captioner_3(sd_image_b))
+            caption_2.append(captioning(sd_image_b))
         else:
             sd_image_b = None
         if latent_file_3 is not None:  # Check if a latent file is provided
             sd_image_c = Image.open(latent_file_3.name).convert('RGB')
             sd_image_c.resize((height,width), Image.LANCZOS)
+            caption.append(captioner(sd_image_c))
+            caption.append(captioner_2(sd_image_c))
+            caption.append(captioner_3(sd_image_c))
+            caption_2.append(captioning(sd_image_c))
         else:
             sd_image_c = None
         if latent_file_4 is not None:  # Check if a latent file is provided
             sd_image_d = Image.open(latent_file_4.name).convert('RGB')
             sd_image_d.resize((height,width), Image.LANCZOS)
+            caption.append(captioner(sd_image_d))
+            caption.append(captioner_2(sd_image_d))
+            caption.append(captioner_3(sd_image_d))
+            caption_2.append(captioning(sd_image_d))
         else:
             sd_image_d = None
         if latent_file_5 is not None:  # Check if a latent file is provided
             sd_image_e = Image.open(latent_file_5.name).convert('RGB')
             sd_image_e.resize((height,width), Image.LANCZOS)
+            caption.append(captioner(sd_image_e))
+            caption.append(captioner_2(sd_image_e))
+            caption.append(captioner_3(sd_image_e))
+            caption_2.append(captioning(sd_image_e))
         else:
             sd_image_e = None
         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
         print(caption)
         print("-- generating further caption --")
+        expand_prompt(prompt)
+        expand_prompt(caption)
+        expand_prompt(caption_2)
         print('-- generating image --')