Update app.py

app.py CHANGED
@@ -122,55 +122,59 @@ def infer(
 ):
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if expanded:
+        system_prompt_rewrite = (
+            "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
+        )
+        user_prompt_rewrite = (
+            "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
+        )
+        user_prompt_rewrite_2 = (
+            "Rephrase this scene to have more elaborate details: "
+        )
+        input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
+        input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
+        print("-- got prompt --")
+        # Encode the input text and include the attention mask
+        encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
+        encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
+        # Ensure all values are on the correct device
+        input_ids = encoded_inputs["input_ids"].to(device)
+        input_ids_2 = encoded_inputs_2["input_ids"].to(device)
+        attention_mask = encoded_inputs["attention_mask"].to(device)
+        attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
+        print("-- tokenize prompt --")
+        # Google T5
+        #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
+        outputs = model.generate(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            max_new_tokens=512,
+            temperature=0.2,
+            top_p=0.9,
+            do_sample=True,
+        )
+        outputs_2 = model.generate(
+            input_ids=input_ids_2,
+            attention_mask=attention_mask_2,
+            max_new_tokens=65,
+            temperature=0.2,
+            top_p=0.9,
+            do_sample=True,
+        )
+        # Use the encoded tensor 'text_inputs' here
+        enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
+        print('-- generated prompt --')
+        enhanced_prompt = filter_text(enhanced_prompt, prompt)
+        enhanced_prompt_2 = filter_text(enhanced_prompt_2, prompt)
+        print('-- filtered prompt --')
+        print(enhanced_prompt)
+        print('-- filtered prompt 2 --')
+        print(enhanced_prompt_2)
+    else:
+        enhanced_prompt = prompt
+        enhanced_prompt_2 = prompt
     if latent_file: # Check if a latent file is provided
         # initial_latents = pipe.prepare_latents(
         #     batch_size=1,
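For context, the new `expanded` branch routes the user prompt through a language model twice before diffusion: one long rewrite capped at 512 new tokens and one short rephrase capped at 65. Below is a minimal standalone sketch of that pattern, assuming a T5-style seq2seq rewriter; the actual `tokenizer` and `model` are loaded elsewhere in app.py, and `google/flan-t5-large` here is only a placeholder checkpoint, not the Space's real one.

```python
# Minimal sketch of the prompt-expansion step added in this commit.
# Assumed checkpoint: google/flan-t5-large (the Space loads its own model).
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large").to(device)

def expand_prompt(prompt: str, instruction: str, max_new_tokens: int = 512) -> str:
    # Tokenize with an explicit attention mask, as the commit does.
    inputs = tokenizer(f"{instruction} {prompt}",
                       return_tensors="pt",
                       return_attention_mask=True).to(device)
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        temperature=0.2,   # low temperature keeps the rewrite close to the input
        top_p=0.9,
        do_sample=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Long rewrite (512 tokens) and short rephrase (65 tokens), as in the diff.
long_rewrite = expand_prompt(
    "a cat on a beach",
    "Rewrite this prompt to be more descriptive and detailed "
    "and only return the rewritten text: ")
short_rewrite = expand_prompt(
    "a cat on a beach",
    "Rephrase this scene to have more elaborate details: ",
    max_new_tokens=65)
```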
@@ -216,13 +220,19 @@ def infer(
         max_sequence_length=512
     ).images[0]
     print('-- got image --')
-
-
-
-
-
-
-
+
+    sd35_image = pipe.vae.decode(sd_image / 0.18215).sample
+    sd35_image = sd35_image.cpu().permute(0, 2, 3, 1).float().detach().numpy()
+    sd35_image = (sd35_image * 255).round().astype("uint8")
+    image_pil = Image.fromarray(sd35_image[0])
+    sd35_path = f"tst_rv_{seed}.png"
+    image_pil.save(sd35_path, optimize=False, compress_level=0)
+    upload_to_ftp(sd35_path)
+
+
+    #sd35_path = f"sd35_{seed}.png"
+    #sd_image.save(sd35_path, optimize=False, compress_level=0)
+    #upload_to_ftp(sd35_path)
 
     # Convert the generated image to a tensor
     #generated_image_tensor = torch.tensor([np.array(sd_image).transpose(2, 0, 1)]).to('cuda') / 255.0
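One caveat on this hunk: 0.18215 is the SD 1.x VAE scaling factor, and `sd_image` above is already a decoded image (`.images[0]`), so manual VAE decoding only makes sense if the pipeline is instead called with `output_type="latent"`. A minimal sketch of the conventional diffusers decode path for SD 3.x latents under that assumption, using the factors the VAE carries in its own config:

```python
# Sketch: manually decoding SD 3.5 latents with diffusers, assuming the
# pipeline is asked for latents rather than finished PIL images.
import torch
from PIL import Image
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-medium", torch_dtype=torch.bfloat16
).to("cuda")

# With output_type="latent", .images holds the latent tensor, not PIL images.
latents = pipe("a cat on a beach", output_type="latent").images

with torch.no_grad():
    # SD 3.x VAEs store their own scaling/shift factors in the config;
    # 0.18215 is the SD 1.x value and does not apply here.
    latents = latents / pipe.vae.config.scaling_factor + pipe.vae.config.shift_factor
    decoded = pipe.vae.decode(latents).sample

array = decoded.cpu().permute(0, 2, 3, 1).float().numpy()
array = ((array + 1) / 2).clip(0, 1)  # VAE output is in [-1, 1]
image = Image.fromarray((array[0] * 255).round().astype("uint8"))
image.save("sd35_preview.png", optimize=False, compress_level=0)
```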
@@ -293,7 +303,6 @@ def repeat_infer(
         i += 1
     return result, seed, image_path, enhanced_prompt
 
-
 with gr.Blocks(theme=gr.themes.Origin(), css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown(" # Text-to-Text-to-Image StableDiffusion 3.5 Medium (with refine)")