RealVis_v5.0_BF16_IP_B

Running on Zero

App Files Files Community

1inkusFace committed on Jan 21

Commit

37e2aa5

verified ·

1 Parent(s): 5116401

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -30

app.py CHANGED Viewed

@@ -110,7 +110,7 @@ def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str
     if not negative:
         negative = ""
     return p.replace("{prompt}", positive), n + negative
 #unetX = UNet2DConditionModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='unet', low_cpu_mem_usage=False, token=True) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
 def load_and_prepare_model():
@@ -128,7 +128,7 @@ def load_and_prepare_model():
         vae=None,
        # unet=None,
     )
     '''
     scaling_factor (`float`, *optional*, defaults to 0.18215):
             The component-wise standard deviation of the trained latent space computed using the first batch of the
@@ -141,7 +141,7 @@ def load_and_prepare_model():
             If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
             can be fine-tuned / trained to a lower range without losing too much precision in which case
             `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
     '''
     #pipe.vae=vaeX
     pipe.vae=vaeX.to(device)
@@ -163,9 +163,9 @@ def load_and_prepare_model():
     print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
     #print(f'UNET: {pipe.unet}')
     pipe.watermark=None
-    pipe.safety_checker=None
     return pipe
 # Preload and compile both models
 pipe = load_and_prepare_model()
@@ -181,13 +181,13 @@ checkpoint = "ford442/Phi-3.5-mini-instruct-bf16"
 #txt_tokenizer.tokenizer_legacy=False
 #model = Phi3ForCausalLM.from_pretrained(checkpoint).to('cuda:0')
 #model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map='cuda') #.to('cuda')
-model5 = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b").to('cuda')
 processor5 = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
 ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
 text_encoder_1=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True) #.to(device=device, dtype=torch.bfloat16)
 text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True) #.to(device=device, dtype=torch.bfloat16)
 MAX_SEED = np.iinfo(np.int32).max
 neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
@@ -212,7 +212,7 @@ def filter_text(text,phraseC):
   else:
         # Handle the case where no match is found
         return text
 def upload_to_ftp(filename):
     try:
         transport = paramiko.Transport((FTP_HOST, 22))
@@ -225,7 +225,7 @@ def upload_to_ftp(filename):
         print(f"Uploaded {filename} to FTP server")
     except Exception as e:
         print(f"FTP upload error: {e}")
 def save_image(img):
     unique_name = str(uuid.uuid4()) + ".png"
     img.save(unique_name,optimize=False,compress_level=0)
@@ -241,7 +241,7 @@ def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
         f.write(f"Guidance Scale: {guidance_scale} \n")
         f.write(f"SPACE SETUP: \n")
         f.write(f"Model UNET: ford442/RealVisXL_V5.0_BF16 \n")
-    upload_to_ftp(filename)
 def captioning(img):
     prompts_array = [
@@ -252,7 +252,7 @@ def captioning(img):
         "The larger details in this scene include",
         "The smaller details in this scene include",
      #   "The feeling this scene seems like",
-        "The setting of this scene must be located",
     # Add more prompts here
     ]
     output_prompt=[]
@@ -271,11 +271,11 @@ def captioning(img):
         length_penalty=1.0,
         temperature=1,
     )
     generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
     generated_text = generated_text.replace(cap_prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
     output_prompt.append(generated_text)
-    print(generated_text)
     # Loop through prompts array:
     for prompt in prompts_array:
         inputs = processor5(images=img, text=prompt, return_tensors="pt").to('cuda')
@@ -317,7 +317,7 @@ def captioning(img):
     output_prompt.append(response_text)
     output_prompt = " ".join(output_prompt)
     return output_prompt
 def flatten_and_stringify(data):
     """Flatten nested lists in *data* into one flat list of strings.

     Items found inside nested ``list`` elements (recursively) are emitted
     first; the ``str()`` of every non-list element of *data* follows, in
     its original order. Only ``list`` instances are descended into; any
     other container (for example a tuple) is stringified as a whole.
     """
     # First pass: collect the leaves of every nested list.
     from_nested = []
     for element in data:
         if isinstance(element, list):
             from_nested.extend(map(str, flatten_and_stringify(element)))
     # Second pass: stringify the elements that are not lists themselves.
     from_top_level = [
         str(element) for element in data if not isinstance(element, list)
     ]
     return from_nested + from_top_level
@@ -381,8 +381,8 @@ def expand_prompt(prompt):
         print(enhanced_prompt_2)
         enh_prompt=[enhanced_prompt,enhanced_prompt_2]
         '''
-        return enhanced_prompt
 @spaces.GPU(duration=40)
 def generate_30(
     prompt: str = "",
@@ -506,8 +506,8 @@ def generate_30(
         downscale1 = upscale.resize((upscale.width // 4, upscale.height // 4), Image.LANCZOS)
         downscale_path = f"rvIP_upscale_{timestamp}.png"
         downscale1.save(downscale_path,optimize=False,compress_level=0)
-        upload_to_ftp(downscale_path)
-        image_paths = [save_image(downscale1)]
     else:
         print('-- IMAGE REQUIRED --')
     return image_paths
@@ -635,8 +635,8 @@ def generate_60(
         downscale1 = upscale.resize((upscale.width // 4, upscale.height // 4), Image.LANCZOS)
         downscale_path = f"rvIP_upscale_{timestamp}.png"
         downscale1.save(downscale_path,optimize=False,compress_level=0)
-        upload_to_ftp(downscale_path)
-        image_paths = [save_image(downscale1)]
     else:
         print('-- IMAGE REQUIRED --')
     return image_paths
@@ -764,8 +764,8 @@ def generate_90(
         downscale1 = upscale.resize((upscale.width // 4, upscale.height // 4), Image.LANCZOS)
         downscale_path = f"rvIP_upscale_{timestamp}.png"
         downscale1.save(downscale_path,optimize=False,compress_level=0)
-        upload_to_ftp(downscale_path)
-        image_paths = [save_image(downscale1)]
     else:
         print('-- IMAGE REQUIRED --')
     return image_paths
@@ -818,7 +818,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
         run_button_30 = gr.Button("Run 30 Seconds", scale=0)
         run_button_60 = gr.Button("Run 60 Seconds", scale=0)
         run_button_90 = gr.Button("Run 90 Seconds", scale=0)
-    result = gr.Gallery(label="Result", columns=1, show_label=False)
     ip_strength =  gr.Slider(
             label="Image Strength",
             minimum=0.0,
@@ -938,7 +938,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
         outputs=negative_prompt,
         api_name=False,
     )
     gr.on(
         triggers=[
             run_button_30.click,
@@ -970,7 +970,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
         ],
         outputs=[result],
     )
     gr.on(
         triggers=[
             run_button_60.click,
@@ -1002,7 +1002,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
         ],
         outputs=[result],
     )
     gr.on(
         triggers=[
             run_button_90.click,
@@ -1051,7 +1051,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     gr.Markdown(
     """
     <div style="text-align: justify;">
-    ⚡This is the demo space for generating images using Stable Diffusion XL with quality styles, different models, and types. Try the sample prompts to generate higher quality images. Try the sample prompts for generating higher quality images.
     <a href='https://huggingface.co/spaces/prithivMLmods/Top-Prompt-Collection' target='_blank'>Try prompts</a>.
     </div>
     """)
@@ -1061,12 +1061,12 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     <div style="text-align: justify;">
     ⚠️ Users are accountable for the content they generate and are responsible for ensuring it meets appropriate ethical standards.
     </div>
-    """)
 def text_generation(input_text, seed):
     """Return the fixed placeholder text for the text-generation tab.

     Both *input_text* and *seed* are accepted but not used; the same
     constant string is returned on every call.
     """
     return "Text Generator Application by ecarbo"
 title = "Text Generator Demo GPT-Neo"
 description = "Text Generator Application by ecarbo"
@@ -1083,4 +1083,4 @@ if __name__ == "__main__":
         description=description,
     )
     combined_interface = gr.TabbedInterface([demo_interface, text_gen_interface], ["Image Generation", "Text Generation"])
-    combined_interface.launch(show_api=False)

     if not negative:
         negative = ""
     return p.replace("{prompt}", positive), n + negative
 #unetX = UNet2DConditionModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='unet', low_cpu_mem_usage=False, token=True) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
 def load_and_prepare_model():
         vae=None,
        # unet=None,
     )
     '''
     scaling_factor (`float`, *optional*, defaults to 0.18215):
             The component-wise standard deviation of the trained latent space computed using the first batch of the
             If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
             can be fine-tuned / trained to a lower range without losing too much precision in which case
             `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
     '''
     #pipe.vae=vaeX
     pipe.vae=vaeX.to(device)
     print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
     #print(f'UNET: {pipe.unet}')
     pipe.watermark=None
+    pipe.safety_checker=None
     return pipe
 # Preload and compile both models
 pipe = load_and_prepare_model()
 #txt_tokenizer.tokenizer_legacy=False
 #model = Phi3ForCausalLM.from_pretrained(checkpoint).to('cuda:0')
 #model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map='cuda') #.to('cuda')
+model5 = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b").to('cuda',torch.bfloat16)
 processor5 = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
 ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
 text_encoder_1=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True) #.to(device=device, dtype=torch.bfloat16)
 text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True) #.to(device=device, dtype=torch.bfloat16)
 MAX_SEED = np.iinfo(np.int32).max
 neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
   else:
         # Handle the case where no match is found
         return text
 def upload_to_ftp(filename):
     try:
         transport = paramiko.Transport((FTP_HOST, 22))
         print(f"Uploaded {filename} to FTP server")
     except Exception as e:
         print(f"FTP upload error: {e}")
 def save_image(img):
     unique_name = str(uuid.uuid4()) + ".png"
     img.save(unique_name,optimize=False,compress_level=0)
         f.write(f"Guidance Scale: {guidance_scale} \n")
         f.write(f"SPACE SETUP: \n")
         f.write(f"Model UNET: ford442/RealVisXL_V5.0_BF16 \n")
+    upload_to_ftp(filename)
 def captioning(img):
     prompts_array = [
         "The larger details in this scene include",
         "The smaller details in this scene include",
      #   "The feeling this scene seems like",
+        "The setting of this scene must be located",
     # Add more prompts here
     ]
     output_prompt=[]
         length_penalty=1.0,
         temperature=1,
     )
     generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
     generated_text = generated_text.replace(cap_prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
     output_prompt.append(generated_text)
+    print(generated_text)
     # Loop through prompts array:
     for prompt in prompts_array:
         inputs = processor5(images=img, text=prompt, return_tensors="pt").to('cuda')
     output_prompt.append(response_text)
     output_prompt = " ".join(output_prompt)
     return output_prompt
 def flatten_and_stringify(data):
     """Flatten nested lists in *data* into one flat list of strings.

     Items found inside nested ``list`` elements (recursively) are emitted
     first; the ``str()`` of every non-list element of *data* follows, in
     its original order. Only ``list`` instances are descended into; any
     other container (for example a tuple) is stringified as a whole.
     """
     # First pass: collect the leaves of every nested list.
     from_nested = []
     for element in data:
         if isinstance(element, list):
             from_nested.extend(map(str, flatten_and_stringify(element)))
     # Second pass: stringify the elements that are not lists themselves.
     from_top_level = [
         str(element) for element in data if not isinstance(element, list)
     ]
     return from_nested + from_top_level
         print(enhanced_prompt_2)
         enh_prompt=[enhanced_prompt,enhanced_prompt_2]
         '''
+        return enhanced_prompt
 @spaces.GPU(duration=40)
 def generate_30(
     prompt: str = "",
         downscale1 = upscale.resize((upscale.width // 4, upscale.height // 4), Image.LANCZOS)
         downscale_path = f"rvIP_upscale_{timestamp}.png"
         downscale1.save(downscale_path,optimize=False,compress_level=0)
+        upload_to_ftp(downscale_path)
+        image_paths = [save_image(downscale1)]
     else:
         print('-- IMAGE REQUIRED --')
     return image_paths
         downscale1 = upscale.resize((upscale.width // 4, upscale.height // 4), Image.LANCZOS)
         downscale_path = f"rvIP_upscale_{timestamp}.png"
         downscale1.save(downscale_path,optimize=False,compress_level=0)
+        upload_to_ftp(downscale_path)
+        image_paths = [save_image(downscale1)]
     else:
         print('-- IMAGE REQUIRED --')
     return image_paths
         downscale1 = upscale.resize((upscale.width // 4, upscale.height // 4), Image.LANCZOS)
         downscale_path = f"rvIP_upscale_{timestamp}.png"
         downscale1.save(downscale_path,optimize=False,compress_level=0)
+        upload_to_ftp(downscale_path)
+        image_paths = [save_image(downscale1)]
     else:
         print('-- IMAGE REQUIRED --')
     return image_paths
         run_button_30 = gr.Button("Run 30 Seconds", scale=0)
         run_button_60 = gr.Button("Run 60 Seconds", scale=0)
         run_button_90 = gr.Button("Run 90 Seconds", scale=0)
+    result = gr.Gallery(label="Result", columns=1, show_label=False)
     ip_strength =  gr.Slider(
             label="Image Strength",
             minimum=0.0,
         outputs=negative_prompt,
         api_name=False,
     )
     gr.on(
         triggers=[
             run_button_30.click,
         ],
         outputs=[result],
     )
     gr.on(
         triggers=[
             run_button_60.click,
         ],
         outputs=[result],
     )
     gr.on(
         triggers=[
             run_button_90.click,
     gr.Markdown(
     """
     <div style="text-align: justify;">
+    ⚡This is the demo space for generating images using Stable Diffusion XL with quality styles, different models, and types. Try the sample prompts to generate higher quality images. Try the sample prompts for generating higher quality images.
     <a href='https://huggingface.co/spaces/prithivMLmods/Top-Prompt-Collection' target='_blank'>Try prompts</a>.
     </div>
     """)
     <div style="text-align: justify;">
     ⚠️ Users are accountable for the content they generate and are responsible for ensuring it meets appropriate ethical standards.
     </div>
+    """)
 def text_generation(input_text, seed):
     """Return the fixed placeholder text for the text-generation tab.

     Both *input_text* and *seed* are accepted but not used; the same
     constant string is returned on every call.
     """
     return "Text Generator Application by ecarbo"
 title = "Text Generator Demo GPT-Neo"
 description = "Text Generator Application by ecarbo"
         description=description,
     )
     combined_interface = gr.TabbedInterface([demo_interface, text_gen_interface], ["Image Generation", "Text Generation"])
+    combined_interface.launch(show_api=False)