Spaces: Running on Zero
Update app.py

app.py CHANGED
@@ -158,15 +158,16 @@ def load_and_prepare_model():
 pipe = load_and_prepare_model()
 
 # text models
-checkpoint = "microsoft/Phi-3.5-mini-instruct"
+#checkpoint = "microsoft/Phi-3.5-mini-instruct"
+checkpoint = "ford442/Phi-3.5-mini-instruct-bf16"
 captioner = pipeline(model="ydshieh/vit-gpt2-coco-en",device='cuda', task="image-to-text")
 captioner_2 = pipeline(model="Salesforce/blip-image-captioning-base",device='cuda', task="image-to-text")
 captioner_3 = pipeline(model="ford442/blip-image-to-text-large-bf16",device='cuda', task="image-to-text")
 model5 = Blip2ForConditionalGeneration.from_pretrained("ford442/blip2-image-to-text-bf16").to('cuda')
 processor5 = Blip2Processor.from_pretrained("ford442/blip2-image-to-text-bf16")
-
-
-
+txt_tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=False)
+txt_tokenizer.tokenizer_legacy=False
+model = AutoModelForCausalLM.from_pretrained(checkpoint,attn_implementation="flash_attention_2").to('cuda')
 
 ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
 text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
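This hunk swaps the stock Phi-3.5 checkpoint for a bf16 mirror and loads it with flash attention. A minimal sketch of how that tokenizer/model pair could be exercised for prompt rewriting, assuming a chat-template call and generation settings that are not shown in the diff; the explicit torch_dtype is also an assumption (the diff omits it, while flash_attention_2 normally expects float16/bfloat16), and the system prompt is taken from the expand_prompt hunk below:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "ford442/Phi-3.5-mini-instruct-bf16"
tok = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=False)
# torch_dtype=torch.bfloat16 is an assumption; the diff passes no dtype,
# but flash_attention_2 generally requires a half-precision dtype.
lm = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
).to('cuda')

# Hypothetical rewrite request; the real prompting lives in expand_prompt().
messages = [
    {"role": "system", "content": "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."},
    {"role": "user", "content": "a cat sitting on a chair"},
]
input_ids = tok.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to('cuda')
output_ids = lm.generate(input_ids, max_new_tokens=96)
print(tok.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))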
@@ -242,14 +243,14 @@ def captioning(img):
     output_prompt=[]
     # Initial caption generation without a prompt:
     inputsa = processor5(images=img, return_tensors="pt").to('cuda')
-    generated_ids = model5.generate(**inputsa, min_length=
+    generated_ids = model5.generate(**inputsa, min_length=32, max_length=64)
     generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
     output_prompt.append(generated_text)
     print(generated_text)
     # Loop through prompts array:
     for prompt in prompts_array:
         inputs = processor5(images=img, text=prompt, return_tensors="pt").to('cuda')
-        generated_ids = model5.generate(**inputs
+        generated_ids = model5.generate(**inputs) # Adjust max_length if needed
         generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
         response_text = generated_text.replace(prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
         output_prompt.append(response_text)
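The first change here completes a generate() call by bounding caption length explicitly; setting min_length above the model's default max_length without also raising max_length would presumably conflict, which is likely why both are set. A self-contained sketch of the same BLIP-2 call, assuming a placeholder test image (the URL is not from the Space):

import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

processor5 = Blip2Processor.from_pretrained("ford442/blip2-image-to-text-bf16")
model5 = Blip2ForConditionalGeneration.from_pretrained("ford442/blip2-image-to-text-bf16").to('cuda')

# Placeholder image; any RGB PIL image works here.
img = Image.open(requests.get("https://example.com/test.jpg", stream=True).raw).convert("RGB")

inputs = processor5(images=img, return_tensors="pt").to('cuda')
# min_length=32 pushes BLIP-2 past its usual one-line caption;
# max_length=64 keeps decoding cost bounded.
generated_ids = model5.generate(**inputs, min_length=32, max_length=64)
print(processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip())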
@@ -263,7 +264,7 @@ def captioning(img):
     #output_prompt.append(response_text)
     print(output_prompt)
     return output_prompt
-
+
 def expand_prompt(prompt):
     system_prompt_rewrite = (
         "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
@@ -316,7 +317,7 @@ def expand_prompt(prompt):
     print(enhanced_prompt_2)
     enh_prompt=[enhanced_prompt,enhanced_prompt_2]
     return enh_prompt
-
+
 @spaces.GPU(duration=40)
 def generate_30(
     prompt: str = "",
@@ -398,10 +399,9 @@ def generate_30(
     print(caption_2)
     print("-- generating further caption --")
 
-
-
-
-
+    expand_prompt(prompt)
+    expand_prompt(caption)
+    expand_prompt(caption_2)
 
     print('-- generating image --')
     sd_image = ip_model.generate(
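Note that the three expand_prompt() calls added here discard their return values; per the hunk above, expand_prompt returns the [enhanced_prompt, enhanced_prompt_2] pair, so as written these calls appear to contribute only their internal prints. If the expanded prompts are meant to feed ip_model.generate, they would need to be captured; the variable names below are assumptions, not the Space's code:

# Hypothetical capture of the results; the diff as written ignores them.
enh_prompt = expand_prompt(prompt)
enh_caption = expand_prompt(caption)
enh_caption_2 = expand_prompt(caption_2)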