Update app.py
app.py CHANGED
```diff
@@ -159,14 +159,14 @@ pipe = load_and_prepare_model()
 
 # text models
 checkpoint = "microsoft/Phi-3.5-mini-instruct"
-captioner = pipeline(model="ydshieh/vit-gpt2-coco-en",device='cuda', task="image-to-text")
-captioner_2 = pipeline(model="Salesforce/blip-image-captioning-base",device='cuda', task="image-to-text")
+#captioner = pipeline(model="ydshieh/vit-gpt2-coco-en",device='cuda', task="image-to-text")
+#captioner_2 = pipeline(model="Salesforce/blip-image-captioning-base",device='cuda', task="image-to-text")
 captioner_3 = pipeline(model="ford442/blip-image-to-text-large-bf16",device='cuda', task="image-to-text")
 model5 = Blip2ForConditionalGeneration.from_pretrained("ford442/blip2-image-to-text-bf16").to('cuda')
 processor5 = Blip2Processor.from_pretrained("ford442/blip2-image-to-text-bf16")
-txt_tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=False)
-txt_tokenizer.tokenizer_legacy=False
-model = AutoModelForCausalLM.from_pretrained(checkpoint).to('cuda')
+#txt_tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=False)
+#txt_tokenizer.tokenizer_legacy=False
+#model = AutoModelForCausalLM.from_pretrained(checkpoint).to('cuda')
 
 ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
 text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
@@ -262,7 +262,7 @@ def captioning(img):
     output_prompt.append(response_text)
     print(output_prompt)
     return output_prompt
-
+'''
 def expand_prompt(prompt):
     system_prompt_rewrite = (
         "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
@@ -315,7 +315,7 @@ def expand_prompt(prompt):
     print(enhanced_prompt_2)
     enh_prompt=[enhanced_prompt,enhanced_prompt_2]
     return enh_prompt
-
+'''
 @spaces.GPU(duration=40)
 def generate_30(
     prompt: str = "",
@@ -396,9 +396,9 @@ def generate_30(
     print(caption)
     print("-- generating further caption --")
 
-    expand_prompt(prompt)
-    expand_prompt(caption)
-    expand_prompt(caption_2)
+    #expand_prompt(prompt)
+    #expand_prompt(caption)
+    #expand_prompt(caption_2)
 
 
    print('-- generating image --')
```