Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -4,6 +4,7 @@ | |
| 4 | 
             
            # in the Software without restriction, including without limitation the rights
         | 
| 5 | 
             
            # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         | 
| 6 | 
             
            # copies of the Software, and to permit persons to whom the Software is
         | 
|  | |
| 7 | 
             
            import spaces
         | 
| 8 | 
             
            import os
         | 
| 9 | 
             
            import random
         | 
| @@ -20,7 +21,7 @@ from ip_adapter import IPAdapterXL | |
| 20 | 
             
            from huggingface_hub import snapshot_download
         | 
| 21 | 
             
            import torch
         | 
| 22 | 
             
            from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
         | 
| 23 | 
            -
            from transformers import CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline
         | 
| 24 |  | 
| 25 | 
             
            torch.backends.cuda.matmul.allow_tf32 = False
         | 
| 26 | 
             
            torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
         | 
| @@ -165,7 +166,9 @@ captioner_3 = pipeline(model="Salesforce/blip-image-captioning-large",device='cu | |
| 165 | 
             
             #model5 = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b-coco").to('cuda')
         | 
| 166 | 
             
            #processor5 = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
         | 
| 167 | 
             
             #processor5 = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b-coco")
         | 
| 168 | 
            -
             | 
|  | |
|  | |
| 169 |  | 
| 170 | 
             
            ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
         | 
| 171 | 
             
            text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
         | 
| @@ -175,6 +178,27 @@ MAX_SEED = np.iinfo(np.int32).max | |
| 175 |  | 
| 176 | 
             
            neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
         | 
| 177 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 178 | 
             
            def upload_to_ftp(filename):
         | 
| 179 | 
             
                try:
         | 
| 180 | 
             
                    transport = paramiko.Transport((FTP_HOST, 22))
         | 
| @@ -277,6 +301,62 @@ def generate_30( | |
| 277 | 
             
                    filename= f'rv_IP_{timestamp}.png'
         | 
| 278 | 
             
                    print("-- using image file --")
         | 
| 279 | 
             
                    print(caption)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 280 | 
             
                    print('-- generating image --')
         | 
| 281 | 
             
                    sd_image = ip_model.generate(
         | 
| 282 | 
             
                            pil_image_1=sd_image_a,
         | 
|  | |
| 4 | 
             
            # in the Software without restriction, including without limitation the rights
         | 
| 5 | 
             
            # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         | 
| 6 | 
             
            # copies of the Software, and to permit persons to whom the Software is
         | 
| 7 | 
            +
             | 
| 8 | 
             
            import spaces
         | 
| 9 | 
             
            import os
         | 
| 10 | 
             
            import random
         | 
|  | |
| 21 | 
             
            from huggingface_hub import snapshot_download
         | 
| 22 | 
             
            import torch
         | 
| 23 | 
             
            from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
         | 
| 24 | 
            +
            from transformers import AutoTokenizer, AutoModelForCausalLM, CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline
         | 
| 25 |  | 
| 26 | 
             
            torch.backends.cuda.matmul.allow_tf32 = False
         | 
| 27 | 
             
            torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
         | 
|  | |
| 166 | 
             
             #model5 = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b-coco").to('cuda')
         | 
| 167 | 
             
            #processor5 = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
         | 
| 168 | 
             
             #processor5 = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b-coco")
         | 
| 169 | 
            +
            txt_tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=False)
         | 
| 170 | 
            +
            txt_tokenizer.tokenizer_legacy=False
         | 
| 171 | 
            +
            model = AutoModelForCausalLM.from_pretrained(checkpoint).to('cuda')
         | 
| 172 |  | 
| 173 | 
             
            ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
         | 
| 174 | 
             
            text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
         | 
|  | |
| 178 |  | 
| 179 | 
             
            neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
         | 
| 180 |  | 
| 181 | 
            +
            def filter_text(text, phraseC):
                """Strip the echoed instruction preamble from a generated prompt.

                The text is cut in two stages, each at the FIRST case-sensitive
                occurrence of a literal marker:

                1. Everything up to and including ``"rewritten text:"`` is dropped.
                   If that marker is absent, ``text`` is returned unchanged.
                2. Within the remainder, everything up to and including
                   ``"Rewritten Prompt:"`` is dropped. If that marker is absent,
                   the remainder from stage 1 is returned as-is.

                Only when both markers were found are all occurrences of
                ``phraseC`` removed from the result.

                Args:
                    text: The decoded model output to clean up.
                    phraseC: A literal substring to delete from the final text
                        (the callers visible in this file pass the original
                        prompt). It is matched verbatim, not as a regular
                        expression; an empty or ``None`` value removes nothing.

                Returns:
                    The filtered text, or ``text`` itself when the first marker
                    is not present.
                """
                first_marker = "rewritten text:"
                second_marker = "Rewritten Prompt:"

                # str.partition splits at the first occurrence, which is what the
                # earlier lazy "(.*?)marker(.*)" search with DOTALL selected.
                _, found_first, remainder = text.partition(first_marker)
                if not found_first:
                    # No marker at all: hand the text back untouched.
                    return text

                _, found_second, tail = remainder.partition(second_marker)
                if not found_second:
                    return remainder

                # Remove the phrase literally. Treating it as a regex pattern
                # would raise on prompts containing characters such as "(" or
                # "[" and would let "." match any character.
                if phraseC:
                    tail = tail.replace(phraseC, "")
                return tail
         | 
| 201 | 
            +
                  
         | 
| 202 | 
             
            def upload_to_ftp(filename):
         | 
| 203 | 
             
                try:
         | 
| 204 | 
             
                    transport = paramiko.Transport((FTP_HOST, 22))
         | 
|  | |
| 301 | 
             
                    filename= f'rv_IP_{timestamp}.png'
         | 
| 302 | 
             
                    print("-- using image file --")
         | 
| 303 | 
             
                    print(caption)
         | 
| 304 | 
            +
                    print("-- generating further caption --")
         | 
| 305 | 
            +
             | 
| 306 | 
            +
                    
         | 
| 307 | 
            +
                    system_prompt_rewrite = (
         | 
| 308 | 
            +
                        "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
         | 
| 309 | 
            +
                    )
         | 
| 310 | 
            +
                    user_prompt_rewrite = (
         | 
| 311 | 
            +
                        "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
         | 
| 312 | 
            +
                    )
         | 
| 313 | 
            +
                    user_prompt_rewrite_2 = (
         | 
| 314 | 
            +
                        "Rephrase this scene to have more elaborate details: "
         | 
| 315 | 
            +
                    )
         | 
| 316 | 
            +
                    input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
         | 
| 317 | 
            +
                    input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
         | 
| 318 | 
            +
                    print("-- got prompt --")
         | 
| 319 | 
            +
                    # Encode the input text and include the attention mask
         | 
| 320 | 
            +
                    encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
         | 
| 321 | 
            +
                    encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
         | 
| 322 | 
            +
                    # Ensure all values are on the correct device
         | 
| 323 | 
            +
                    input_ids = encoded_inputs["input_ids"].to(device)
         | 
| 324 | 
            +
                    input_ids_2 = encoded_inputs_2["input_ids"].to(device)
         | 
| 325 | 
            +
                    attention_mask = encoded_inputs["attention_mask"].to(device)
         | 
| 326 | 
            +
                    attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
         | 
| 327 | 
            +
                    print("-- tokenize prompt --")
         | 
| 328 | 
            +
                      # Google T5
         | 
| 329 | 
            +
                    #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
         | 
| 330 | 
            +
                    outputs = model.generate(
         | 
| 331 | 
            +
                        input_ids=input_ids,
         | 
| 332 | 
            +
                        attention_mask=attention_mask,
         | 
| 333 | 
            +
                        max_new_tokens=512,
         | 
| 334 | 
            +
                        temperature=0.2,
         | 
| 335 | 
            +
                        top_p=0.9,
         | 
| 336 | 
            +
                        do_sample=True,
         | 
| 337 | 
            +
                    )
         | 
| 338 | 
            +
                    outputs_2 = model.generate(
         | 
| 339 | 
            +
                        input_ids=input_ids_2,
         | 
| 340 | 
            +
                        attention_mask=attention_mask_2,
         | 
| 341 | 
            +
                        max_new_tokens=65,
         | 
| 342 | 
            +
                        temperature=0.2,
         | 
| 343 | 
            +
                        top_p=0.9,
         | 
| 344 | 
            +
                        do_sample=True,
         | 
| 345 | 
            +
                    )
         | 
| 346 | 
            +
                    # Use the encoded tensor 'text_inputs' here
         | 
| 347 | 
            +
                    enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
         | 
| 348 | 
            +
                    enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
         | 
| 349 | 
            +
                    print('-- generated prompt --')
         | 
| 350 | 
            +
                    enhanced_prompt = filter_text(enhanced_prompt,prompt)
         | 
| 351 | 
            +
                    enhanced_prompt_2 = filter_text(enhanced_prompt_2,prompt)
         | 
| 352 | 
            +
                    print('-- filtered prompt --')
         | 
| 353 | 
            +
                    print(enhanced_prompt)
         | 
| 354 | 
            +
                    print('-- filtered prompt 2 --')
         | 
| 355 | 
            +
                    print(enhanced_prompt_2)
         | 
| 356 | 
            +
             | 
| 357 | 
            +
             | 
| 358 | 
            +
             | 
| 359 | 
            +
                  
         | 
| 360 | 
             
                    print('-- generating image --')
         | 
| 361 | 
             
                    sd_image = ip_model.generate(
         | 
| 362 | 
             
                            pil_image_1=sd_image_a,
         | 
