ford442 committed
Commit 4b7af22 · verified · 1 Parent(s): 2b64a43

Update app.py

Files changed (1)
  1. app.py +356 -147
app.py CHANGED
@@ -5,38 +5,119 @@
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 import spaces
 
-import os
 import random
 import uuid
 import gradio as gr
 import numpy as np
 from PIL import Image
-import torch
-#import diffusers
-from diffusers import AutoencoderKL, StableDiffusionXLPipeline, UNet2DConditionModel
 from diffusers import EulerAncestralDiscreteScheduler
 from typing import Tuple
 import paramiko
 import datetime
-#from diffusers import DPMSolverSDEScheduler
-from diffusers.models.attention_processor import AttnProcessor2_0
-from transformers import CLIPTextModelWithProjection, CLIPTextModel
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
 torch.backends.cudnn.allow_tf32 = False
 torch.backends.cudnn.deterministic = False
 torch.backends.cudnn.benchmark = False
-torch.backends.cuda.preferred_blas_library="cublas"
-torch.backends.cuda.preferred_linalg_library="cusolver"
 torch.set_float32_matmul_precision("highest")
 
-FTP_HOST = "1ink.us"
-FTP_USER = "ford442"
-FTP_PASS = os.getenv("FTP_PASS")
-FTP_DIR = "1ink.us/stable_diff/" # Remote directory on FTP server
 
 DESCRIPTIONXX = """
 ## ⚡⚡⚡⚡ REALVISXL V5.0 BF16 (Tester B) ⚡⚡⚡⚡
@@ -81,77 +162,137 @@ styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
 DEFAULT_STYLE_NAME = "Style Zero"
 STYLE_NAMES = list(styles.keys())
 HF_TOKEN = os.getenv("HF_TOKEN")
-os.putenv("HF_HUB_ENABLE_HF_TRANSFER","1")
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
 def load_and_prepare_model():
-    #vaeRV = AutoencoderKL.from_pretrained("SG161222/RealVisXL_V5.0", subfolder='vae', safety_checker=None, use_safetensors=True, token=True)
-    #vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False, low_cpu_mem_usage=False, torch_dtype=torch.float32, token=True) #.to(device).to(torch.bfloat16)
-    vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", low_cpu_mem_usage=False, safety_checker=None, use_safetensors=False, torch_dtype=torch.float32, token=True) #.to(device).to(torch.bfloat16)
-    #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler', beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1, use_karras_sigmas=True)
-    #sched = DPMSolverSDEScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler')
-    #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler', beta_schedule="scaled_linear", token=True)
     #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear")
     pipe = StableDiffusionXLPipeline.from_pretrained(
         'ford442/RealVisXL_V5.0_BF16',
         #torch_dtype=torch.bfloat16,
-        token=True,
         add_watermarker=False,
-        #text_encoder=None,
-        #text_encoder_2=None,
-        unet=None,
-        vae=None,
     )
-    pipe.unet = UNet2DConditionModel.from_pretrained(
-        'ford442/RealVisXL_V5.0_BF16',
-        low_cpu_mem_usage=False,
-        subfolder='unet',
-        upcast_attention=True,
-        #attention_type='gated-text-image',
-        token=True)
-    #pipe.vae = vaeXL #.to(torch.bfloat16)
-    #pipe.scheduler = sched
-    #pipe.vae.do_resize=False
-    #pipe.vae.vae_scale_factor=8
     #pipe.to(device)
     #pipe.to(torch.bfloat16)
     print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
     pipe.watermark=None
-    pipe.safety_checker=None
     #pipe.unet.to(memory_format=torch.channels_last)
     #pipe.enable_vae_tiling()
-    pipe.to(device, torch.bfloat16)
-    pipe.vae = vaeXL.to(device) #.to('cpu') #.to(torch.bfloat16)
-
-    pipe.unet.set_attn_processor(AttnProcessor2_0())
-    pipe.vae.set_default_attn_processor()
-
     return pipe
-
 pipe = load_and_prepare_model()
 
-text_encoder = CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder', token=True) #.to(device=device, dtype=torch.bfloat16)
-text_encoder_2 = CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2', token=True) #.to(device=device, dtype=torch.bfloat16)
-
-MAX_SEED = np.iinfo(np.int32).max
 
 neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
 
-from huggingface_hub import InferenceClient
-client = InferenceClient(
-    model="stable-diffusion-v1-5/stable-diffusion-v1-5",
-    #provider="hf-inference",
-    token=HF_TOKEN
-)
-
 def upload_to_ftp(filename):
     try:
         transport = paramiko.Transport((FTP_HOST, 22))
-        if filename.endswith(".txt"):
-            destination_path = FTP_DIR + '/txt/' + filename
-        else:
-            destination_path = FTP_DIR + filename
         transport.connect(username = FTP_USER, password = FTP_PASS)
         sftp = paramiko.SFTPClient.from_transport(transport)
         sftp.put(filename, destination_path)
@@ -161,65 +302,97 @@ def upload_to_ftp(filename):
     except Exception as e:
         print(f"FTP upload error: {e}")
 
-def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
-    if style_name in styles:
-        p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
-    else:
-        p, n = styles[DEFAULT_STYLE_NAME]
-    if not negative:
-        negative = ""
-    return p.replace("{prompt}", positive), n + negative
-
-def save_image(img):
-    unique_name = str(uuid.uuid4()) + ".png"
-    img.save(unique_name,optimize=False,compress_level=0)
-    return unique_name
-
 def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
-    filename = f'tst_B_{timestamp}.txt'
     with open(filename, "w") as f:
-        f.write(f"Realvis 5.0 (Tester B) \n")
         f.write(f"Date/time: {timestamp} \n")
         f.write(f"Prompt: {prompt} \n")
         f.write(f"Steps: {num_inference_steps} \n")
         f.write(f"Guidance Scale: {guidance_scale} \n")
         f.write(f"SPACE SETUP: \n")
-        f.write(f"Use Model Dtype: no \n")
         f.write(f"Model Scheduler: Euler_a all_custom before cuda \n")
         f.write(f"To cuda and bfloat \n")
     upload_to_ftp(filename)
 
-def generate_api(
-    prompt: str,
-    negative_prompt: str = "",
-    use_negative_prompt: bool = False,
-    style_selection: str = "",
-    width: int = 768,
-    height: int = 768,
-    guidance_scale: float = 4,
-    num_inference_steps: int = 125,
-    use_resolution_binning: bool = True,
-    progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
-):
-    seed = random.randint(0, MAX_SEED)
-    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
-    rv_image = client.text_to_image(
-        prompt = prompt,
-        negative_prompt = negative_prompt,
-        height = height,
-        width = width,
-        num_inference_steps = num_inference_steps,
-        guidance_scale = guidance_scale,
-        seed = seed
-    )
-    sd_image_path = f"sd15_B_{timestamp}.png"
-    rv_image.save(sd_image_path,optimize=False,compress_level=0)
-    upload_to_ftp(sd_image_path)
-    unique_name = str(uuid.uuid4()) + ".png"
-    os.symlink(sd_image_path, unique_name)
-    return [unique_name]
-
 @spaces.GPU(duration=40)
 def generate_30(
     prompt: str,
@@ -230,13 +403,16 @@ def generate_30(
     height: int = 768,
     guidance_scale: float = 4,
     num_inference_steps: int = 125,
     use_resolution_binning: bool = True,
-    progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-    pipe.text_encoder = text_encoder.to(device=device, dtype=torch.bfloat16)
-    pipe.text_encoder_2 = text_encoder_2.to(device=device, dtype=torch.bfloat16)
     options = {
         "prompt": [prompt],
         "negative_prompt": [negative_prompt],
@@ -247,17 +423,26 @@ def generate_30(
         "num_inference_steps": num_inference_steps,
         "generator": generator,
         "output_type": "pil",
     }
     if use_resolution_binning:
         options["use_resolution_binning"] = True
     images = []
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
     batch_options = options.copy()
     rv_image = pipe(**batch_options).images[0]
-    sd_image_path = f"rv50_B_{timestamp}.png"
     rv_image.save(sd_image_path,optimize=False,compress_level=0)
-    upload_to_ftp(sd_image_path)
     unique_name = str(uuid.uuid4()) + ".png"
     os.symlink(sd_image_path, unique_name)
     return [unique_name]
@@ -272,13 +457,16 @@ def generate_60(
     height: int = 768,
     guidance_scale: float = 4,
     num_inference_steps: int = 125,
     use_resolution_binning: bool = True,
-    progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-    pipe.text_encoder = text_encoder.to(device=device, dtype=torch.bfloat16)
-    pipe.text_encoder_2 = text_encoder_2.to(device=device, dtype=torch.bfloat16)
     options = {
         "prompt": [prompt],
         "negative_prompt": [negative_prompt],
@@ -289,6 +477,7 @@ def generate_60(
         "num_inference_steps": num_inference_steps,
         "generator": generator,
         "output_type": "pil",
     }
     if use_resolution_binning:
         options["use_resolution_binning"] = True
@@ -296,8 +485,11 @@ def generate_60(
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
     batch_options = options.copy()
     rv_image = pipe(**batch_options).images[0]
-    sd_image_path = f"rv50_B_{timestamp}.png"
     rv_image.save(sd_image_path,optimize=False,compress_level=0)
     upload_to_ftp(sd_image_path)
     unique_name = str(uuid.uuid4()) + ".png"
@@ -314,13 +506,16 @@ def generate_90(
     height: int = 768,
     guidance_scale: float = 4,
     num_inference_steps: int = 125,
     use_resolution_binning: bool = True,
-    progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-    pipe.text_encoder = text_encoder.to(device=device, dtype=torch.bfloat16)
-    pipe.text_encoder_2 = text_encoder_2.to(device=device, dtype=torch.bfloat16)
     options = {
         "prompt": [prompt],
         "negative_prompt": [negative_prompt],
@@ -331,6 +526,7 @@ def generate_90(
         "num_inference_steps": num_inference_steps,
         "generator": generator,
         "output_type": "pil",
     }
     if use_resolution_binning:
         options["use_resolution_binning"] = True
@@ -338,8 +534,11 @@ def generate_90(
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
     batch_options = options.copy()
     rv_image = pipe(**batch_options).images[0]
-    sd_image_path = f"rv50_B_{timestamp}.png"
     rv_image.save(sd_image_path,optimize=False,compress_level=0)
     upload_to_ftp(sd_image_path)
     unique_name = str(uuid.uuid4()) + ".png"
@@ -387,7 +586,6 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     run_button_30 = gr.Button("Run 30 Seconds", scale=0)
     run_button_60 = gr.Button("Run 60 Seconds", scale=0)
     run_button_90 = gr.Button("Run 90 Seconds", scale=0)
-    run_button_api = gr.Button("Run API", scale=0)
     result = gr.Gallery(label="Result", columns=1, show_label=False)
 
     with gr.Row():
@@ -414,14 +612,14 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     with gr.Row():
         width = gr.Slider(
             label="Width",
-            minimum=256,
             maximum=MAX_IMAGE_SIZE,
             step=64,
             value=768,
         )
         height = gr.Slider(
             label="Height",
-            minimum=256,
             maximum=MAX_IMAGE_SIZE,
             step=64,
             value=768,
@@ -436,11 +634,20 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     )
     num_inference_steps = gr.Slider(
         label="Number of inference steps",
-        minimum=1,
         maximum=1000,
-        step=1,
         value=180,
     )
 
     gr.Examples(
         examples=examples,
@@ -470,6 +677,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
             height,
             guidance_scale,
             num_inference_steps,
         ],
         outputs=[result],
     )
@@ -489,6 +697,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
             height,
             guidance_scale,
             num_inference_steps,
         ],
         outputs=[result],
     )
@@ -508,29 +717,11 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
             height,
             guidance_scale,
             num_inference_steps,
         ],
         outputs=[result],
     )
-
-    gr.on(
-        triggers=[
-            run_button_api.click,
-        ],
-        # api_name="generate", # Add this line
-        fn=generate_api,
-        inputs=[
-            prompt,
-            negative_prompt,
-            use_negative_prompt,
-            style_selection,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-        ],
-        outputs=[result],
-    )
-
     gr.Markdown("### REALVISXL V5.0")
     predefined_gallery = gr.Gallery(label="REALVISXL V5.0", columns=3, show_label=False, value=load_predefined_images1())
 
@@ -567,6 +758,24 @@ title = "Text Generator Demo GPT-Neo"
 description = "Text Generator Application by ecarbo"
 
 if __name__ == "__main__":
     demo_interface = demo.queue(max_size=50) # Remove .launch() here
 
     text_gen_interface = gr.Interface(
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 import spaces
+import os
+os.environ["SAFETENSORS_FAST_GPU"] = "1"
 
+import subprocess
+import re
+
+def find_cuda_directories(search_paths=None):
+    """Finds directories that contain "cuda" and a version number in their name.
+
+    Args:
+        search_paths: A list of directories to search. If None, uses common paths.
+
+    Returns:
+        A dictionary where keys are directory paths and values are extracted versions.
+        Returns an empty dictionary if no CUDA directories are found.
+    """
+    if search_paths is None:
+        # Common CUDA installation locations (customize as needed)
+        search_paths = [
+            "/usr/local",             # Linux
+            "/usr/lib",               # Linux
+            "/opt",                   # Linux
+            "/Program Files",         # Windows
+            "/Applications",          # macOS (less common)
+            os.path.expanduser("~"),  # check the user's home directory
+        ]
+        if os.name == 'nt':  # Windows
+            search_paths.append("C:\\Program Files")
+            search_paths.append("C:\\Program Files (x86)")
+
+    cuda_dirs = {}
+    for path in search_paths:
+        if os.path.exists(path):  # check that the search root exists
+            for root, dirs, files in os.walk(path):  # walk recursively
+                for dir_name in dirs:
+                    match = re.search(r"cuda(\d+(\.\d+)*)", dir_name, re.IGNORECASE)  # regex for "cuda" plus a version
+                    if match:
+                        full_path = os.path.join(root, dir_name)
+                        version = match.group(1)
+                        cuda_dirs[full_path] = version
+    return cuda_dirs
+
+#subprocess.run(['sh', './torch.sh'])
+
+#import sys
+#conda_prefix = os.path.expanduser("~/miniconda3")
+#conda_bin = os.path.join(conda_prefix, "bin")
+
+# Add Conda's bin directory to your PATH
+#os.environ["PATH"] = conda_bin + os.pathsep + os.environ["PATH"]
+
+# Activate the base environment (adjust if needed)
+#os.system(f'{conda_bin}/conda init --all')
+#os.system(f'{conda_bin}/conda activate base')
+
+#os.system(f'{conda_bin}/conda install nvidia/label/cudnn-9.3.0::cudnn')
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-libraries')
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-libraries-dev')
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-cudart')
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-cudart-dev')
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-nvcc')
+
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-toolkit')
+
+#subprocess.run(['pip', 'install', 'git+https://github.com/hidet-org/hidet.git'])
+#subprocess.run(['pip', 'install', 'git+https://github.com/ford442/hidet.git@thread'])
+
+#os.system(f'{conda_bin}/conda install pytorch::pytorch-cuda')
+#os.system(f'{conda_bin}/conda install rcdr_py37::tensorrt')
+#subprocess.run(['sh', './hidet.sh'])
+#subprocess.run(['sh', './modelopt.sh'])
+#import hidet
+#print(dir(hidet))
+#import torch_tensorrt
89
  import uuid
90
  import gradio as gr
91
  import numpy as np
92
  from PIL import Image
93
+
94
+ import diffusers
95
+ from diffusers import AutoencoderKL, StableDiffusionXLPipeline
96
  from diffusers import EulerAncestralDiscreteScheduler
97
  from typing import Tuple
98
  import paramiko
99
  import datetime
100
+ import cyper
101
+ from image_gen_aux import UpscaleWithModel
102
+ import torch
103
+ #import torch._dynamo
104
+ #torch._dynamo.list_backends()
105
+ import time
106
+ import gc
107
+
108
+ import torch.nn.functional as F
109
+ from sageattention import sageattn
110
+
111
  torch.backends.cuda.matmul.allow_tf32 = False
112
  torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
113
  torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
114
  torch.backends.cudnn.allow_tf32 = False
115
  torch.backends.cudnn.deterministic = False
116
  torch.backends.cudnn.benchmark = False
117
+ # torch.backends.cuda.preferred_blas_library="cublas"
118
+ # torch.backends.cuda.preferred_linalg_library="cusolver"
119
  torch.set_float32_matmul_precision("highest")
120
 
 
 
 
 
121
 
122
  DESCRIPTIONXX = """
123
  ## ⚡⚡⚡⚡ REALVISXL V5.0 BF16 (Tester B) ⚡⚡⚡⚡
 
 DEFAULT_STYLE_NAME = "Style Zero"
 STYLE_NAMES = list(styles.keys())
 HF_TOKEN = os.getenv("HF_TOKEN")
+FTP_HOST = os.getenv("FTP_HOST")
+FTP_USER = os.getenv("FTP_USER")
+FTP_PASS = os.getenv("FTP_PASS")
+FTP_DIR = os.getenv("FTP_DIR")
 
+# os.putenv('TORCH_LINALG_PREFER_CUSOLVER','1')
+os.putenv('HF_HUB_ENABLE_HF_TRANSFER','1')
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+os.environ["SAFETENSORS_FAST_GPU"] = "1"
+
+upscaler = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
+
+def scheduler_swap_callback(pipeline, step_index, timestep, callback_kwargs):
+    # adjust the batch_size of prompt_embeds according to guidance_scale
+    if step_index == int(pipeline.num_timesteps * 0.1):
+        print("-- swapping scheduler --")
+        # pipeline.scheduler = euler_scheduler
+        torch.set_float32_matmul_precision("high")
+        # pipe.vae = vae_b
+        torch.backends.cudnn.allow_tf32 = True
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cuda.preferred_blas_library="cublaslt"
+        #if step_index == int(pipeline.num_timesteps * 0.5):
+        #    torch.set_float32_matmul_precision("medium")
+        #callback_kwargs["latents"] = callback_kwargs["latents"].to(torch.float64)
+        #pipe.unet.to(torch.float64)
+        # pipe.guidance_scale=1.0
+        # pipe.scheduler.set_timesteps(num_inference_steps*.70)
+        # print(f"-- setting step {pipeline.num_timesteps * 0.1} --")
+        # pipeline.scheduler._step_index = pipeline.num_timesteps * 0.1
+    if step_index == int(pipeline.num_timesteps * 0.9):
+        torch.backends.cuda.preferred_blas_library="cublas"
+        torch.backends.cudnn.allow_tf32 = False
+        torch.backends.cuda.matmul.allow_tf32 = False
+        torch.set_float32_matmul_precision("highest")
+        #callback_kwargs["latents"] = callback_kwargs["latents"].to(torch.bfloat16)
+        #pipe.unet.to(torch.float64)
+        # pipe.vae = vae_a
+        # pipe.unet = unet_a
+        torch.backends.cudnn.deterministic = False
+        #pipe.unet.set_default_attn_processor()
+        print("-- swapping scheduler --")
+        # pipeline.scheduler = heun_scheduler
+        #pipe.scheduler.set_timesteps(num_inference_steps*.70)
+        # print(f"-- setting step {pipeline.num_timesteps * 0.9} --")
+        # pipeline.scheduler._step_index = pipeline.num_timesteps * 0.9
+    return {"latents": callback_kwargs["latents"]}
+
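For context on the callback above: diffusers invokes callback_on_step_end(pipeline, step_index, timestep, callback_kwargs) after each denoising step and expects a dict back; returned keys that are registered callback tensor inputs (such as "latents") overwrite the loop's local variables. A minimal sketch of the contract, assuming an SDXL pipeline like the one built below:

def log_step_callback(pipeline, step_index, timestep, callback_kwargs):
    # Inspect the in-flight latents; returning them unchanged leaves the
    # denoising loop's state untouched.
    latents = callback_kwargs["latents"]
    print(f"step {step_index}: t={timestep}, latents {tuple(latents.shape)}")
    return {"latents": latents}

# image = pipe(prompt, callback_on_step_end=log_step_callback).images[0]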
 def load_and_prepare_model():
+    sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1 ,use_karras_sigmas=True)
+    vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False, device_map='cpu') #.to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
+    #vaeRV = AutoencoderKL.from_pretrained("SG161222/RealVisXL_V5.0", subfolder='vae', safety_checker=None, use_safetensors=False).to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
     #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear")
+    #txt_1 = CLIPTextModel.from_pretrained(device_map??)
+    #txt_2 = CLIPTextModel.from_pretrained(vae too?)
+    #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler')
     pipe = StableDiffusionXLPipeline.from_pretrained(
         'ford442/RealVisXL_V5.0_BF16',
         #torch_dtype=torch.bfloat16,
         add_watermarker=False,
+        # low_cpu_mem_usage = False,
+        token = HF_TOKEN,
+        # scheduler = sched,
     )
+    #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1) #,use_karras_sigmas=True)
+    pipe.vae = vaeXL #.to(torch.bfloat16)
+    pipe.scheduler = sched
+
+    pipe.vae.do_resize = False
+    #pipe.vae.vae_scale_factor = 8
+    pipe.vae.do_convert_rgb = True
+
+    pipe.vae.set_default_attn_processor()
     #pipe.to(device)
     #pipe.to(torch.bfloat16)
     print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
     pipe.watermark=None
+    pipe.safety_checker=None
+
+    ''' # Freeze vae and unet
+    pipe.vae.requires_grad_(False)
+    pipe.unet.requires_grad_(False)
+    pipe.text_encoder.requires_grad_(False)
+    pipe.unet.eval()
+    pipe.vae.eval()
+    pipe.text_encoder.eval()
+    '''
+    #pipe.unet = pipe.unet.to(memory_format=torch.contiguous_format)
+    #pipe.load_lora_weights("ford442/sdxl-vae-bf16", weight_name="LoRA/FLUX-dev-lora-add_details.safetensors", low_cpu_mem_usage=False)
     #pipe.unet.to(memory_format=torch.channels_last)
     #pipe.enable_vae_tiling()
+    #pipe.unet = torch.compile(pipe.unet, backend="hidet", dynamic=False, mode='max-autotune') #.to(device=device, dtype=torch.bfloat16)
+    #pipe.unet = torch.compile(pipe.unet, backend="hidet", dynamic=False, mode='max-autotune-no-cudagraphs') #.to(device=device, dtype=torch.bfloat16)
+    #pipe.unet = torch.compile(pipe.unet, backend="hidet", dynamic=False, options={'epilogue_fusion': True, 'shape_padding': True}) #.to(device=device, dtype=torch.bfloat16)
+    #pipe.unet = torch.compile(pipe.unet, dynamic=False)
+    #pipe.unet = torch.compile(pipe.unet, backend="hidet", dynamic=False, options={"search_space": 0})
+    #pipe.unet = torch.compile(pipe.unet, backend="torch_tensorrt", dynamic=False, options={"precision": torch.bfloat16, "optimization_level": 4,})
+    pipe.to(torch.device('cuda:0'), torch.bfloat16)
+
     return pipe
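Note the ordering at the end of load_and_prepare_model(): vaeXL is loaded in float32 on the CPU, but pipe.to(torch.device('cuda:0'), torch.bfloat16) casts every registered module, the swapped-in VAE included, to bf16. If the float32 VAE were meant to keep full precision, it would need recasting after the pipeline-wide move; a sketch:

# Hypothetical: keep the VAE in float32 after the pipeline-wide bf16 cast.
pipe.to(torch.device('cuda:0'), torch.bfloat16)
pipe.vae.to(device='cuda:0', dtype=torch.float32)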
+
+#hidet.option.parallel_build(False)
+#hidet.option.parallel_tune(2, 2.0)
+#torch._dynamo.config.suppress_errors = True
+#torch._dynamo.disallow_in_graph(diffusers.models.attention.BasicTransformerBlock)
+
+# more search
+#hidet.torch.dynamo_config.search_space(0)
+#hidet.torch.dynamo_config.dump_graph_ir("./local_graph")
+# hidet.option.cache_dir("local_cache")
+# automatically transform the model to use float16 data type
+#hidet.torch.dynamo_config.use_fp16(True)
+# use float16 data type as the accumulate data type in operators with reduction
+#hidet.torch.dynamo_config.use_fp16_reduction(True)
+# use tensorcore
+#hidet.torch.dynamo_config.use_tensor_core()
+#hidet.torch.dynamo_config.steal_weights(False)
+
+# Preload and compile both models
+
 pipe = load_and_prepare_model()
 
+MAX_SEED = np.iinfo(np.int64).max
 
 neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
 
 def upload_to_ftp(filename):
     try:
         transport = paramiko.Transport((FTP_HOST, 22))
+        destination_path = FTP_DIR + filename
         transport.connect(username = FTP_USER, password = FTP_PASS)
         sftp = paramiko.SFTPClient.from_transport(transport)
         sftp.put(filename, destination_path)
     except Exception as e:
         print(f"FTP upload error: {e}")
 
 def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
+    filename = f'rv_C_{timestamp}.txt'
     with open(filename, "w") as f:
+        f.write(f"Realvis 5.0 (Tester C) \n")
         f.write(f"Date/time: {timestamp} \n")
         f.write(f"Prompt: {prompt} \n")
         f.write(f"Steps: {num_inference_steps} \n")
         f.write(f"Guidance Scale: {guidance_scale} \n")
         f.write(f"SPACE SETUP: \n")
         f.write(f"Model Scheduler: Euler_a all_custom before cuda \n")
+        f.write(f"Model VAE: sdxl-vae-bf16\n")
         f.write(f"To cuda and bfloat \n")
     upload_to_ftp(filename)
+
+code = r'''
+import torch
+import paramiko
+import os
+
+FTP_HOST = os.getenv("FTP_HOST")
+FTP_USER = os.getenv("FTP_USER")
+FTP_PASS = os.getenv("FTP_PASS")
+FTP_DIR = os.getenv("FTP_DIR")
+
+def scheduler_swap_callback(pipeline, step_index, timestep, callback_kwargs):
+    # adjust the batch_size of prompt_embeds according to guidance_scale
+    if step_index == int(pipeline.num_timesteps * 0.1):
+        print("-- swapping torch modes --")
+        # pipeline.scheduler = euler_scheduler
+        torch.set_float32_matmul_precision("high")
+        # pipe.vae = vae_b
+        torch.backends.cudnn.allow_tf32 = True
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cuda.preferred_blas_library="cublaslt"
+        #if step_index == int(pipeline.num_timesteps * 0.5):
+        #    torch.set_float32_matmul_precision("medium")
+        #callback_kwargs["latents"] = callback_kwargs["latents"].to(torch.float64)
+        #pipe.unet.to(torch.float64)
+        # pipe.guidance_scale=1.0
+        # pipe.scheduler.set_timesteps(num_inference_steps*.70)
+        # print(f"-- setting step {pipeline.num_timesteps * 0.1} --")
+        # pipeline.scheduler._step_index = pipeline.num_timesteps * 0.1
+    if step_index == int(pipeline.num_timesteps * 0.9):
+        torch.backends.cuda.preferred_blas_library="cublas"
+        torch.backends.cudnn.allow_tf32 = False
+        torch.backends.cuda.matmul.allow_tf32 = False
+        torch.set_float32_matmul_precision("highest")
+        #callback_kwargs["latents"] = callback_kwargs["latents"].to(torch.bfloat16)
+        #pipe.unet.to(torch.float64)
+        #pipeline.unet.set_default_attn_processor() ## custom ##
+        # pipe.vae = vae_a
+        # pipe.unet = unet_a
+        torch.backends.cudnn.deterministic = False
+        print("-- swapping torch modes --")
+        # pipeline.scheduler = heun_scheduler
+        #pipe.scheduler.set_timesteps(num_inference_steps*.70)
+        # print(f"-- setting step {pipeline.num_timesteps * 0.9} --")
+        # pipeline.scheduler._step_index = pipeline.num_timesteps * 0.9
+    return callback_kwargs
+
+def upload_to_ftp(filename):
+    try:
+        transport = paramiko.Transport((FTP_HOST, 22))
+        destination_path = FTP_DIR + filename
+        transport.connect(username = FTP_USER, password = FTP_PASS)
+        sftp = paramiko.SFTPClient.from_transport(transport)
+        sftp.put(filename, destination_path)
+        sftp.close()
+        transport.close()
+        print(f"Uploaded {filename} to FTP server")
+    except Exception as e:
+        print(f"FTP upload error: {e}")
+
+def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
+    filename = f'rv_B_{timestamp}.txt'
+    with open(filename, "w") as f:
+        f.write(f"Realvis 5.0 (Tester B) \n")
+        f.write(f"Date/time: {timestamp} \n")
+        f.write(f"Prompt: {prompt} \n")
+        f.write(f"Steps: {num_inference_steps} \n")
+        f.write(f"Guidance Scale: {guidance_scale} \n")
+        f.write(f"SPACE SETUP: \n")
+        f.write(f"Model VAE: sdxl-vae-bf16\n")
+        f.write(f"To cuda and bfloat \n")
+    return filename
+'''
+pyx = cyper.inline(code, fast_indexing=True, directives=dict(boundscheck=False, wraparound=False, language_level=3))
+
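cyper.inline compiles the source string above with Cython and returns a module-like object, so pyx.scheduler_swap_callback, pyx.upload_to_ftp, and pyx.uploadNote used below are the compiled counterparts of the plain-Python versions defined earlier. A minimal, self-contained illustration of the same mechanism (assuming only that the cyper package is installed):

import cyper

# Compile a tiny module from a source string; the directives mirror the call above.
mod = cyper.inline(
    "def square(x):\n    return x * x",
    directives=dict(language_level=3),
)
print(mod.square(7))  # -> 49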
 @spaces.GPU(duration=40)
 def generate_30(
     prompt: str,
 
     height: int = 768,
     guidance_scale: float = 4,
     num_inference_steps: int = 125,
+    sage: bool = False,
     use_resolution_binning: bool = True,
+    progress=gr.Progress(track_tqdm=True)
 ):
+    if sage==True:
+        F.scaled_dot_product_attention = sageattn
+    if sage==False:
+        F.scaled_dot_product_attention = F.scaled_dot_product_attention
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     options = {
         "prompt": [prompt],
         "negative_prompt": [negative_prompt],
 
         "num_inference_steps": num_inference_steps,
         "generator": generator,
         "output_type": "pil",
+        "callback_on_step_end": pyx.scheduler_swap_callback,
     }
     if use_resolution_binning:
         options["use_resolution_binning"] = True
     images = []
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = pyx.uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
+    pyx.upload_to_ftp(filename)
     batch_options = options.copy()
     rv_image = pipe(**batch_options).images[0]
+    sd_image_path = f"rv_B_{timestamp}.png"
     rv_image.save(sd_image_path,optimize=False,compress_level=0)
+    pyx.upload_to_ftp(sd_image_path)
+    torch.set_float32_matmul_precision("medium")
+    with torch.no_grad():
+        upscale = upscaler(rv_image, tiling=True, tile_width=256, tile_height=256)
+    downscale1 = upscale.resize((upscale.width // 4, upscale.height // 4), Image.LANCZOS)
+    downscale_path = f"rv50_upscale_{timestamp}.png"
+    downscale1.save(downscale_path,optimize=False,compress_level=0)
+    pyx.upload_to_ftp(downscale_path)
     unique_name = str(uuid.uuid4()) + ".png"
     os.symlink(sd_image_path, unique_name)
     return [unique_name]
457
  height: int = 768,
458
  guidance_scale: float = 4,
459
  num_inference_steps: int = 125,
460
+ sage: bool = False,
461
  use_resolution_binning: bool = True,
462
+ progress=gr.Progress(track_tqdm=True)
463
  ):
464
+ if sage==True:
465
+ F.scaled_dot_product_attention = sageattn
466
+ if sage==False:
467
+ F.scaled_dot_product_attention = F.scaled_dot_product_attention
468
  seed = random.randint(0, MAX_SEED)
469
  generator = torch.Generator(device='cuda').manual_seed(seed)
 
 
470
  options = {
471
  "prompt": [prompt],
472
  "negative_prompt": [negative_prompt],
 
477
  "num_inference_steps": num_inference_steps,
478
  "generator": generator,
479
  "output_type": "pil",
480
+ "callback_on_step_end": pyx.scheduler_swap_callback,
481
  }
482
  if use_resolution_binning:
483
  options["use_resolution_binning"] = True
 
485
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
486
  uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
487
  batch_options = options.copy()
488
+ gc.collect()
489
+ torch.cuda.empty_cache()
490
+ time.sleep(2)
491
  rv_image = pipe(**batch_options).images[0]
492
+ sd_image_path = f"rv_B_{timestamp}.png"
493
  rv_image.save(sd_image_path,optimize=False,compress_level=0)
494
  upload_to_ftp(sd_image_path)
495
  unique_name = str(uuid.uuid4()) + ".png"
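The gc.collect() / torch.cuda.empty_cache() pair above returns cached, unoccupied CUDA memory to the driver between runs, and the time.sleep(2) gives the allocator a moment before the next large allocation. If the same pattern were wanted in every generator, it could be factored out (the free_gpu name is hypothetical):

import gc
import time
import torch

def free_gpu(pause: float = 2.0):
    # Drop unreferenced Python objects, release cached CUDA blocks back to
    # the driver, then pause briefly before the next large allocation.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    time.sleep(pause)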
 
     height: int = 768,
     guidance_scale: float = 4,
     num_inference_steps: int = 125,
+    sage: bool = False,
     use_resolution_binning: bool = True,
+    progress=gr.Progress(track_tqdm=True)
 ):
+    if sage==True:
+        F.scaled_dot_product_attention = sageattn
+    if sage==False:
+        F.scaled_dot_product_attention = F.scaled_dot_product_attention
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     options = {
         "prompt": [prompt],
         "negative_prompt": [negative_prompt],
 
         "num_inference_steps": num_inference_steps,
         "generator": generator,
         "output_type": "pil",
+        "callback_on_step_end": pyx.scheduler_swap_callback,
     }
     if use_resolution_binning:
         options["use_resolution_binning"] = True
 
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
     batch_options = options.copy()
+    gc.collect()
+    torch.cuda.empty_cache()
+    time.sleep(2)
     rv_image = pipe(**batch_options).images[0]
+    sd_image_path = f"rv_B_{timestamp}.png"
     rv_image.save(sd_image_path,optimize=False,compress_level=0)
     upload_to_ftp(sd_image_path)
     unique_name = str(uuid.uuid4()) + ".png"
 
     run_button_30 = gr.Button("Run 30 Seconds", scale=0)
     run_button_60 = gr.Button("Run 60 Seconds", scale=0)
     run_button_90 = gr.Button("Run 90 Seconds", scale=0)
     result = gr.Gallery(label="Result", columns=1, show_label=False)
 
     with gr.Row():
     with gr.Row():
         width = gr.Slider(
             label="Width",
+            minimum=448,
             maximum=MAX_IMAGE_SIZE,
             step=64,
             value=768,
         )
         height = gr.Slider(
             label="Height",
+            minimum=448,
             maximum=MAX_IMAGE_SIZE,
             step=64,
             value=768,
 
     )
     num_inference_steps = gr.Slider(
         label="Number of inference steps",
+        minimum=10,
         maximum=1000,
+        step=10,
         value=180,
     )
+    options = [True, False]
+    sage = gr.Radio(
+        show_label=True,
+        container=True,
+        interactive=True,
+        choices=options,
+        value=False,
+        label="Use SageAttention: ",
+    )
 
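The Radio over [True, False] passes the selected choice straight through as a Python bool to the sage parameter; a gr.Checkbox would be the more conventional single-boolean control, e.g.:

# Equivalent boolean toggle using a checkbox instead of a two-choice radio.
sage = gr.Checkbox(label="Use SageAttention", value=False, interactive=True)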
     gr.Examples(
         examples=examples,
 
             height,
             guidance_scale,
             num_inference_steps,
+            sage,
         ],
         outputs=[result],
     )
 
             height,
             guidance_scale,
             num_inference_steps,
+            sage,
         ],
         outputs=[result],
     )
 
             height,
             guidance_scale,
             num_inference_steps,
+            sage,
         ],
         outputs=[result],
     )
+
     gr.Markdown("### REALVISXL V5.0")
     predefined_gallery = gr.Gallery(label="REALVISXL V5.0", columns=3, show_label=False, value=load_predefined_images1())
 
 description = "Text Generator Application by ecarbo"
 
 if __name__ == "__main__":
+
+    cuda_directories = find_cuda_directories()
+
+    if cuda_directories:
+        print("Found CUDA directories:")
+        for directory, version in cuda_directories.items():
+            print(f"- {directory}: Version {version}")
+    else:
+        print("No CUDA directories found in the specified paths.")
+
+    # Example of how to find the "best" CUDA path (customize logic)
+    if cuda_directories:
+        # Simple example: just pick the first one. You might have more sophisticated selection criteria.
+        best_cuda_path = list(cuda_directories.keys())[0]
+        print(f"Using CUDA path: {best_cuda_path}")
+
     demo_interface = demo.queue(max_size=50) # Remove .launch() here
 
     text_gen_interface = gr.Interface(