Spaces:

Alpha-VLLM
/

Lumina-Image-2.0

Running on Zero

Dakerqi committed on Feb 9

Commit

45eb930

verified ·

1 Parent(s): efcd292

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -76,13 +76,12 @@ def encode_prompt(prompt_batch, text_encoder, tokenizer, proportion_empty_prompt
         )
-        device = text_encoder.device
-        text_input_ids = text_inputs.input_ids.to(device)
-        prompt_masks = text_inputs.attention_mask.to(device)
         prompt_embeds = text_encoder(
-            input_ids=text_input_ids,
-            attention_mask=prompt_masks,
             output_hidden_states=True,
         ).hidden_states[-2]
@@ -126,7 +125,7 @@ def model_main(args, master_port, rank, request_queue, response_queue, mp_barrie
     if args.num_gpus > 1:
         raise NotImplementedError("Inference with >1 GPUs not yet supported")
-    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b", token=hf_token)
     tokenizer.padding_side = "right"
     vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", token=hf_token).cuda()

         )
+        text_input_ids = text_inputs.input_ids
+        prompt_masks = text_inputs.attention_mask
         prompt_embeds = text_encoder(
+            input_ids=text_input_ids.cuda(),
+            attention_mask=prompt_masks.cuda(),
             output_hidden_states=True,
         ).hidden_states[-2]
     if args.num_gpus > 1:
         raise NotImplementedError("Inference with >1 GPUs not yet supported")
+    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b", token=hf_token).cuda()
     tokenizer.padding_side = "right"
     vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", token=hf_token).cuda()