Spaces:

prithivMLmods
/

core-OCR

Running on Zero

App Files Files Community

prithivMLmods committed on Feb 8

Commit

6c3e861

verified ·

1 Parent(s): 753dbac

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -8

app.py CHANGED Viewed

@@ -98,11 +98,11 @@ def clean_chat_history(chat_history):
 # ============================================
 # Environment variables and parameters for Stable Diffusion XL
-MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # Use SDXL Model repo path via MODEL_VAL_PATH env var
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
 ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
-BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1"))  # For potential batched image generation
 # Load the SDXL pipeline
 sd_pipe = StableDiffusionXLPipeline.from_pretrained(
@@ -113,7 +113,11 @@ sd_pipe = StableDiffusionXLPipeline.from_pretrained(
 ).to(device)
 sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config)
-# Optional: compile the model for speedup
 if USE_TORCH_COMPILE:
     sd_pipe.compile()
@@ -191,16 +195,16 @@ def generate(
     repetition_penalty: float = 1.2,
 ):
     """
-    Generates chatbot responses with support for multimodal input, TTS, and now image generation.
-    If the query starts with:
-      - "@tts1" or "@tts2", it triggers text-to-speech.
-      - "@image", it triggers image generation using the SDXL pipeline.
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
     # ----------------------------
-    #  NEW: IMAGE GENERATION BRANCH
     # ----------------------------
     if text.strip().lower().startswith("@image"):
         # Remove the "@image" tag and use the rest as prompt
@@ -343,4 +347,5 @@ demo = gr.ChatInterface(
 )
 if __name__ == "__main__":
     demo.queue(max_size=20).launch(share=True)

 # ============================================
 # Environment variables and parameters for Stable Diffusion XL
+MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
 ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
+BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1"))  # For batched image generation
 # Load the SDXL pipeline
 sd_pipe = StableDiffusionXLPipeline.from_pretrained(
 ).to(device)
 sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config)
+# Fix for dtype mismatch in the text encoder: cast it to half precision when CUDA is available.
+if torch.cuda.is_available():
+    sd_pipe.text_encoder = sd_pipe.text_encoder.half()
+# Optional: compile the model for speedup if enabled
 if USE_TORCH_COMPILE:
     sd_pipe.compile()
     repetition_penalty: float = 1.2,
 ):
     """
+    Generates chatbot responses with support for multimodal input, TTS, and image generation.
+    Special commands:
+      - "@tts1" or "@tts2": triggers text-to-speech.
+      - "@image": triggers image generation using the SDXL pipeline.
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
     # ----------------------------
+    #  IMAGE GENERATION BRANCH
     # ----------------------------
     if text.strip().lower().startswith("@image"):
         # Remove the "@image" tag and use the rest as prompt
 )
 if __name__ == "__main__":
+    # share=True asks launch() to create a public link for the demo.
     demo.queue(max_size=20).launch(share=True)