Spaces:

prithivMLmods
/

core-OCR

Running on Zero

App Files Files Community

prithivMLmods committed on Mar 6

Commit

5b985be

verified ·

1 Parent(s): ecce109

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -43

app.py CHANGED Viewed

@@ -42,6 +42,24 @@ h1 {
 }
 '''
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -88,23 +106,6 @@ def clean_chat_history(chat_history):
             cleaned.append(msg)
     return cleaned
-# Helper: returns HTML code for a thin light-green animated progress bar with a label.
-def progress_bar_html(label: str) -> str:
-    return f'''
-<div style="display: flex; align-items: center;">
-  <span>{label}</span>
-  <div style="flex-grow: 1; margin-left: 8px; height: 5px; background-color: lightgreen; overflow: hidden; position: relative;">
-    <div style="width: 100%; height: 100%; background: linear-gradient(90deg, rgba(255,255,255,0) 0%, rgba(255,255,255,0.5) 50%, rgba(255,255,255,0) 100%); animation: progressAnim 1s linear infinite;"></div>
-  </div>
-</div>
-<style>
-@keyframes progressAnim {{
-  0% {{ transform: translateX(-100%); }}
-  100% {{ transform: translateX(100%); }}
-}}
-</style>
-'''
 # Environment variables and parameters for Stable Diffusion XL
 MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
@@ -185,7 +186,6 @@ def generate_image_fn(
         batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
         if "negative_prompt" in batch_options and batch_options["negative_prompt"] is not None:
             batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
-        # Wrap the pipeline call in autocast if using CUDA
         if device.type == "cuda":
             with torch.autocast("cuda", dtype=torch.float16):
                 outputs = sd_pipe(**batch_options)
@@ -214,13 +214,12 @@ def generate(
     text = input_dict["text"]
     files = input_dict.get("files", [])
-    # For image generation triggered by "@image"
     if text.strip().lower().startswith("@image"):
         # Remove the "@image" tag and use the rest as prompt
         prompt = text[len("@image"):].strip()
-        # Yield a progress bar with label "Generating Image"
-        progress_component = gr.HTML(progress_bar_html("Generating Image"))
-        yield progress_component
         image_paths, used_seed = generate_image_fn(
             prompt=prompt,
             negative_prompt="",
@@ -234,7 +233,7 @@ def generate(
             use_resolution_binning=True,
             num_images=1,
         )
-        # Clear the progress bar (replace with empty HTML) and then yield the image
         yield gr.HTML.update(value="")
         yield gr.Image(image_paths[0])
         return  # Exit early
@@ -255,7 +254,6 @@ def generate(
         conversation = clean_chat_history(chat_history)
         conversation.append({"role": "user", "content": text})
-    # If there are attached image files, use multimodal processing
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
@@ -277,19 +275,17 @@ def generate(
         thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
         thread.start()
         buffer = ""
-        # Yield a progress bar with label "Thinking..."
-        progress_component = gr.HTML(progress_bar_html("Thinking..."))
-        yield progress_component
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
             time.sleep(0.01)
-        # Clear the progress bar and yield the final result text.
-        yield gr.HTML.update(value="")
-        yield buffer
     else:
-        # For pure text responses:
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
@@ -307,23 +303,23 @@ def generate(
             "num_beams": 1,
             "repetition_penalty": repetition_penalty,
         }
-        t = Thread(target=model.generate, kwargs=generation_kwargs)
-        t.start()
-        outputs = []
-        # Yield a progress bar with label "Thinking..."
-        progress_component = gr.HTML(progress_bar_html("Thinking..."))
-        yield progress_component
         for new_text in streamer:
-            outputs.append(new_text)
-        final_response = "".join(outputs)
-        # Clear the progress bar and yield the final plain text result.
-        yield gr.HTML.update(value="")
-        yield final_response
         # If TTS was requested, convert the final response to speech.
         if is_tts and voice:
-            output_file = asyncio.run(text_to_speech(final_response, voice))
             yield gr.Audio(output_file, autoplay=True)
 demo = gr.ChatInterface(

 }
 '''
+def progress_bar_html(label):
+    """Return an HTML snippet showing a label next to a thin, pulsing progress bar.
+
+    The markup is a flex row: a ``<span>`` holding ``label`` followed by a
+    110px x 5px grey track (``#e0e0e0``) whose light-green fill (``#90ee90``)
+    runs the ``progressAnimation`` keyframes (opacity 1 -> 0.5 -> 1 over 2s,
+    repeating indefinitely). The keyframes are defined inline in a ``<style>``
+    element that is part of the same snippet.
+
+    Args:
+        label: Text placed inside the ``<span>``. It is interpolated into the
+            markup as-is, with no HTML escaping applied here.
+
+    Returns:
+        The HTML markup as a string.
+    """
+    # Doubled braces in the CSS below are f-string escapes for literal braces.
+    return f"""
+    <div style="display: flex; align-items: center;">
+      <span style="margin-right: 10px;">{label}</span>
+      <div style="position: relative; width: 110px; height: 5px; background-color: #e0e0e0; border-radius: 2.5px; overflow: hidden;">
+        <div style="width: 100%; height: 100%; background-color: #90ee90; animation: progressAnimation 2s infinite;"></div>
+      </div>
+      <style>
+      @keyframes progressAnimation {{
+          0% {{ opacity: 1; }}
+          50% {{ opacity: 0.5; }}
+          100% {{ opacity: 1; }}
+      }}
+      </style>
+    </div>
+    """
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
             cleaned.append(msg)
     return cleaned
 # Environment variables and parameters for Stable Diffusion XL
 MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
         batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
         if "negative_prompt" in batch_options and batch_options["negative_prompt"] is not None:
             batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
         if device.type == "cuda":
             with torch.autocast("cuda", dtype=torch.float16):
                 outputs = sd_pipe(**batch_options)
     text = input_dict["text"]
     files = input_dict.get("files", [])
     if text.strip().lower().startswith("@image"):
         # Remove the "@image" tag and use the rest as prompt
         prompt = text[len("@image"):].strip()
+        # Show a progress bar for image generation
+        progress_html = progress_bar_html("Generating Image")
+        yield gr.HTML(progress_html)
         image_paths, used_seed = generate_image_fn(
             prompt=prompt,
             negative_prompt="",
             use_resolution_binning=True,
             num_images=1,
         )
+        # Remove the progress bar and then yield the generated image
         yield gr.HTML.update(value="")
         yield gr.Image(image_paths[0])
         return  # Exit early
         conversation = clean_chat_history(chat_history)
         conversation.append({"role": "user", "content": text})
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
         thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
         thread.start()
+        # Show a progress bar while processing the multimodal input
+        progress_html = progress_bar_html("Thinking...")
+        yield gr.HTML(progress_html)
         buffer = ""
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
             time.sleep(0.01)
+        # Update the same message to display the final result (removing the progress bar)
+        yield gr.HTML.update(value=buffer)
     else:
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
             "num_beams": 1,
             "repetition_penalty": repetition_penalty,
         }
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
+        thread.start()
+        # Show a progress bar for text generation
+        progress_html = progress_bar_html("Thinking...")
+        yield gr.HTML(progress_html)
+        buffer = ""
         for new_text in streamer:
+            buffer += new_text
+            buffer = buffer.replace("<|im_end|>", "")
+            time.sleep(0.01)
+        # Replace the progress bar with the final text response
+        yield gr.HTML.update(value=buffer)
         # If TTS was requested, convert the final response to speech.
         if is_tts and voice:
+            output_file = asyncio.run(text_to_speech(buffer, voice))
             yield gr.Audio(output_file, autoplay=True)
 demo = gr.ChatInterface(