Spaces:

prithivMLmods
/

core-OCR

Running on Zero

App Files Files Community

prithivMLmods committed on Mar 6

Commit

3fb8098

verified ·

1 Parent(s): 5b985be

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -41

app.py CHANGED Viewed

@@ -42,24 +42,6 @@ h1 {
 }
 '''
-def progress_bar_html(label):
-    """Returns an HTML snippet with a label and an animated thin progress bar."""
-    return f"""
-    <div style="display: flex; align-items: center;">
-      <span style="margin-right: 10px;">{label}</span>
-      <div style="position: relative; width: 110px; height: 5px; background-color: #e0e0e0; border-radius: 2.5px; overflow: hidden;">
-        <div style="width: 100%; height: 100%; background-color: #90ee90; animation: progressAnimation 2s infinite;"></div>
-      </div>
-      <style>
-      @keyframes progressAnimation {{
-          0% {{ opacity: 1; }}
-          50% {{ opacity: 0.5; }}
-          100% {{ opacity: 1; }}
-      }}
-      </style>
-    </div>
-    """
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -186,6 +168,7 @@ def generate_image_fn(
         batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
         if "negative_prompt" in batch_options and batch_options["negative_prompt"] is not None:
             batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
         if device.type == "cuda":
             with torch.autocast("cuda", dtype=torch.float16):
                 outputs = sd_pipe(**batch_options)
@@ -214,12 +197,35 @@ def generate(
     text = input_dict["text"]
     files = input_dict.get("files", [])
     if text.strip().lower().startswith("@image"):
-        # Remove the "@image" tag and use the rest as prompt
         prompt = text[len("@image"):].strip()
-        # Show a progress bar for image generation
-        progress_html = progress_bar_html("Generating Image")
-        yield gr.HTML(progress_html)
         image_paths, used_seed = generate_image_fn(
             prompt=prompt,
             negative_prompt="",
@@ -233,8 +239,7 @@ def generate(
             use_resolution_binning=True,
             num_images=1,
         )
-        # Remove the progress bar and then yield the generated image
-        yield gr.HTML.update(value="")
         yield gr.Image(image_paths[0])
         return  # Exit early
@@ -275,16 +280,21 @@ def generate(
         thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
         thread.start()
-        # Show a progress bar while processing the multimodal input
-        progress_html = progress_bar_html("Thinking...")
-        yield gr.HTML(progress_html)
         buffer = ""
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
             time.sleep(0.01)
-        # Update the same message to display the final result (removing the progress bar)
-        yield gr.HTML.update(value=buffer)
     else:
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
@@ -303,23 +313,28 @@ def generate(
             "num_beams": 1,
             "repetition_penalty": repetition_penalty,
         }
-        thread = Thread(target=model.generate, kwargs=generation_kwargs)
-        thread.start()
-        # Show a progress bar for text generation
-        progress_html = progress_bar_html("Thinking...")
-        yield gr.HTML(progress_html)
-        buffer = ""
         for new_text in streamer:
-            buffer += new_text
-            buffer = buffer.replace("<|im_end|>", "")
-            time.sleep(0.01)
-        # Replace the progress bar with the final text response
-        yield gr.HTML.update(value=buffer)
         # If TTS was requested, convert the final response to speech.
         if is_tts and voice:
-            output_file = asyncio.run(text_to_speech(buffer, voice))
             yield gr.Audio(output_file, autoplay=True)
 demo = gr.ChatInterface(

 }
 '''
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
         batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
         if "negative_prompt" in batch_options and batch_options["negative_prompt"] is not None:
             batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
+        # Wrap the pipeline call in autocast if using CUDA
         if device.type == "cuda":
             with torch.autocast("cuda", dtype=torch.float16):
                 outputs = sd_pipe(**batch_options)
     text = input_dict["text"]
     files = input_dict.get("files", [])
+    # Define an HTML template for the animated progress bar.
+    # The bar is a thin 5px line in light green with a simple opacity animation.
+    progress_bar_html = """
+<div style="display: flex; align-items: center;">
+    <span>{message}</span>
+    <div style="flex-grow: 1; margin-left: 10px;">
+         <div class="progress-bar"></div>
+    </div>
+</div>
+<style>
+.progress-bar {{
+    width: 100%;
+    height: 5px;
+    background: lightgreen;
+    animation: progressAnim 2s infinite;
+}}
+@keyframes progressAnim {{
+    0% {{ opacity: 0.5; }}
+    50% {{ opacity: 1; }}
+    100% {{ opacity: 0.5; }}
+}}
+</style>
+"""
     if text.strip().lower().startswith("@image"):
+        # Remove the "@image" tag and use the rest as prompt.
         prompt = text[len("@image"):].strip()
+        # Yield progress bar for image generation.
+        yield gr.HTML(progress_bar_html.format(message="Generating Image..."))
         image_paths, used_seed = generate_image_fn(
             prompt=prompt,
             negative_prompt="",
             use_resolution_binning=True,
             num_images=1,
         )
+        # Once the image is generated, yield the image (thus replacing the progress bar).
         yield gr.Image(image_paths[0])
         return  # Exit early
         thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
         thread.start()
+        # Yield progress bar for multimodal input processing.
+        yield gr.HTML(progress_bar_html.format(message="Thinking..."))
         buffer = ""
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
             time.sleep(0.01)
+            # During streaming, update the progress UI (progress bar remains visible).
+            combined_html = f"""
+<div style="display: flex; flex-direction: column;">
+  {progress_bar_html.format(message="Thinking...")}
+  <div style="margin-top: 10px;">{buffer}</div>
+</div>
+"""
+            yield gr.HTML(combined_html)
     else:
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             "num_beams": 1,
             "repetition_penalty": repetition_penalty,
         }
+        t = Thread(target=model.generate, kwargs=generation_kwargs)
+        t.start()
+        # Yield initial progress bar for text generation.
+        yield gr.HTML(progress_bar_html.format(message="Thinking..."))
+        outputs = []
         for new_text in streamer:
+            outputs.append(new_text)
+            combined_html = f"""
+<div style="display: flex; flex-direction: column;">
+  {progress_bar_html.format(message="Thinking...")}
+  <div style="margin-top: 10px;">{''.join(outputs)}</div>
+</div>
+"""
+            yield gr.HTML(combined_html)
+        final_response = "".join(outputs)
+        # Final response: progress bar is removed and only the generated text is shown.
+        yield final_response
         # If TTS was requested, convert the final response to speech.
         if is_tts and voice:
+            output_file = asyncio.run(text_to_speech(final_response, voice))
             yield gr.Audio(output_file, autoplay=True)
 demo = gr.ChatInterface(