neuralleap committed on
Commit 93076d0 · verified · 1 Parent(s): bbb4028

Update app.py

Files changed (1): app.py (+71, -121)
app.py CHANGED

@@ -1,13 +1,18 @@
 import gradio as gr
 import os
 import time
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import HfApi
 import requests
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 
-# Configure requests to be more resilient
+# Configuration
+model_name = "WYNN747/Burmese-GPT-v3"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "600"  # 10 minutes
+
+# Configure resilient HTTP session
 retry_strategy = Retry(
     total=5,
     backoff_factor=1,
@@ -19,100 +24,74 @@ session = requests.Session()
 session.mount("https://", adapter)
 session.mount("http://", adapter)
 
-# Set longer timeout for model downloads
-os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "600"  # 10 minutes timeout
+tokenizer = None
+model = None
 
-# Model name
-model_name = "WYNN747/Burmese-GPT-v3"
-
-# Function to load model with retries
 def load_model_with_retries(model_name, max_retries=3, retry_delay=5):
     for attempt in range(max_retries):
         try:
-            print(f"Loading model attempt {attempt+1}/{max_retries}")
+            print(f"🔄 Loading model attempt {attempt+1}/{max_retries}")
             tokenizer = AutoTokenizer.from_pretrained(
-                model_name,
-                use_fast=False,  # Sometimes the fast tokenizer causes issues
-                local_files_only=False,
-                token=os.environ.get("HF_TOKEN", None)  # Use token if available
+                model_name,
+                use_fast=True,
+                token=os.environ.get("HF_TOKEN", None)
             )
-
             model = AutoModelForCausalLM.from_pretrained(
                 model_name,
-                local_files_only=False,
-                token=os.environ.get("HF_TOKEN", None),
+                torch_dtype=torch.float16,
                 trust_remote_code=True,
-                low_cpu_mem_usage=True,  # Help with memory issues
-                torch_dtype="auto"  # Use appropriate dtype
-            )
+                low_cpu_mem_usage=True
+            ).to(device)
+            print("✅ Model loaded successfully!")
             return tokenizer, model
         except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError) as e:
             if attempt < max_retries - 1:
-                print(f"Timeout error: {str(e)}. Retrying in {retry_delay} seconds...")
+                print(f"⚠️ Timeout: {str(e)}. Retrying in {retry_delay} seconds...")
                 time.sleep(retry_delay)
-                retry_delay *= 2  # Exponential backoff
+                retry_delay *= 2
             else:
-                raise Exception(f"Failed to load model after {max_retries} attempts: {str(e)}")
+                raise Exception(f"❌ Failed to load model after {max_retries} attempts: {str(e)}")
         except Exception as e:
-            raise Exception(f"Error loading model: {str(e)}")
+            raise Exception(f"❌ Error loading model: {str(e)}")
 
-# Load model
 try:
     tokenizer, model = load_model_with_retries(model_name)
-    print("Model loaded successfully!")
 except Exception as e:
-    print(f"Error loading model: {str(e)}")
-    # Create placeholder objects for UI to start
-    # This allows the UI to start even if model loading fails initially
-    tokenizer = None
-    model = None
+    print(str(e))
 
 def generate_text(prompt, max_length=100, temperature=0.7):
-    """Generate text based on the input prompt."""
     global tokenizer, model
-
-    # Check if model is loaded
     if tokenizer is None or model is None:
         try:
-            # Try loading the model again if it failed initially
             tokenizer, model = load_model_with_retries(model_name)
-            print("Model loaded on demand")
         except Exception as e:
-            return f"Error: Model could not be loaded. Please check your internet connection and try again. Details: {str(e)}"
-
+            return f"❌ Model could not be loaded. Details: {str(e)}"
     try:
-        # Process the input
-        inputs = tokenizer(prompt, return_tensors="pt")
-
-        # Generate
-        outputs = model.generate(
-            inputs["input_ids"],
-            max_length=max_length,
-            temperature=temperature,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id if hasattr(tokenizer, 'eos_token_id') else tokenizer.pad_token_id,
-            num_return_sequences=1,
-            repetition_penalty=1.2,  # Reduce repetition
-            top_k=50,
-            top_p=0.95
-        )
-
-        # Decode and return the generated text
-        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return generated_text
+        inputs = tokenizer(prompt, return_tensors="pt").to(device)
+        model.eval()
+        with torch.no_grad():
+            outputs = model.generate(
+                inputs["input_ids"],
+                max_length=max_length,
+                temperature=temperature,
+                do_sample=True,
+                top_k=50,
+                top_p=0.95,
+                repetition_penalty=1.2,
+                pad_token_id=tokenizer.eos_token_id
+            )
+        return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
-        return f"Error during text generation: {str(e)}"
+        return f"❌ Text generation error: {str(e)}"
 
-# Create Gradio interface with better error handling
+# Gradio UI
 with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
-    gr.Markdown("# Burmese-GPT-v3 Text Generation")
-    gr.Markdown("Enter a prompt in Burmese to generate text using the Burmese-GPT-v3 model.")
-
-    # Add status indicator
+    gr.Markdown("# 📝 Burmese-GPT-v3 Text Generator")
+    gr.Markdown("Enter a Burmese prompt below and generate text using the `WYNN747/Burmese-GPT-v3` model.")
+
     with gr.Row():
-        model_status = gr.Markdown("⚠️ Model status: Checking..." if model is None else "✅ Model loaded and ready")
-
-    # Model loading button (for manual retry)
+        model_status = gr.Markdown("✅ Model is loaded and ready!" if model else "⚠️ Model not loaded yet.")
+
     def load_model_manually():
         global tokenizer, model
         try:
@@ -120,82 +99,53 @@ with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
             return "✅ Model loaded successfully!"
         except Exception as e:
             return f"❌ Failed to load model: {str(e)}"
-
-    load_button = gr.Button("Retry Loading Model")
+
+    load_button = gr.Button("🔄 Retry Loading Model")
     load_button.click(fn=load_model_manually, outputs=model_status)
-
-    # Add model info
-    gr.Markdown("### Model Information")
-    gr.Markdown("- **Model Name**: WYNN747/Burmese-GPT-v3")
-    gr.Markdown("- **Description**: A language model for Burmese text generation")
-
-    # Input components
+
     with gr.Row():
         with gr.Column(scale=3):
-            prompt = gr.Textbox(
-                lines=5,
-                placeholder="Enter your Burmese text prompt here...",
-                label="Prompt"
-            )
+            prompt = gr.Textbox(lines=5, placeholder="Enter Burmese text here...", label="Prompt")
         with gr.Column(scale=1):
-            max_length = gr.Slider(
-                minimum=50,
-                maximum=500,
-                value=100,
-                step=10,
-                label="Max Length"
-            )
-            temperature = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.7,
-                step=0.1,
-                label="Temperature"
-            )
-
-    # Generate button
-    generate_btn = gr.Button("Generate Text", variant="primary")
-
-    # Output
-    output = gr.Textbox(lines=10, label="Generated Text")
-
-    # Set up the generation function
+            max_length = gr.Slider(50, 500, value=100, step=10, label="Max Length")
+            temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
+
+    generate_btn = gr.Button("🚀 Generate Text")
+    output = gr.Textbox(lines=10, label="Generated Output")
+
     generate_btn.click(
         fn=generate_text,
         inputs=[prompt, max_length, temperature],
         outputs=output
     )
-
-    # Add examples if available
-    with gr.Accordion("Examples", open=False):
-        gr.Markdown("Click on any example to try it:")
-        example_prompts = [
+
+    with gr.Accordion("📌 Examples", open=False):
+        gr.Markdown("Try these example prompts:")
+        examples = [
             ["ဟုတ်ကဲ့ ကျွန်တော် ဗမာစကား ပြောတတ်ပါတယ်။", 150, 0.7],
-            ["မြန်မာနိုင်ငံမှာ", 200, 0.8],
+            ["မြန်မာနိုင်ငံမှာ", 200, 0.8]
         ]
-        for idx, example in enumerate(example_prompts):
+        for idx, example in enumerate(examples):
            example_btn = gr.Button(f"Example {idx+1}: {example[0][:20]}...")
            example_btn.click(
-                lambda e=example: (e[0], e[1], e[2]),
-                inputs=[],
+                lambda e=example: (e[0], e[1], e[2]),
+                inputs=[],
                outputs=[prompt, max_length, temperature]
            ).then(
                fn=generate_text,
                inputs=[prompt, max_length, temperature],
                outputs=output
            )
-
-    # Add troubleshooting section
-    gr.Markdown("### Troubleshooting")
+
+    gr.Markdown("### 🛠️ Troubleshooting")
    gr.Markdown("""
-    - If you see timeout errors, try refreshing the page or clicking "Retry Loading Model"
-    - If the model still fails to load, try again later when network conditions improve
-    - Make sure you have a stable internet connection
-    """)
+    - Try the "Retry Loading Model" button if the model fails to load.
+    - Keep prompts short initially to test responsiveness.
+    - Make sure you are using a GPU-enabled space (T4 Medium or better).
+    """)
 
-# Launch the app with appropriate settings
 demo.launch(
     show_error=True,
-    server_name="0.0.0.0",  # Listen on all network interfaces
-    share=False  # Set to True for temporary public link
-)
+    server_name="0.0.0.0",
+    share=False
+)
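One caveat in the loading hunk: the new code pins torch_dtype=torch.float16 but still falls back to device = "cpu" when no GPU is present, and half precision is slow or unsupported for many CPU ops. A hedged variant (a suggestion, not what the commit does) keys the dtype off the detected device:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
# float16 halves GPU memory use; fall back to float32 on CPU,
# where half-precision kernels are slow or missing.
dtype = torch.float16 if device == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained("WYNN747/Burmese-GPT-v3")
model = AutoModelForCausalLM.from_pretrained(
    "WYNN747/Burmese-GPT-v3",
    torch_dtype=dtype,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).to(device)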
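The examples accordion wires each prompt to a hand-made button plus a .click(...).then(...) chain. Gradio's built-in gr.Examples component gives the same click-to-fill-and-run behaviour in one call; a minimal sketch, assuming a recent Gradio release that supports run_on_click (the English glosses in the comments are rough translations):

import gradio as gr

def generate_text(prompt, max_length=100, temperature=0.7):
    return prompt  # stand-in for the real generator defined above

with gr.Blocks() as demo:
    prompt = gr.Textbox(lines=5, label="Prompt")
    max_length = gr.Slider(50, 500, value=100, step=10, label="Max Length")
    temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
    output = gr.Textbox(lines=10, label="Generated Output")
    gr.Examples(
        examples=[
            ["ဟုတ်ကဲ့ ကျွန်တော် ဗမာစကား ပြောတတ်ပါတယ်။", 150, 0.7],  # "Yes, I can speak Burmese."
            ["မြန်မာနိုင်ငံမှာ", 200, 0.8],                          # "In Myanmar..."
        ],
        inputs=[prompt, max_length, temperature],
        outputs=output,
        fn=generate_text,
        run_on_click=True,  # fill the inputs and run generation when clicked
    )

That said, the hand-rolled loop is correct as written: the lambda e=example default-argument idiom is the right way to bind the loop variable at definition time.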
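Finally, long float16 generations on a busy Space can exceed default request handling; enabling Gradio's request queue before launch is the usual mitigation (a suggestion, not in this commit):

# Drop-in replacement for the demo.launch(...) call above.
demo.queue(max_size=16).launch(  # queue concurrent requests instead of rejecting them
    show_error=True,
    server_name="0.0.0.0",
    share=False,
)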