Update app.py
app.py CHANGED
@@ -1,40 +1,199 @@
 import gradio as gr
+import os
+import time
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from huggingface_hub import HfApi
+import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
 
+# Configure requests to be more resilient
+retry_strategy = Retry(
+    total=5,
+    backoff_factor=1,
+    status_forcelist=[429, 500, 502, 503, 504],
+    allowed_methods=["HEAD", "GET", "OPTIONS"]
+)
+adapter = HTTPAdapter(max_retries=retry_strategy)
+session = requests.Session()
+session.mount("https://", adapter)
+session.mount("http://", adapter)
+
+# Set longer timeout for model downloads
+os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "600"  # 10 minutes timeout
+
+# Model name
 model_name = "WYNN747/Burmese-GPT-v3"
+
+# Function to load model with retries
+def load_model_with_retries(model_name, max_retries=3, retry_delay=5):
+    for attempt in range(max_retries):
+        try:
+            print(f"Loading model attempt {attempt+1}/{max_retries}")
+            tokenizer = AutoTokenizer.from_pretrained(
+                model_name,
+                use_fast=False,  # Sometimes the fast tokenizer causes issues
+                local_files_only=False,
+                token=os.environ.get("HF_TOKEN", None),  # Use token if available
+                trust_remote_code=True,
+                timeout=600  # 10 minutes timeout
+            )
+
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                local_files_only=False,
+                token=os.environ.get("HF_TOKEN", None),
+                trust_remote_code=True,
+                timeout=600,  # 10 minutes timeout
+                low_cpu_mem_usage=True,  # Help with memory issues
+                torch_dtype="auto"  # Use appropriate dtype
+            )
+            return tokenizer, model
+        except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError) as e:
+            if attempt < max_retries - 1:
+                print(f"Timeout error: {str(e)}. Retrying in {retry_delay} seconds...")
+                time.sleep(retry_delay)
+                retry_delay *= 2  # Exponential backoff
+            else:
+                raise Exception(f"Failed to load model after {max_retries} attempts: {str(e)}")
+        except Exception as e:
+            raise Exception(f"Error loading model: {str(e)}")
+
+# Load model
+try:
+    tokenizer, model = load_model_with_retries(model_name)
+    print("Model loaded successfully!")
+except Exception as e:
+    print(f"Error loading model: {str(e)}")
+    # Create placeholder objects for UI to start
+    # This allows the UI to start even if model loading fails initially
+    tokenizer = None
+    model = None
 
 def generate_text(prompt, max_length=100, temperature=0.7):
     """Generate text based on the input prompt."""
+    global tokenizer, model
+
+    # Check if model is loaded
+    if tokenizer is None or model is None:
+        try:
+            # Try loading the model again if it failed initially
+            tokenizer, model = load_model_with_retries(model_name)
+            print("Model loaded on demand")
+        except Exception as e:
+            return f"Error: Model could not be loaded. Please check your internet connection and try again. Details: {str(e)}"
+
+    try:
+        # Process the input
+        inputs = tokenizer(prompt, return_tensors="pt")
+
+        # Generate
+        outputs = model.generate(
+            inputs["input_ids"],
+            max_length=max_length,
+            temperature=temperature,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id if hasattr(tokenizer, 'eos_token_id') else tokenizer.pad_token_id,
+            num_return_sequences=1,
+            repetition_penalty=1.2,  # Reduce repetition
+            top_k=50,
+            top_p=0.95
+        )
+
+        # Decode and return the generated text
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return generated_text
+    except Exception as e:
+        return f"Error during text generation: {str(e)}"
+
+# Create Gradio interface with better error handling
+with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
+    gr.Markdown("# Burmese-GPT-v3 Text Generation")
+    gr.Markdown("Enter a prompt in Burmese to generate text using the Burmese-GPT-v3 model.")
+
+    # Add status indicator
+    with gr.Row():
+        model_status = gr.Markdown("⚠️ Model status: Checking..." if model is None else "✅ Model loaded and ready")
+
+    # Model loading button (for manual retry)
+    def load_model_manually():
+        global tokenizer, model
+        try:
+            tokenizer, model = load_model_with_retries(model_name)
+            return "✅ Model loaded successfully!"
+        except Exception as e:
+            return f"❌ Failed to load model: {str(e)}"
+
+    load_button = gr.Button("Retry Loading Model")
+    load_button.click(fn=load_model_manually, outputs=model_status)
+
+    # Add model info
+    gr.Markdown("### Model Information")
+    gr.Markdown("- **Model Name**: WYNN747/Burmese-GPT-v3")
+    gr.Markdown("- **Description**: A language model for Burmese text generation")
+
+    # Input components
+    with gr.Row():
+        with gr.Column(scale=3):
+            prompt = gr.Textbox(
+                lines=5,
+                placeholder="Enter your Burmese text prompt here...",
+                label="Prompt"
+            )
+        with gr.Column(scale=1):
+            max_length = gr.Slider(
+                minimum=50,
+                maximum=500,
+                value=100,
+                step=10,
+                label="Max Length"
+            )
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
+            )
+
+    # Generate button
+    generate_btn = gr.Button("Generate Text", variant="primary")
+
+    # Output
+    output = gr.Textbox(lines=10, label="Generated Text")
+
+    # Set up the generation function
+    generate_btn.click(
+        fn=generate_text,
+        inputs=[prompt, max_length, temperature],
+        outputs=output
     )
 
+    # Add examples if available
+    gr.Examples(
+        examples=[
+            ["ဟုတ်ကဲ့ ကျွန်တော် ဗမာစကား ပြောတတ်ပါတယ်။", 150, 0.7],
+            ["မြန်မာနိုင်ငံမှာ", 200, 0.8],
+        ],
+        inputs=[prompt, max_length, temperature],
+        outputs=output,
+        fn=generate_text,
+        cache_examples=True,
+    )
+
+    # Add troubleshooting section
+    gr.Markdown("### Troubleshooting")
+    gr.Markdown("""
+    - If you see timeout errors, try refreshing the page or clicking "Retry Loading Model"
+    - If the model still fails to load, try again later when network conditions improve
+    - Make sure you have a stable internet connection
+    """)
 
-# Launch the app
-demo.launch(
+# Launch the app with caching and concurrency settings
+demo.launch(
+    cache_examples=True,
+    show_error=True,
+    server_name="0.0.0.0",  # Listen on all network interfaces
+    share=False,  # Set to True for temporary public link
+    max_threads=16  # Increase if needed
+)
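
Review note: the retry-mounted session and the HfApi import added at the top are never referenced again, so the downloads inside from_pretrained do not pass through that adapter; likewise, timeout is not a documented from_pretrained argument, and the HF_HUB_DOWNLOAD_TIMEOUT variable set above is what actually governs download timeouts. A minimal sketch of one way the session could still be used, as a reachability probe before a load attempt (the hub_reachable helper is hypothetical, not part of this commit):

def hub_reachable(repo_id):
    # Hypothetical helper: reuse the retry-enabled session to check that the
    # Hub answers before calling load_model_with_retries.
    try:
        resp = session.head(f"https://huggingface.co/{repo_id}", timeout=30, allow_redirects=True)
        return resp.status_code < 400
    except requests.exceptions.RequestException:
        return False

If it returns False, the UI could report the network problem immediately instead of waiting for a download to time out.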
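A second note: Blocks.launch() does not accept a cache_examples keyword (that option belongs to gr.Examples, where this commit already sets it), so on recent Gradio versions the final call would likely raise a TypeError before the server starts. A safer variant under that assumption, with the other settings unchanged:

demo.launch(
    show_error=True,
    server_name="0.0.0.0",  # Listen on all network interfaces
    share=False,  # Set to True for temporary public link
    max_threads=16  # Increase if needed
)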
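One more note on the generation path: max_length caps prompt plus generated tokens, so a long Burmese prompt can leave almost no room for new text, and generate() warns when no attention_mask is supplied. A sketch of the same call using max_new_tokens and an explicit mask (an illustrative substitution, not what the commit ships):

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],  # avoid the implicit-mask warning
    max_new_tokens=max_length,  # budget applies to generated tokens only
    temperature=temperature,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    num_return_sequences=1,
    repetition_penalty=1.2,
    top_k=50,
    top_p=0.95
)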