Spaces:

neuralleap
/

Healthelic-Burmese-LLM

Sleeping

App Files Community

neuralleap committed on Apr 17

Commit

87524cd

verified ·

1 Parent(s): 93076d0

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -110

app.py CHANGED Viewed

@@ -1,114 +1,71 @@
 import gradio as gr
 import os
-import time
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import requests
-from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
 # Configuration
-model_name = "WYNN747/Burmese-GPT-v3"
-device = "cuda" if torch.cuda.is_available() else "cpu"
-os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "600"  # 10 minutes
-# Configure resilient HTTP session
-retry_strategy = Retry(
-    total=5,
-    backoff_factor=1,
-    status_forcelist=[429, 500, 502, 503, 504],
-    allowed_methods=["HEAD", "GET", "OPTIONS"]
-)
-adapter = HTTPAdapter(max_retries=retry_strategy)
-session = requests.Session()
-session.mount("https://", adapter)
-session.mount("http://", adapter)
-tokenizer = None
-model = None
-def load_model_with_retries(model_name, max_retries=3, retry_delay=5):
-    for attempt in range(max_retries):
-        try:
-            print(f"🔄 Loading model attempt {attempt+1}/{max_retries}")
-            tokenizer = AutoTokenizer.from_pretrained(
-                model_name,
-                use_fast=True,
-                token=os.environ.get("HF_TOKEN", None)
-            )
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                torch_dtype=torch.float16,
-                trust_remote_code=True,
-                low_cpu_mem_usage=True
-            ).to(device)
-            print("✅ Model loaded successfully!")
-            return tokenizer, model
-        except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError) as e:
-            if attempt < max_retries - 1:
-                print(f"⚠️ Timeout: {str(e)}. Retrying in {retry_delay} seconds...")
-                time.sleep(retry_delay)
-                retry_delay *= 2
-            else:
-                raise Exception(f"❌ Failed to load model after {max_retries} attempts: {str(e)}")
-        except Exception as e:
-            raise Exception(f"❌ Error loading model: {str(e)}")
-try:
-    tokenizer, model = load_model_with_retries(model_name)
-except Exception as e:
-    print(str(e))
-def generate_text(prompt, max_length=100, temperature=0.7):
-    global tokenizer, model
-    if tokenizer is None or model is None:
-        try:
-            tokenizer, model = load_model_with_retries(model_name)
-        except Exception as e:
-            return f"❌ Model could not be loaded. Details: {str(e)}"
     try:
-        inputs = tokenizer(prompt, return_tensors="pt").to(device)
-        model.eval()
-        with torch.no_grad():
-            outputs = model.generate(
-                inputs["input_ids"],
-                max_length=max_length,
-                temperature=temperature,
-                do_sample=True,
-                top_k=50,
-                top_p=0.95,
-                repetition_penalty=1.2,
-                pad_token_id=tokenizer.eos_token_id
-            )
-        return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
-        return f"❌ Text generation error: {str(e)}"
 # Gradio UI
-with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
-    gr.Markdown("# 📝 Burmese-GPT-v3 Text Generator")
-    gr.Markdown("Enter a Burmese prompt below and generate text using the `WYNN747/Burmese-GPT-v3` model.")
-    with gr.Row():
-        model_status = gr.Markdown("✅ Model is loaded and ready!" if model else "⚠️ Model not loaded yet.")
-    def load_model_manually():
-        global tokenizer, model
-        try:
-            tokenizer, model = load_model_with_retries(model_name)
-            return "✅ Model loaded successfully!"
-        except Exception as e:
-            return f"❌ Failed to load model: {str(e)}"
-    load_button = gr.Button("🔄 Retry Loading Model")
-    load_button.click(fn=load_model_manually, outputs=model_status)
     with gr.Row():
         with gr.Column(scale=3):
-            prompt = gr.Textbox(lines=5, placeholder="Enter Burmese text here...", label="Prompt")
         with gr.Column(scale=1):
-            max_length = gr.Slider(50, 500, value=100, step=10, label="Max Length")
-            temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
     generate_btn = gr.Button("🚀 Generate Text")
     output = gr.Textbox(lines=10, label="Generated Output")
@@ -119,16 +76,15 @@ with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
         outputs=output
     )
-    with gr.Accordion("📌 Examples", open=False):
-        gr.Markdown("Try these example prompts:")
         examples = [
-            ["ဟုတ်ကဲ့ ကျွန်တော် ဗမာစကား ပြောတတ်ပါတယ်။", 150, 0.7],
-            ["မြန်မာနိုင်ငံမှာ", 200, 0.8]
         ]
-        for idx, example in enumerate(examples):
-            example_btn = gr.Button(f"Example {idx+1}: {example[0][:20]}...")
             example_btn.click(
-                lambda e=example: (e[0], e[1], e[2]),
                 inputs=[],
                 outputs=[prompt, max_length, temperature]
             ).then(
@@ -137,13 +93,11 @@ with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
                 outputs=output
             )
-    gr.Markdown("### 🛠️ Troubleshooting")
-    gr.Markdown("""
-- Try the "Retry Loading Model" button if the model fails to load.
-- Keep prompts short initially to test responsiveness.
-- Make sure you are using a GPU-enabled space (T4 Medium or better).
-""")
 demo.launch(
     show_error=True,
     server_name="0.0.0.0",

 import gradio as gr
 import os
 import requests
+# --------------------------
 # Configuration
+# --------------------------
+HF_ENDPOINT_URL = "https://burmese-gpt-v3-poz.us-east-1.aws.endpoints.huggingface.cloud"
+HF_TOKEN = os.environ.get("HF_TOKEN")  # ✅ Loaded securely from Hugging Face Secrets
+headers = {
+    "Authorization": f"Bearer {HF_TOKEN}",
+    "Content-Type": "application/json"
+}
+# --------------------------
+# Function to Call Endpoint
+# --------------------------
+def generate_text(prompt, max_length=150, temperature=0.7):
+    payload = {
+        "inputs": prompt,
+        "parameters": {
+            "max_new_tokens": max_length,
+            "temperature": temperature,
+            "top_k": 50,
+            "top_p": 0.95,
+            "repetition_penalty": 1.5
+        }
+    }
     try:
+        response = requests.post(HF_ENDPOINT_URL, headers=headers, json=payload)
+        if response.status_code == 200:
+            return response.json()[0]["generated_text"]
+        else:
+            return f"❌ Error {response.status_code}: {response.text}"
     except Exception as e:
+        return f"❌ Failed to connect to endpoint: {str(e)}"
+# --------------------------
 # Gradio UI
+# --------------------------
+with gr.Blocks(title="Burmese-GPT-v3 (Endpoint)") as demo:
+    gr.Markdown("## 📝 Burmese GPT-3 Text Generator via Hugging Face Endpoint")
+    gr.Markdown("Enter a Burmese prompt below and see the model generate text using a hosted inference endpoint.")
     with gr.Row():
         with gr.Column(scale=3):
+            prompt = gr.Textbox(
+                lines=5,
+                placeholder="Enter your Burmese text here...",
+                label="Input Prompt"
+            )
         with gr.Column(scale=1):
+            max_length = gr.Slider(
+                minimum=50,
+                maximum=300,
+                value=150,
+                step=10,
+                label="Max New Tokens"
+            )
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
+            )
     generate_btn = gr.Button("🚀 Generate Text")
     output = gr.Textbox(lines=10, label="Generated Output")
         outputs=output
     )
+    with gr.Accordion("📌 Example Prompts", open=False):
         examples = [
+            ["မင်္ဂလာပါ။ ကျွန်တော်က ကိုအောင်ပါ။ ရန်ကုန်မှာနေတယ်။ ဆရာလုပ်ပါတယ်။", 150, 0.7],
+            ["မြန်မာနိုင်ငံမှာ ရိုးရာပွဲတော်များ ဘယ်လိုကျင်းပကြတာလဲ။", 200, 0.8]
         ]
+        for idx, ex in enumerate(examples):
+            example_btn = gr.Button(f"Example {idx+1}")
             example_btn.click(
+                lambda e=ex: (e[0], e[1], e[2]),
                 inputs=[],
                 outputs=[prompt, max_length, temperature]
             ).then(
                 outputs=output
             )
+    gr.Markdown("### ℹ️ Troubleshooting\n- Make sure the endpoint URL is correct.\n- Ensure your `HF_TOKEN` secret is added.\n- Try refreshing if the model was inactive for a while.")
+# --------------------------
+# Launch
+# --------------------------
 demo.launch(
     show_error=True,
     server_name="0.0.0.0",