neuralleap committed on
Commit
87524cd
·
verified ·
1 Parent(s): 93076d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -110
app.py CHANGED
@@ -1,114 +1,71 @@
1
  import gradio as gr
2
  import os
3
- import time
4
- import torch
5
- from transformers import AutoModelForCausalLM, AutoTokenizer
6
  import requests
7
- from requests.adapters import HTTPAdapter
8
- from urllib3.util.retry import Retry
9
 
 
10
  # Configuration
11
- model_name = "WYNN747/Burmese-GPT-v3"
12
- device = "cuda" if torch.cuda.is_available() else "cpu"
13
- os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "600" # 10 minutes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # Configure resilient HTTP session
16
- retry_strategy = Retry(
17
- total=5,
18
- backoff_factor=1,
19
- status_forcelist=[429, 500, 502, 503, 504],
20
- allowed_methods=["HEAD", "GET", "OPTIONS"]
21
- )
22
- adapter = HTTPAdapter(max_retries=retry_strategy)
23
- session = requests.Session()
24
- session.mount("https://", adapter)
25
- session.mount("http://", adapter)
26
-
27
- tokenizer = None
28
- model = None
29
-
30
- def load_model_with_retries(model_name, max_retries=3, retry_delay=5):
31
- for attempt in range(max_retries):
32
- try:
33
- print(f"🔄 Loading model attempt {attempt+1}/{max_retries}")
34
- tokenizer = AutoTokenizer.from_pretrained(
35
- model_name,
36
- use_fast=True,
37
- token=os.environ.get("HF_TOKEN", None)
38
- )
39
- model = AutoModelForCausalLM.from_pretrained(
40
- model_name,
41
- torch_dtype=torch.float16,
42
- trust_remote_code=True,
43
- low_cpu_mem_usage=True
44
- ).to(device)
45
- print("✅ Model loaded successfully!")
46
- return tokenizer, model
47
- except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError) as e:
48
- if attempt < max_retries - 1:
49
- print(f"⚠️ Timeout: {str(e)}. Retrying in {retry_delay} seconds...")
50
- time.sleep(retry_delay)
51
- retry_delay *= 2
52
- else:
53
- raise Exception(f"❌ Failed to load model after {max_retries} attempts: {str(e)}")
54
- except Exception as e:
55
- raise Exception(f"❌ Error loading model: {str(e)}")
56
-
57
- try:
58
- tokenizer, model = load_model_with_retries(model_name)
59
- except Exception as e:
60
- print(str(e))
61
-
62
- def generate_text(prompt, max_length=100, temperature=0.7):
63
- global tokenizer, model
64
- if tokenizer is None or model is None:
65
- try:
66
- tokenizer, model = load_model_with_retries(model_name)
67
- except Exception as e:
68
- return f"❌ Model could not be loaded. Details: {str(e)}"
69
  try:
70
- inputs = tokenizer(prompt, return_tensors="pt").to(device)
71
- model.eval()
72
- with torch.no_grad():
73
- outputs = model.generate(
74
- inputs["input_ids"],
75
- max_length=max_length,
76
- temperature=temperature,
77
- do_sample=True,
78
- top_k=50,
79
- top_p=0.95,
80
- repetition_penalty=1.2,
81
- pad_token_id=tokenizer.eos_token_id
82
- )
83
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
84
  except Exception as e:
85
- return f"❌ Text generation error: {str(e)}"
86
 
 
87
  # Gradio UI
88
- with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
89
- gr.Markdown("# 📝 Burmese-GPT-v3 Text Generator")
90
- gr.Markdown("Enter a Burmese prompt below and generate text using the `WYNN747/Burmese-GPT-v3` model.")
91
-
92
- with gr.Row():
93
- model_status = gr.Markdown("✅ Model is loaded and ready!" if model else "⚠️ Model not loaded yet.")
94
-
95
- def load_model_manually():
96
- global tokenizer, model
97
- try:
98
- tokenizer, model = load_model_with_retries(model_name)
99
- return "✅ Model loaded successfully!"
100
- except Exception as e:
101
- return f"❌ Failed to load model: {str(e)}"
102
-
103
- load_button = gr.Button("🔄 Retry Loading Model")
104
- load_button.click(fn=load_model_manually, outputs=model_status)
105
 
106
  with gr.Row():
107
  with gr.Column(scale=3):
108
- prompt = gr.Textbox(lines=5, placeholder="Enter Burmese text here...", label="Prompt")
 
 
 
 
109
  with gr.Column(scale=1):
110
- max_length = gr.Slider(50, 500, value=100, step=10, label="Max Length")
111
- temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  generate_btn = gr.Button("🚀 Generate Text")
114
  output = gr.Textbox(lines=10, label="Generated Output")
@@ -119,16 +76,15 @@ with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
119
  outputs=output
120
  )
121
 
122
- with gr.Accordion("📌 Examples", open=False):
123
- gr.Markdown("Try these example prompts:")
124
  examples = [
125
- ["ဟုတ်ကဲ့ ကျွန်တော် ဗမာစကား ပြောတတ်ပါတယ်။", 150, 0.7],
126
- ["မြန်မာနိုင်ငံမှာ", 200, 0.8]
127
  ]
128
- for idx, example in enumerate(examples):
129
- example_btn = gr.Button(f"Example {idx+1}: {example[0][:20]}...")
130
  example_btn.click(
131
- lambda e=example: (e[0], e[1], e[2]),
132
  inputs=[],
133
  outputs=[prompt, max_length, temperature]
134
  ).then(
@@ -137,13 +93,11 @@ with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
137
  outputs=output
138
  )
139
 
140
- gr.Markdown("### 🛠️ Troubleshooting")
141
- gr.Markdown("""
142
- - Try the "Retry Loading Model" button if the model fails to load.
143
- - Keep prompts short initially to test responsiveness.
144
- - Make sure you are using a GPU-enabled space (T4 Medium or better).
145
- """)
146
 
 
 
 
147
  demo.launch(
148
  show_error=True,
149
  server_name="0.0.0.0",
 
1
  import gradio as gr
2
  import os
 
 
 
3
  import requests
 
 
4
 
5
+ # --------------------------
6
  # Configuration
7
+ # --------------------------
8
+ HF_ENDPOINT_URL = "https://burmese-gpt-v3-poz.us-east-1.aws.endpoints.huggingface.cloud"
9
+ HF_TOKEN = os.environ.get("HF_TOKEN") # ✅ Loaded securely from Hugging Face Secrets
10
+
11
+ headers = {
12
+ "Authorization": f"Bearer {HF_TOKEN}",
13
+ "Content-Type": "application/json"
14
+ }
15
+
16
+ # --------------------------
17
+ # Function to Call Endpoint
18
+ # --------------------------
19
+ def generate_text(prompt, max_length=150, temperature=0.7):
20
+ payload = {
21
+ "inputs": prompt,
22
+ "parameters": {
23
+ "max_new_tokens": max_length,
24
+ "temperature": temperature,
25
+ "top_k": 50,
26
+ "top_p": 0.95,
27
+ "repetition_penalty": 1.5
28
+ }
29
+ }
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  try:
32
+ response = requests.post(HF_ENDPOINT_URL, headers=headers, json=payload)
33
+ if response.status_code == 200:
34
+ return response.json()[0]["generated_text"]
35
+ else:
36
+ return f"❌ Error {response.status_code}: {response.text}"
 
 
 
 
 
 
 
 
 
37
  except Exception as e:
38
+ return f"❌ Failed to connect to endpoint: {str(e)}"
39
 
40
+ # --------------------------
41
  # Gradio UI
42
+ # --------------------------
43
+ with gr.Blocks(title="Burmese-GPT-v3 (Endpoint)") as demo:
44
+ gr.Markdown("## 📝 Burmese GPT-3 Text Generator via Hugging Face Endpoint")
45
+ gr.Markdown("Enter a Burmese prompt below and see the model generate text using a hosted inference endpoint.")
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  with gr.Row():
48
  with gr.Column(scale=3):
49
+ prompt = gr.Textbox(
50
+ lines=5,
51
+ placeholder="Enter your Burmese text here...",
52
+ label="Input Prompt"
53
+ )
54
  with gr.Column(scale=1):
55
+ max_length = gr.Slider(
56
+ minimum=50,
57
+ maximum=300,
58
+ value=150,
59
+ step=10,
60
+ label="Max New Tokens"
61
+ )
62
+ temperature = gr.Slider(
63
+ minimum=0.1,
64
+ maximum=1.0,
65
+ value=0.7,
66
+ step=0.1,
67
+ label="Temperature"
68
+ )
69
 
70
  generate_btn = gr.Button("🚀 Generate Text")
71
  output = gr.Textbox(lines=10, label="Generated Output")
 
76
  outputs=output
77
  )
78
 
79
+ with gr.Accordion("📌 Example Prompts", open=False):
 
80
  examples = [
81
+ ["မင်္ဂလာပါ။ ကျွန်တော်က ကိုအောင်ပါ။ ရန်ကုန်မှာနေတယ်။ ဆရာလုပ်ပါတယ်။", 150, 0.7],
82
+ ["မြန်မာနိုင်ငံမှာ ရိုးရာပွဲတော်များ ဘယ်လိုကျင်းပကြတာလဲ။", 200, 0.8]
83
  ]
84
+ for idx, ex in enumerate(examples):
85
+ example_btn = gr.Button(f"Example {idx+1}")
86
  example_btn.click(
87
+ lambda e=ex: (e[0], e[1], e[2]),
88
  inputs=[],
89
  outputs=[prompt, max_length, temperature]
90
  ).then(
 
93
  outputs=output
94
  )
95
 
96
+ gr.Markdown("### ℹ️ Troubleshooting\n- Make sure the endpoint URL is correct.\n- Ensure your `HF_TOKEN` secret is added.\n- Try refreshing if the model was inactive for a while.")
 
 
 
 
 
97
 
98
+ # --------------------------
99
+ # Launch
100
+ # --------------------------
101
  demo.launch(
102
  show_error=True,
103
  server_name="0.0.0.0",