neuralleap committed on
Commit 93076d0 · verified · 1 Parent(s): bbb4028

Update app.py

Files changed (1): app.py (+71, -121)
app.py CHANGED

@@ -1,13 +1,18 @@
 import gradio as gr
 import os
 import time
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import HfApi
 import requests
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 
-# Configure requests to be more resilient
+# Configuration
+model_name = "WYNN747/Burmese-GPT-v3"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "600"  # 10 minutes
+
+# Configure resilient HTTP session
 retry_strategy = Retry(
     total=5,
     backoff_factor=1,
@@ -19,100 +24,74 @@ session = requests.Session()
 session.mount("https://", adapter)
 session.mount("http://", adapter)
 
-# Set longer timeout for model downloads
-os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "600"  # 10 minutes timeout
+tokenizer = None
+model = None
 
-# Model name
-model_name = "WYNN747/Burmese-GPT-v3"
-
-# Function to load model with retries
 def load_model_with_retries(model_name, max_retries=3, retry_delay=5):
     for attempt in range(max_retries):
         try:
-            print(f"Loading model attempt {attempt+1}/{max_retries}")
+            print(f"🔄 Loading model attempt {attempt+1}/{max_retries}")
             tokenizer = AutoTokenizer.from_pretrained(
-                model_name,
-                use_fast=False,  # Sometimes the fast tokenizer causes issues
-                local_files_only=False,
-                token=os.environ.get("HF_TOKEN", None)  # Use token if available
+                model_name,
+                use_fast=True,
+                token=os.environ.get("HF_TOKEN", None)
             )
-
             model = AutoModelForCausalLM.from_pretrained(
                 model_name,
-                local_files_only=False,
-                token=os.environ.get("HF_TOKEN", None),
+                torch_dtype=torch.float16,
                 trust_remote_code=True,
-                low_cpu_mem_usage=True,  # Help with memory issues
-                torch_dtype="auto"  # Use appropriate dtype
-            )
+                low_cpu_mem_usage=True
+            ).to(device)
+            print("✅ Model loaded successfully!")
             return tokenizer, model
         except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError) as e:
             if attempt < max_retries - 1:
-                print(f"Timeout error: {str(e)}. Retrying in {retry_delay} seconds...")
+                print(f"⚠️ Timeout: {str(e)}. Retrying in {retry_delay} seconds...")
                 time.sleep(retry_delay)
-                retry_delay *= 2  # Exponential backoff
+                retry_delay *= 2
             else:
-                raise Exception(f"Failed to load model after {max_retries} attempts: {str(e)}")
+                raise Exception(f"❌ Failed to load model after {max_retries} attempts: {str(e)}")
         except Exception as e:
-            raise Exception(f"Error loading model: {str(e)}")
+            raise Exception(f"❌ Error loading model: {str(e)}")
 
-# Load model
 try:
     tokenizer, model = load_model_with_retries(model_name)
-    print("Model loaded successfully!")
 except Exception as e:
-    print(f"Error loading model: {str(e)}")
-    # Create placeholder objects for UI to start
-    # This allows the UI to start even if model loading fails initially
-    tokenizer = None
-    model = None
+    print(str(e))
 
 def generate_text(prompt, max_length=100, temperature=0.7):
-    """Generate text based on the input prompt."""
     global tokenizer, model
-
-    # Check if model is loaded
     if tokenizer is None or model is None:
         try:
-            # Try loading the model again if it failed initially
             tokenizer, model = load_model_with_retries(model_name)
-            print("Model loaded on demand")
         except Exception as e:
-            return f"Error: Model could not be loaded. Please check your internet connection and try again. Details: {str(e)}"
-
+            return f"❌ Model could not be loaded. Details: {str(e)}"
     try:
-        # Process the input
-        inputs = tokenizer(prompt, return_tensors="pt")
-
-        # Generate
-        outputs = model.generate(
-            inputs["input_ids"],
-            max_length=max_length,
-            temperature=temperature,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id if hasattr(tokenizer, 'eos_token_id') else tokenizer.pad_token_id,
-            num_return_sequences=1,
-            repetition_penalty=1.2,  # Reduce repetition
-            top_k=50,
-            top_p=0.95
-        )
-
-        # Decode and return the generated text
-        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return generated_text
+        inputs = tokenizer(prompt, return_tensors="pt").to(device)
+        model.eval()
+        with torch.no_grad():
+            outputs = model.generate(
+                inputs["input_ids"],
+                max_length=max_length,
+                temperature=temperature,
+                do_sample=True,
+                top_k=50,
+                top_p=0.95,
+                repetition_penalty=1.2,
+                pad_token_id=tokenizer.eos_token_id
+            )
+        return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
-        return f"Error during text generation: {str(e)}"
+        return f"❌ Text generation error: {str(e)}"
 
-# Create Gradio interface with better error handling
+# Gradio UI
 with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
-    gr.Markdown("# Burmese-GPT-v3 Text Generation")
-    gr.Markdown("Enter a prompt in Burmese to generate text using the Burmese-GPT-v3 model.")
-
-    # Add status indicator
+    gr.Markdown("# 📝 Burmese-GPT-v3 Text Generator")
+    gr.Markdown("Enter a Burmese prompt below and generate text using the `WYNN747/Burmese-GPT-v3` model.")
+
     with gr.Row():
-        model_status = gr.Markdown("⚠️ Model status: Checking..." if model is None else "✅ Model loaded and ready")
-
-    # Model loading button (for manual retry)
+        model_status = gr.Markdown("✅ Model is loaded and ready!" if model else "⚠️ Model not loaded yet.")
+
     def load_model_manually():
         global tokenizer, model
         try:
@@ -120,82 +99,53 @@ with gr.Blocks(title="Burmese-GPT-v3 Text Generation") as demo:
             return "✅ Model loaded successfully!"
         except Exception as e:
             return f"❌ Failed to load model: {str(e)}"
-
-    load_button = gr.Button("Retry Loading Model")
+
+    load_button = gr.Button("🔄 Retry Loading Model")
     load_button.click(fn=load_model_manually, outputs=model_status)
-
-    # Add model info
-    gr.Markdown("### Model Information")
-    gr.Markdown("- **Model Name**: WYNN747/Burmese-GPT-v3")
-    gr.Markdown("- **Description**: A language model for Burmese text generation")
-
-    # Input components
+
     with gr.Row():
         with gr.Column(scale=3):
-            prompt = gr.Textbox(
-                lines=5,
-                placeholder="Enter your Burmese text prompt here...",
-                label="Prompt"
-            )
+            prompt = gr.Textbox(lines=5, placeholder="Enter Burmese text here...", label="Prompt")
         with gr.Column(scale=1):
-            max_length = gr.Slider(
-                minimum=50,
-                maximum=500,
-                value=100,
-                step=10,
-                label="Max Length"
-            )
-            temperature = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.7,
-                step=0.1,
-                label="Temperature"
-            )
-
-    # Generate button
-    generate_btn = gr.Button("Generate Text", variant="primary")
-
-    # Output
-    output = gr.Textbox(lines=10, label="Generated Text")
-
-    # Set up the generation function
+            max_length = gr.Slider(50, 500, value=100, step=10, label="Max Length")
+            temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
+
+    generate_btn = gr.Button("🚀 Generate Text")
+    output = gr.Textbox(lines=10, label="Generated Output")
+
     generate_btn.click(
         fn=generate_text,
         inputs=[prompt, max_length, temperature],
         outputs=output
     )
-
-    # Add examples if available
-    with gr.Accordion("Examples", open=False):
-        gr.Markdown("Click on any example to try it:")
-        example_prompts = [
+
+    with gr.Accordion("📌 Examples", open=False):
+        gr.Markdown("Try these example prompts:")
+        examples = [
             ["ဟုတ်ကဲ့ ကျွန်တော် ဗမာစကား ပြောတတ်ပါတယ်။", 150, 0.7],
-            ["မြန်မာနိုင်ငံမှာ", 200, 0.8],
+            ["မြန်မာနိုင်ငံမှာ", 200, 0.8]
         ]
-        for idx, example in enumerate(example_prompts):
+        for idx, example in enumerate(examples):
            example_btn = gr.Button(f"Example {idx+1}: {example[0][:20]}...")
            example_btn.click(
-                lambda e=example: (e[0], e[1], e[2]),
-                inputs=[],
+                lambda e=example: (e[0], e[1], e[2]),
+                inputs=[],
                outputs=[prompt, max_length, temperature]
            ).then(
                fn=generate_text,
                inputs=[prompt, max_length, temperature],
                outputs=output
            )
-
-    # Add troubleshooting section
-    gr.Markdown("### Troubleshooting")
+
+    gr.Markdown("### 🛠️ Troubleshooting")
    gr.Markdown("""
-    - If you see timeout errors, try refreshing the page or clicking "Retry Loading Model"
-    - If the model still fails to load, try again later when network conditions improve
-    - Make sure you have a stable internet connection
-    """)
+    - Try the "Retry Loading Model" button if the model fails to load.
+    - Keep prompts short initially to test responsiveness.
+    - Make sure you are using a GPU-enabled space (T4 Medium or better).
+    """)
 
-# Launch the app with appropriate settings
 demo.launch(
     show_error=True,
-    server_name="0.0.0.0",  # Listen on all network interfaces
-    share=False  # Set to True for temporary public link
-)
+    server_name="0.0.0.0",
+    share=False
+)
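One caveat in the loading hunk: the new code pins torch_dtype=torch.float16 but still falls back to device = "cpu" when no GPU is present, and half precision is slow or unsupported for many CPU ops. A hedged variant (a suggestion, not what the commit does) keys the dtype off the detected device:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
# float16 halves GPU memory use; fall back to float32 on CPU,
# where half-precision kernels are slow or missing.
dtype = torch.float16 if device == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained("WYNN747/Burmese-GPT-v3")
model = AutoModelForCausalLM.from_pretrained(
    "WYNN747/Burmese-GPT-v3",
    torch_dtype=dtype,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).to(device)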
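The examples accordion wires each prompt to a hand-made button plus a .click(...).then(...) chain. Gradio's built-in gr.Examples component gives the same click-to-fill-and-run behaviour in one call; a minimal sketch, assuming a recent Gradio release that supports run_on_click (the English glosses in the comments are rough translations):

import gradio as gr

def generate_text(prompt, max_length=100, temperature=0.7):
    return prompt  # stand-in for the real generator defined above

with gr.Blocks() as demo:
    prompt = gr.Textbox(lines=5, label="Prompt")
    max_length = gr.Slider(50, 500, value=100, step=10, label="Max Length")
    temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
    output = gr.Textbox(lines=10, label="Generated Output")
    gr.Examples(
        examples=[
            ["ဟုတ်ကဲ့ ကျွန်တော် ဗမာစကား ပြောတတ်ပါတယ်။", 150, 0.7],  # "Yes, I can speak Burmese."
            ["မြန်မာနိုင်ငံမှာ", 200, 0.8],                          # "In Myanmar..."
        ],
        inputs=[prompt, max_length, temperature],
        outputs=output,
        fn=generate_text,
        run_on_click=True,  # fill the inputs and run generation when clicked
    )

That said, the hand-rolled loop is correct as written: the lambda e=example default-argument idiom is the right way to bind the loop variable at definition time.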
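Finally, long float16 generations on a busy Space can exceed default request handling; enabling Gradio's request queue before launch is the usual mitigation (a suggestion, not in this commit):

# Drop-in replacement for the demo.launch(...) call above.
demo.queue(max_size=16).launch(  # queue concurrent requests instead of rejecting them
    show_error=True,
    server_name="0.0.0.0",
    share=False,
)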