Rathapoom committed
Commit 62ce3e8 · verified · 1 parent: b25a941

Update app.py

Files changed (1):
app.py (+8, -12)
app.py CHANGED

```diff
@@ -1,25 +1,21 @@
-import gradio as gr
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import os
-import torch
+import gradio as gr
 
 model_name = "scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq"
 token = os.getenv("HF_TOKEN")
 
-# Check if CUDA is available
-device = torch.device("cuda")
-torch.cuda.set_device(0)  # Use the first CUDA device
+# Remove these lines
+# device = torch.device("cuda")
+# torch.cuda.set_device(0)
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
-model = AutoModelForCausalLM.from_pretrained(model_name, token=token).to(device)
-
-print(f"CUDA available: {torch.cuda.is_available()}")
-print(f"Current device: {torch.cuda.current_device()}")
-print(f"Device name: {torch.cuda.get_device_name(0)}")
+model = AutoModelForCausalLM.from_pretrained(model_name, token=token)
 
 def generate_text(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    inputs = tokenizer(prompt, return_tensors="pt")
     outputs = model.generate(inputs.input_ids, max_length=50)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 gr.Interface(fn=generate_text, inputs="text", outputs="text").launch()
```
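With the explicit `.to(device)` and `torch.cuda` calls gone, `from_pretrained` loads the model on the default device (CPU), which is generally not workable for an AWQ-quantized 70B checkpoint. Below is a minimal sketch of one common follow-up, assuming the `accelerate` package is available so that `device_map="auto"` can place the weights on whatever GPUs are visible; the `max_new_tokens` switch and the `model.device` move are likewise assumptions on my part, not part of this commit:

```python
import os

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq"
token = os.getenv("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
# Assumption: `accelerate` is installed, so device_map="auto" places the
# weights on the available GPU(s) instead of loading everything on CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=token,
    device_map="auto",
)


def generate_text(prompt):
    # Move input_ids and attention_mask to the model's first device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_new_tokens bounds the generated continuation; max_length=50 would
    # count the prompt tokens too and can truncate generation to nothing.
    outputs = model.generate(**inputs, max_new_tokens=50)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


gr.Interface(fn=generate_text, inputs="text", outputs="text").launch()
```

Note that the commit keeps the old device lines as comments under `# Remove these lines`; deleting them outright would behave identically, and the now-unused `import torch` could go with them.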