Akjava committed
Commit 3043da5 · verified · 1 Parent(s): 569ebbc

Update app.py

Files changed (1): app.py (+23, -14)
app.py CHANGED
@@ -7,6 +7,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import gradio as gr
 
 text_generator = None
+is_hugging_face = True
 def init():
     global text_generator
     huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
@@ -27,27 +28,35 @@ def init():
     print(model_id,device,dtype)
     histories = []
     #model = None
+
 
 
-    if next(model.parameters()).is_cuda:
-        print("The model is on a GPU")
-    else:
-        print("The model is on a CPU")
-
-    #print(f"text_generator.device='{text_generator.device}")
-    if str(text_generator.device).strip() == 'cuda':
-        print("The pipeline is using a GPU")
-    else:
-        print("The pipeline is using a CPU")
+    if not is_hugging_face:
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
+        )
+        text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=dtype, device_map=device)  # pipeline has no .to(device)
+
+        if next(model.parameters()).is_cuda:
+            print("The model is on a GPU")
+        else:
+            print("The model is on a CPU")
+
+        #print(f"text_generator.device='{text_generator.device}'")
+        if str(text_generator.device).strip() == 'cuda':
+            print("The pipeline is using a GPU")
+        else:
+            print("The pipeline is using a CPU")
 
     print("initialized")
 
 @spaces.GPU(duration=120)
 def generate_text(messages):
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id, token=huggingface_token ,torch_dtype=dtype,device_map=device
-    )
-    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer,torch_dtype=dtype,device_map=device ) #pipeline has not to(device)
+    if is_hugging_face:  # ZeroGPU needs this initialized on every call
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
+        )
+        text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=dtype, device_map=device)  # pipeline has no .to(device)
     result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
 
     generated_output = result[0]["generated_text"]
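
The comment on the new is_hugging_face branch captures the reason for the change: on a ZeroGPU Space the GPU is only attached while a function decorated with @spaces.GPU is running, so the commit rebuilds the model and pipeline inside generate_text on every call instead of once in init(). A minimal, self-contained sketch of that pattern, with "gpt2" as a hypothetical stand-in for the Space's real model id and a plain string prompt instead of chat messages:

# Sketch of the per-call initialization pattern the commit adopts.
# Assumptions: this runs on a Hugging Face ZeroGPU Space, the `spaces`
# package is available, and "gpt2" is a placeholder model id.
import os
import torch
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_id = "gpt2"  # hypothetical; the real Space loads its own model
tokenizer = AutoTokenizer.from_pretrained(model_id)  # cheap, safe at import time

@spaces.GPU(duration=120)
def generate_text(prompt):
    # The GPU only exists for the duration of this call, so the model
    # and pipeline are built here rather than at module level.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=os.getenv("HUGGINGFACE_TOKEN"),  # None is fine for public models
        torch_dtype=torch.bfloat16,
        device_map="auto",  # requires the accelerate package
    )
    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
    result = text_generator(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
    return result[0]["generated_text"]

Rebuilding the pipeline costs a model load per request; the if not is_hugging_face: branch keeps the old load-once path for environments with a persistent GPU.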
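
One more note on the device logging that moved inside the if not is_hugging_face: branch: str(text_generator.device).strip() == 'cuda' fails for devices that stringify as 'cuda:0'. A small sketch of a more robust check using torch.device.type (a suggested alternative, not part of the commit):

import torch

def report_devices(model, text_generator):
    # device.type is 'cuda' for 'cuda', 'cuda:0', 'cuda:1', ..., so this
    # also matches indexed GPUs that a string comparison against 'cuda' misses.
    model_device = next(model.parameters()).device
    print(f"model: {model_device} ({'GPU' if model_device.type == 'cuda' else 'CPU'})")
    pipe_device = text_generator.device  # transformers pipelines expose a torch.device
    print(f"pipeline: {pipe_device} ({'GPU' if pipe_device.type == 'cuda' else 'CPU'})")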