Pratham Bhat committed
Commit 853e734
1 Parent(s): b2344d3

Reverted changes

Files changed (1)
  1. main.py +25 -17
main.py CHANGED
@@ -36,29 +36,36 @@ def format_prompt(system, message, history):
     prompt += {"role": "user", "content": message}
     return prompt
 
-def setup():
-    device = "cuda" if torch.cuda.is_available() else "cpu"
+# def setup():
+#     device = "cuda" if torch.cuda.is_available() else "cpu"
 
-    # if torch.backends.mps.is_available():
-    #     device = torch.device("mps")
-    #     x = torch.ones(1, device=device)
-    #     print (x)
-    # else:
-    #     device="cpu"
-    #     print ("MPS device not found.")
+#     # if torch.backends.mps.is_available():
+#     #     device = torch.device("mps")
+#     #     x = torch.ones(1, device=device)
+#     #     print (x)
+#     # else:
+#     #     device="cpu"
+#     #     print ("MPS device not found.")
+
+#     # device = "auto"
+#     # device=torch.device("cpu")
 
-    # device = "auto"
-    # device=torch.device("cpu")
+#     model_path = "ibm-granite/granite-34b-code-instruct-8k"
+#     tokenizer = AutoTokenizer.from_pretrained(model_path)
+#     # drop device_map if running on CPU
+#     model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
+#     model.eval()
 
+#     return model, tokenizer, device
+
+def generate(item: Item):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
     model_path = "ibm-granite/granite-34b-code-instruct-8k"
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     # drop device_map if running on CPU
     model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
     model.eval()
-
-    return model, tokenizer, device
-
-def generate(item: Item, model, tokenizer, device):
     # change input text as desired
     chat = format_prompt(item.system_prompt, item.prompt, item.history)
     chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

@@ -73,11 +80,12 @@ def generate(item: Item, model, tokenizer, device):
     return output_text
 
 
-model, tokenizer, device = setup()
+# model, tokenizer, device = setup()
 
 @app.post("/generate/")
 async def generate_text(item: Item):
-    return {"response": generate(item, model, tokenizer, device)}
+    return {"response": generate(item)}
+    # return {"response": generate(item, model, tokenizer, device)}
 
 @app.get("/")
 async def generate_text_root(item: Item):
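
After this revert, generate() builds its own device, tokenizer, and model, so each POST to /generate/ loads the ibm-granite/granite-34b-code-instruct-8k checkpoint instead of reusing one prepared once by setup(). A minimal client sketch against this endpoint is shown below. The host and port, the example values, and the exact Item schema are assumptions not shown in the diff; only the field names system_prompt, prompt, and history are taken from the call to format_prompt.

# Hypothetical client for the /generate/ endpoint above.
# Assumptions: the FastAPI app is served at localhost:8000, and Item is a
# request model whose fields match what generate() reads:
# item.system_prompt, item.prompt, item.history.
import requests

payload = {
    "system_prompt": "You are a helpful assistant.",
    "prompt": "Write a Python function that reverses a string.",
    "history": [],  # prior {"role": ..., "content": ...} turns, if any
}

resp = requests.post("http://localhost:8000/generate/", json=payload)
resp.raise_for_status()
print(resp.json()["response"])

Because setup() is commented out in this version, the from_pretrained calls run inside generate() on every request rather than once at startup, so each call pays the full model load time.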