FINGU-AI committed on
Commit
d69d4a4
·
verified ·
1 Parent(s): 171d50c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -19,7 +19,7 @@ print(zero.device) # <-- 'cpu' 🤔
19
  model_id = 'FINGU-AI/Qwen-Orpo-v1' #attn_implementation="flash_attention_2",
20
  model = AutoModelForCausalLM.from_pretrained(model_id,attn_implementation="sdpa", torch_dtype= torch.bfloat16)
21
  tokenizer = AutoTokenizer.from_pretrained(model_id)
22
- streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
23
  model.to('cuda')
24
 
25
  # terminators = [
@@ -44,12 +44,12 @@ def inference(query):
44
  ]
45
 
46
  tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
47
- # outputs = model.generate(tokenized_chat, **generation_params)
48
- # decoded_outputs = tokenizer.batch_decode(outputs, skip_specail_tokens=True)
49
- # assistant_response = decoded_outputs[0].split("assistant:")[-1].strip()
50
- # return assistant_response
51
- outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
52
- return outputs
53
 
54
  examples = ['How can options strategies such as straddles, strangles, and spreads be used to hedge against market volatility?',
55
  'How do changes in interest rates, inflation, and GDP growth impact stock and bond markets?',
 
19
  model_id = 'FINGU-AI/Qwen-Orpo-v1' #attn_implementation="flash_attention_2",
20
  model = AutoModelForCausalLM.from_pretrained(model_id,attn_implementation="sdpa", torch_dtype= torch.bfloat16)
21
  tokenizer = AutoTokenizer.from_pretrained(model_id)
22
+ # streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
23
  model.to('cuda')
24
 
25
  # terminators = [
 
44
  ]
45
 
46
  tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
47
+ outputs = model.generate(tokenized_chat, **generation_params)
48
+ decoded_outputs = tokenizer.batch_decode(outputs, skip_specail_tokens=True)
49
+ assistant_response = decoded_outputs[0].split("<|im_start|>assistant\n")[-1].strip()
50
+ return assistant_response
51
+ # outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
52
+ # return outputs
53
 
54
  examples = ['How can options strategies such as straddles, strangles, and spreads be used to hedge against market volatility?',
55
  'How do changes in interest rates, inflation, and GDP growth impact stock and bond markets?',