Tonic committed
Commit 813bedf · 1 parent: 78f66f0

Update app.py

Files changed (1): app.py +9 -3
app.py CHANGED
@@ -9,13 +9,19 @@ examples = [["How are you?"]]
 
 
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True).eval()
+model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)  # Different generation length, top_p and other related super parameters can be specified.
 
 
 def predict(input, history=[]):
     # tokenize the new input sentence
     new_user_input_ids = tokenizer.encode(
-        input + tokenizer.eos_token, return_tensors="pt"
+    if input is not None and tokenizer.eos_token is not None:
+        combined_input = input + tokenizer.eos_token
+        # Rest of your code using combined_input
+    else:
+        # Handle the case where input or tokenizer.eos_token is None
+        print("Input or eos_token is None. Cannot concatenate.")
     )
 
     # append the new user input tokens to the chat history
@@ -23,7 +29,7 @@ def predict(input, history=[]):
 
     # generate a response
     history = model.generate(
-        bot_input_ids, max_length=4000, pad_token_id=tokenizer.eos_token_id
+        bot_input_ids, max_length=20, pad_token_id=tokenizer.eos_token_id
     ).tolist()
 
     # convert the tokens to text, and then split the responses into lines
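
Note: the added model.generation_config = GenerationConfig.from_pretrained(...) line needs GenerationConfig in scope; the file's import block is outside this diff's context, and if it is not already imported the line raises a NameError at startup. A minimal sketch of the imports this revision relies on, following the Qwen-7B-Chat model card (assumed, not shown in the diff):

    from transformers import AutoModelForCausalLM, AutoTokenizer
    from transformers.generation import GenerationConfig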
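
Note: the six new lines in the first hunk land inside the parentheses of tokenizer.encode(, where an if statement is not valid Python, and the following ) context line is left closing nothing, so this revision fails with a SyntaxError as soon as app.py is imported. A sketch of what the guard presumably intends, with the None-check hoisted above the encode call; combined_input comes from the commit itself, while returning the unchanged history on failure is an assumption about the desired fallback:

    def predict(input, history=[]):
        # Guard before concatenating: either operand may be None.
        if input is None or tokenizer.eos_token is None:
            print("Input or eos_token is None. Cannot concatenate.")
            return history

        combined_input = input + tokenizer.eos_token

        # tokenize the new input sentence
        new_user_input_ids = tokenizer.encode(combined_input, return_tensors="pt")
        # ... rest of predict (history concat, generate, decode) unchanged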
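
Note: in transformers, max_length caps the total sequence, prompt included, so dropping it from 4000 to 20 means generate may emit at most 20 tokens counting the entire chat history; with any real history the reply is truncated to nothing. If the intent was to bound only the reply, max_new_tokens does that instead (the value 256 below is an illustrative choice, not from the commit):

    history = model.generate(
        bot_input_ids, max_new_tokens=256, pad_token_id=tokenizer.eos_token_id
    ).tolist()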