ThomasBlumet committed
Commit c19f2a6
Parent: b0e2427

change model

Files changed (1)
  1. app.py +4 -2
app.py CHANGED
@@ -10,6 +10,7 @@ logger = logging.get_logger("transformers")
 model_name = "openai-community/gpt2" #"openai-community/gpt2" or "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" or "TheBloke/Llama-2-7B-Chat-GGML" or "TheBloke/zephyr-7B-beta-GPTQ"
 tokenizer = AutoTokenizer.from_pretrained(model_name)#,use_fast=True
 model = AutoModelForCausalLM.from_pretrained(model_name)#,device_map="auto",trust_remote_code=False,revision="main")
+tokenizer.pad_token_id = tokenizer.eos_token_id
 
 #transfer model on GPU
 #model.to("cuda")
@@ -18,8 +19,9 @@ model = AutoModelForCausalLM.from_pretrained(model_name)#,device_map="auto",trus
 #@spaces.GPU(duration=60)
 def generate_text(input_text):
     input_ids = tokenizer.encode(input_text, return_tensors="pt")#.to("cuda")
-    #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
-    output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
+    attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
+    #output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
+    output = model.generate(input_ids, attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
     return tokenizer.decode(output[0])
 
 # Example of disabling Exllama backend (if applicable in your configuration)
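For reference, a minimal sketch of app.py as it stands after this commit, reconstructed from the diff. The model name and the generate() parameters are taken verbatim from the added lines; the import statements and the final print call are assumptions about the rest of the file (the hunk context only shows that a transformers logger, AutoTokenizer, and AutoModelForCausalLM are already in use):

from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "openai-community/gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# GPT-2 ships without a pad token, so tokenizer.pad_token_id is None by
# default; reusing the EOS token id makes the .ne() comparison below
# well-defined and silences the pad_token_id warning from generate().
tokenizer.pad_token_id = tokenizer.eos_token_id

def generate_text(input_text):
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    # Mark non-pad positions with 1 and pad positions with 0. Because pad
    # and EOS share an id here, an EOS token inside the prompt would also
    # be masked; that is harmless for a single unpadded prompt like this.
    attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=100,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        do_sample=True,
    )
    return tokenizer.decode(output[0])

print(generate_text("Hello, my name is"))  # illustrative call, not in app.py

Note that deriving the mask via .ne(pad_token_id) only works cleanly for single, unpadded prompts; for batched inputs the more usual pattern is tokenizer(texts, padding=True, return_tensors="pt"), which returns a ready-made attention_mask alongside input_ids.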