DavidFM43 committed
Commit d28eaca · 1 Parent(s): cb440a4

change model to sharded version

Files changed (1): app.py (+2 −6)
app.py CHANGED
@@ -6,13 +6,11 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 peft_model_id = "hackathon-somos-nlp-2023/bertin-gpt-j-6b-ner-es"
 config = PeftConfig.from_pretrained(peft_model_id)
 model = AutoModelForCausalLM.from_pretrained(
-    config.base_model_name_or_path,
+    "DavidFM43/bertin-gpt-j-6b-half-sharded",
     return_dict=True,
     load_in_8bit=True,
     device_map="auto",
-    revision="half",
 )
-model.tie_weights()
 tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
 # load the Lora model
 model = PeftModel.from_pretrained(model, peft_model_id)
@@ -28,9 +26,7 @@ def gen_entities(text):
     with torch.cuda.amp.autocast():
         output_tokens = model.generate(**batch, max_new_tokens=256, eos_token_id=50258)
 
-    response = tokenizer.batch_decode(
-        output_tokens.detach().cpu().numpy(), skip_special_tokens=False
-    )[0]
+    response = tokenizer.batch_decode(output_tokens.detach().cpu().numpy(), skip_special_tokens=False)[0]
 
     return response[response.find("entities") : response.find("<EP>")]
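For context, a minimal sketch of the loading path after this commit. Sharded checkpoints split large half-precision weights across several smaller files, which the Hub can download and load incrementally instead of materializing one multi-gigabyte file; this is presumably why the base model reference moves from `config.base_model_name_or_path` with `revision="half"` to the `DavidFM43/bertin-gpt-j-6b-half-sharded` repo. The sketch assumes `transformers`, `peft`, `accelerate`, and `bitsandbytes` are installed and a CUDA GPU is available:

```python
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "hackathon-somos-nlp-2023/bertin-gpt-j-6b-ner-es"
config = PeftConfig.from_pretrained(peft_model_id)

# Base weights now come from the sharded half-precision checkpoint,
# so the Hub serves several smaller shards rather than a single file.
model = AutoModelForCausalLM.from_pretrained(
    "DavidFM43/bertin-gpt-j-6b-half-sharded",
    return_dict=True,
    load_in_8bit=True,   # 8-bit quantization via bitsandbytes
    device_map="auto",   # dispatch layers across available devices
)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# Attach the LoRA adapter on top of the quantized base model.
model = PeftModel.from_pretrained(model, peft_model_id)
```

With the adapter attached, generation proceeds exactly as in `gen_entities` above; the explicit `model.tie_weights()` call from the previous revision is dropped, since `from_pretrained` ties the weights as part of loading.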