change model to sharded version
app.py
CHANGED
```diff
@@ -6,13 +6,11 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 peft_model_id = "hackathon-somos-nlp-2023/bertin-gpt-j-6b-ner-es"
 config = PeftConfig.from_pretrained(peft_model_id)
 model = AutoModelForCausalLM.from_pretrained(
-
+    "DavidFM43/bertin-gpt-j-6b-half-sharded",
     return_dict=True,
     load_in_8bit=True,
     device_map="auto",
-    revision="half",
 )
-model.tie_weights()
 tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
 # load the Lora model
 model = PeftModel.from_pretrained(model, peft_model_id)
@@ -28,9 +26,7 @@ def gen_entities(text):
     with torch.cuda.amp.autocast():
         output_tokens = model.generate(**batch, max_new_tokens=256, eos_token_id=50258)
 
-    response = tokenizer.batch_decode(
-        output_tokens.detach().cpu().numpy(), skip_special_tokens=False
-    )[0]
+    response = tokenizer.batch_decode(output_tokens.detach().cpu().numpy(), skip_special_tokens=False)[0]
 
     return response[response.find("entities") : response.find("<EP>")]
 
```
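For context: the commit swaps the base checkpoint for `DavidFM43/bertin-gpt-j-6b-half-sharded` and drops both the `revision="half"` pin and the manual `model.tie_weights()` call, which the sharded export presumably no longer needs. A sharded repository stores the same weights split across several smaller files, so `from_pretrained` can load them one shard at a time instead of materializing a single multi-gigabyte checkpoint file, which matters on memory-constrained hosts such as Spaces hardware. Below is a minimal sketch of how such a sharded half-precision copy could be produced; the source repo name and the shard size are assumptions, not taken from this diff:

```python
# Hypothetical sketch: export a half-precision, sharded copy of the base
# model. The source repo name and shard size are assumptions.
import torch
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    "bertin-project/bertin-gpt-j-6B",  # assumed base checkpoint
    torch_dtype=torch.float16,         # cast weights to fp16 ("half")
)

# Split the checkpoint into ~2 GB shards; from_pretrained() later loads
# these one at a time instead of reading one huge file into memory.
base.save_pretrained("bertin-gpt-j-6b-half-sharded", max_shard_size="2GB")
```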
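The second hunk only collapses the `batch_decode` call onto one line; behavior is unchanged. For readers without the full app.py, here is a minimal sketch of how the pieces in this diff fit together at runtime. The tokenization step that produces `batch` is not shown in the diff, so the version below is an assumption; note that `skip_special_tokens=False` keeps the custom end marker (apparently a special token with id 50258, plausibly `<EP>`) in the decoded text so the final slice can find it.

```python
# Sketch of the generation path around this diff. Only the generate/decode
# lines are taken from app.py; the tokenization of `text` is assumed.
import torch

def gen_entities(text):
    # Assumed: tokenize the prompt and move it to the model's device.
    batch = tokenizer(text, return_tensors="pt").to(model.device)

    with torch.cuda.amp.autocast():
        output_tokens = model.generate(**batch, max_new_tokens=256, eos_token_id=50258)

    # Decode with special tokens kept so the "<EP>" end marker survives,
    # then slice out the span between "entities" and "<EP>".
    response = tokenizer.batch_decode(output_tokens.detach().cpu().numpy(), skip_special_tokens=False)[0]
    return response[response.find("entities") : response.find("<EP>")]
```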