---
library_name: transformers
license: apache-2.0
datasets:
- benchang1110/pretrainedtw
- HuggingFaceTB/cosmopedia-100k
language:
- zh
widget:
- text: '在很久以前,這座島上'
  example_title: Example1
---
# Model Card for Taiwan-tinyllama-v1.0-chat
This is a continually pretrained version of [TinyLlama](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) tailored for Traditional Chinese. The continued-pretraining dataset contains roughly 2B tokens.
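
For a quick smoke test of Traditional Chinese completion (mirroring the widget example above), the standard `transformers` pipeline API can be used. This is a minimal sketch; the generation parameters are illustrative defaults, not tuned settings, and a fuller script follows in the Usage section.

```python
from transformers import pipeline
import torch

# Completion-style smoke test using the same prompt as the widget example.
generator = pipeline(
    "text-generation",
    model="DavidLanz/Taiwan-tinyllama-v1.0-chat",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
print(generator("在很久以前,這座島上", max_new_tokens=50)[0]["generated_text"])
```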
# Usage
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


def generate_response(prompt):
    '''
    Simple generation test for the model.
    '''
    # Tokenize the input prompt
    tokenized_input = tokenizer(prompt, return_tensors='pt').to(device)

    # Generate the response with greedy decoding and a repetition penalty
    outputs = model.generate(
        input_ids=tokenized_input['input_ids'],
        attention_mask=tokenized_input['attention_mask'],
        pad_token_id=tokenizer.pad_token_id,
        do_sample=False,
        repetition_penalty=1.3,
        max_length=500
    )

    # Decode the response, dropping special tokens
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


if __name__ == '__main__':
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = AutoModelForCausalLM.from_pretrained(
        "DavidLanz/Taiwan-tinyllama-v1.0-chat",
        device_map=device,
        torch_dtype=torch.bfloat16,
    )
    tokenizer = AutoTokenizer.from_pretrained("DavidLanz/Taiwan-tinyllama-v1.0-chat")
    while True:
        text = input("Input a prompt: ")
        print('System:', generate_response(text))
```
Using bfloat16, the model requires roughly 3 GB of VRAM.
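
To verify that footprint on your own hardware, here is a minimal sketch (assuming a CUDA device) using PyTorch's peak-memory counters; note it measures the loaded weights only, and generation adds KV-cache overhead on top.

```python
import torch
from transformers import AutoModelForCausalLM

# Reset counters, load in bfloat16, then read the peak allocation.
torch.cuda.reset_peak_memory_stats()
model = AutoModelForCausalLM.from_pretrained(
    "DavidLanz/Taiwan-tinyllama-v1.0-chat",
    device_map="cuda",
    torch_dtype=torch.bfloat16,
)
print(f"Peak VRAM: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GiB")
```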