michaelfeil committed
Commit 6b53f65 · verified · 1 Parent(s): bdf6773

Update README.md

Files changed (1):
  1. README.md  +35 -0

README.md CHANGED
@@ -1,6 +1,41 @@
 ---
 {}
 ---
+
+ Deployment:
+ ```
+ build_commands: []
+ external_package_dirs: []
+ model_metadata: {}
+ model_name: fp8-baseten/example-Meta-Llama-3-70B-InstructForSequenceClassification
+ python_version: py39
+ requirements: []
+ resources:
+   accelerator: H100:1
+   cpu: "1"
+   memory: 64Gi
+   use_gpu: true
+ secrets:
+   hf_access_token: set token in baseten workspace
+ system_packages: []
+ trt_llm:
+   build:
+     base_model: encoder
+     # automatically inferred from config[max_position_embeddings]
+     max_seq_len: 42
+     # max_batch_size per dynamic batch, recommended to stay at 32
+     max_batch_size: 32
+     # max num tokens per dynamic batch, strongly recommended to keep this value
+     max_num_tokens: 16384
+     checkpoint_repository:
+       source: HF
+       repo: "baseten/example-Meta-Llama-3-70B-InstructForSequenceClassification"
+       revision: "main" # hf revision hash
+     quantization_type: fp8
+     num_builder_gpus: 4
+ ```
+
+ Reproduce this model:
 ```python
 #!/usr/bin/env python
 import torch
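
Once the config above is deployed on Baseten, the model is served behind the standard Baseten predict endpoint. A minimal sketch of calling it from Python, assuming a hypothetical model ID and a simple `{"text": ...}` payload (the exact request shape depends on the truss's model code):

```python
#!/usr/bin/env python
# Minimal sketch: call the deployed fp8 classifier on Baseten.
# MODEL_ID and the {"text": ...} payload shape are illustrative assumptions;
# substitute the real model ID and whatever inputs the truss expects.
import os

import requests

BASETEN_API_KEY = os.environ["BASETEN_API_KEY"]
MODEL_ID = "abcd1234"  # hypothetical; copy from the Baseten dashboard

resp = requests.post(
    f"https://model-{MODEL_ID}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {BASETEN_API_KEY}"},
    json={"text": "Example input to score with the classifier."},
    timeout=60,
)
resp.raise_for_status()
print(resp.json())
```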