Spaces:
Paused
Paused
File size: 2,332 Bytes
dbd2ac6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# Naming overrides.
# NOTE(review): presumably read by the chart's naming helpers; confirm against
# the templates.
# Empty string: no short-name override is supplied.
nameOverride: ""
# Full name is pinned to this literal value.
fullnameOverride: "h2ogpt"
# Settings for the h2oGPT component (enabled, single replica).
# NOTE(review): the nesting below was rebuilt from a source whose indentation
# was lost; verify key placement against the chart templates.
h2ogpt:
  enabled: true
  replicaCount: 1

  image:
    repository: "gcr.io/vorvan/h2oai/h2ogpt-runtime"
    # No image tag is set in this block.
    pullPolicy: "IfNotPresent"

  storage:
    size: "128Gi"
    class: "ebs-csi"

  # Model and generation options.
  # NOTE(review): presumably forwarded to the h2oGPT process; confirm how the
  # templates consume these keys.
  overrideConfig:
    base_model: "h2oai/h2ogpt-4096-llama2-7b-chat"
    # Canonical lowercase booleans (were `True` / `False`).
    use_safetensors: true
    prompt_type: "llama2"
    save_dir: "/workspace/save/"
    use_gpu_id: false
    # This is the literal string "None", not a YAML null; quoted so the
    # distinction is visible to readers and stable across parsers.
    score_model: "None"
    max_max_new_tokens: 2048
    max_new_tokens: 1024

  service:
    type: "NodePort"
    webPort: 80
    gptPort: 8888

  updateStrategy:
    type: "RollingUpdate"

  # Pod-level identity: non-root, UID/GID/fsGroup all 1000.
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  # Container-level hardening: no privilege escalation, all capabilities
  # dropped, runtime-default seccomp profile.
  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: "RuntimeDefault"

  # Intentionally unset; written as explicit nulls instead of bare keys.
  resources: null
  nodeSelector: null
  tolerations: null

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# Settings for the text-generation-inference component (disabled here).
# NOTE(review): the nesting below was rebuilt from a source whose indentation
# was lost; in particular, `hfSecret` and `containerArgs` are placed as
# siblings of `overrideConfig`. Verify against the chart templates.
tgi:
  enabled: false
  replicaCount: 1

  image:
    repository: "ghcr.io/huggingface/text-generation-inference"
    # Quoted so the version always stays a string.
    tag: "0.9.3"
    pullPolicy: "IfNotPresent"

  storage:
    size: "512Gi"
    class: "ebs-csi"

  # Unset by default; explicit nulls instead of bare keys.
  overrideConfig: null
  hfSecret: null
  containerArgs: null

  service:
    type: "ClusterIP"
    port: 8080

  updateStrategy:
    type: "RollingUpdate"

  # No security contexts, resources, or scheduling constraints are set here.
  podSecurityContext: null
  securityContext: null
  resources: null
  nodeSelector: null
  tolerations: null

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# Settings for the vLLM component (disabled here).
# NOTE(review): the nesting below was rebuilt from a source whose indentation
# was lost; in particular, `imagePullSecrets` is placed as a sibling of
# `image`, and `containerArgs` as a sibling of `overrideConfig`. Verify
# against the chart templates.
vllm:
  enabled: false
  replicaCount: 1

  image:
    repository: "gcr.io/vorvan/h2oai/h2ogpt-runtime"
    # No image tag is set in this block.
    pullPolicy: "IfNotPresent"

  imagePullSecrets: null

  storage:
    size: "512Gi"
    class: "ebs-csi"

  overrideConfig: null

  # Flag/value pairs, one list item per token.
  # Every item is a string: Kubernetes container args are a list of strings,
  # so the numeric values are quoted (were bare integers 2 and 1234).
  containerArgs:
    - "--model"
    - "h2oai/h2ogpt-4096-llama2-7b-chat"
    - "--tokenizer"
    - "hf-internal-testing/llama-tokenizer"
    - "--tensor-parallel-size"
    - "2"
    - "--seed"
    - "1234"
    - "--trust-remote-code"

  service:
    type: "ClusterIP"
    port: 5000

  updateStrategy:
    type: "RollingUpdate"

  # Pod-level identity: non-root, UID/GID/fsGroup all 1000.
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  # Container-level hardening: no privilege escalation, all capabilities
  # dropped.
  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    # Was a dangling key with no value; completed to match the h2ogpt block's
    # profile. NOTE(review): confirm the vLLM workload runs under the
    # runtime-default seccomp profile.
    seccompProfile:
      type: "RuntimeDefault"

  # Intentionally unset; written as explicit nulls instead of bare keys.
  resources: null
  nodeSelector: null
  tolerations: null

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
|