# chatbot/helm/h2ogpt-chart/values.yaml
# Hosting-page metadata: kelvin-t-lu's picture / init / dbd2ac6
# Chart-level naming overrides.
# nameOverride is the empty string; fullnameOverride is fixed to "h2ogpt".
# NOTE(review): presumably read by the chart's name helper templates - confirm.
nameOverride: ''
fullnameOverride: 'h2ogpt'
# Values for the "h2ogpt" section of the chart.
# How each key is consumed is decided by the chart templates, which are not
# part of this file; the comments below describe only what is set here.
h2ogpt:
  # This section is switched on by default.
  enabled: true
  replicaCount: 1

  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    # No image tag is set in this section.
    pullPolicy: IfNotPresent

  storage:
    size: 128Gi
    # NOTE(review): presumably a StorageClass name - confirm it exists on the
    # target cluster.
    class: ebs-csi

  # Free-form key/value overrides.
  # NOTE(review): the key names look like h2oGPT options; confirm how the
  # templates hand them to the application.
  overrideConfig:
    base_model: h2oai/h2ogpt-4096-llama2-7b-chat
    # Booleans use the canonical lowercase spelling (same parsed value as
    # True/False).
    use_safetensors: true
    prompt_type: llama2
    save_dir: /workspace/save/
    use_gpu_id: false
    # "None" is a plain string in YAML, not a null.
    score_model: None
    max_max_new_tokens: 2048
    max_new_tokens: 1024

  service:
    type: NodePort
    webPort: 80
    gptPort: 8888

  updateStrategy:
    type: RollingUpdate

  # Pod-level security settings: non-root, UID/GID/fsGroup all 1000.
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  # Container-level security settings: non-root, no privilege escalation,
  # all capabilities dropped, RuntimeDefault seccomp profile.
  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: RuntimeDefault

  # The next three keys are intentionally left without a value (null).
  resources:
  nodeSelector:
  tolerations:

  # Empty maps.
  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# Values for the "tgi" section of the chart.
# How each key is consumed is decided by the chart templates, which are not
# part of this file; the comments below describe only what is set here.
tgi:
  # This section is switched off by default.
  enabled: false
  replicaCount: 1

  image:
    repository: ghcr.io/huggingface/text-generation-inference
    # Quoted so the version is unmistakably a string.
    tag: '0.9.3'
    pullPolicy: IfNotPresent

  storage:
    size: 512Gi
    # NOTE(review): presumably a StorageClass name - confirm it exists on the
    # target cluster.
    class: ebs-csi

  # The next three keys are left without a value (null).
  overrideConfig:
  hfSecret:
  containerArgs:

  service:
    type: ClusterIP
    port: 8080

  updateStrategy:
    type: RollingUpdate

  # No pod- or container-level security context is set for this section;
  # both keys are null.
  podSecurityContext:
  securityContext:

  # Left without a value (null).
  resources:
  nodeSelector:
  tolerations:

  # Empty maps.
  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# Values for the "vllm" section of the chart.
# How each key is consumed is decided by the chart templates, which are not
# part of this file; the comments below describe only what is set here.
vllm:
  # This section is switched off by default.
  enabled: false
  replicaCount: 1

  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    # No image tag is set in this section.
    pullPolicy: IfNotPresent
  # Left without a value (null).
  # NOTE(review): placed as a sibling of "image", following the usual Helm
  # layout - confirm against the templates.
  imagePullSecrets:

  storage:
    size: 512Gi
    # NOTE(review): presumably a StorageClass name - confirm it exists on the
    # target cluster.
    class: ebs-csi

  # Left without a value (null).
  overrideConfig:

  # Flag/value pairs, one list entry per token.
  # Every entry is written as a string: a bare 2 or 1234 parses as an integer,
  # while Kubernetes container args must be strings.
  # NOTE(review): presumably rendered into the container's args - confirm.
  containerArgs:
    - '--model'
    - h2oai/h2ogpt-4096-llama2-7b-chat
    - '--tokenizer'
    - hf-internal-testing/llama-tokenizer
    - '--tensor-parallel-size'
    - '2'
    - '--seed'
    - '1234'
    - '--trust-remote-code'

  service:
    type: ClusterIP
    port: 5000

  updateStrategy:
    type: RollingUpdate

  # Pod-level security settings: non-root, UID/GID/fsGroup all 1000.
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  # Container-level security settings: non-root, no privilege escalation,
  # all capabilities dropped.
  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    # Left without a value (null).
    # NOTE(review): the h2ogpt section sets "type: RuntimeDefault" here -
    # confirm whether the omission is deliberate.
    seccompProfile:

  # Left without a value (null).
  resources:
  nodeSelector:
  tolerations:

  # Empty maps.
  podAnnotations: {}
  podLabels: {}
  autoscaling: {}