Spaces:

kelvin-t-lu
/

chatbot

Paused

App Files Files Community

chatbot/helm/h2ogpt-chart/values.yaml

kelvin-t-lu

init

dbd2ac6 over 1 year ago

raw

history blame contribute delete

2.33 kB

	nameOverride: ""
	fullnameOverride: h2ogpt

	h2ogpt:
	enabled: true
	replicaCount: 1
	image:
	repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
	pullPolicy: IfNotPresent

	storage:
	size: 128Gi
	class: ebs-csi

	overrideConfig:
	base_model: h2oai/h2ogpt-4096-llama2-7b-chat
	use_safetensors: True
	prompt_type: llama2
	save_dir: /workspace/save/
	use_gpu_id: False
	score_model: None
	max_max_new_tokens: 2048
	max_new_tokens: 1024

	service:
	type: NodePort
	webPort: 80
	gptPort: 8888

	updateStrategy:
	type: RollingUpdate

	podSecurityContext:
	runAsNonRoot: true
	runAsUser: 1000
	runAsGroup: 1000
	fsGroup: 1000

	securityContext:
	runAsNonRoot: true
	allowPrivilegeEscalation: false
	capabilities:
	drop:
	- ALL
	seccompProfile:
	type: RuntimeDefault

	resources:
	nodeSelector:
	tolerations:

	podAnnotations: {}
	podLabels: {}
	autoscaling: {}

	tgi:
	enabled: false
	replicaCount: 1

	image:
	repository: ghcr.io/huggingface/text-generation-inference
	tag: 0.9.3
	pullPolicy: IfNotPresent

	storage:
	size: 512Gi
	class: ebs-csi

	overrideConfig:
	hfSecret:
	containerArgs:

	service:
	type: ClusterIP
	port: 8080

	updateStrategy:
	type: RollingUpdate

	podSecurityContext:
	securityContext:

	resources:
	nodeSelector:
	tolerations:

	podAnnotations: {}
	podLabels: {}
	autoscaling: {}

	vllm:
	enabled: false
	replicaCount: 1

	image:
	repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
	pullPolicy: IfNotPresent

	imagePullSecrets:

	storage:
	size: 512Gi
	class: ebs-csi

	overrideConfig:

	containerArgs:
	- "--model"
	- h2oai/h2ogpt-4096-llama2-7b-chat
	- "--tokenizer"
	- hf-internal-testing/llama-tokenizer
	- "--tensor-parallel-size"
	- 2
	- "--seed"
	- 1234
	- "--trust-remote-code"

	service:
	type: ClusterIP
	port: 5000

	updateStrategy:
	type: RollingUpdate

	podSecurityContext:
	runAsNonRoot: true
	runAsUser: 1000
	runAsGroup: 1000
	fsGroup: 1000

	securityContext:
	runAsNonRoot: true
	allowPrivilegeEscalation: false
	capabilities:
	drop:
	- ALL
	seccompProfile:

	resources:

	nodeSelector:

	tolerations:

	podAnnotations: {}
	podLabels: {}
	autoscaling: {}