# Source uploaded by Draken007 (commit 2a0bc63, "Upload 7228 files").
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from huggingface_hub import HfApi, snapshot_download
from huggingface_hub.utils import validate_repo_id, HFValidationError
from .llm import Config, LLM
def get_path_type(path: str) -> Optional[str]:
    """Classify `path` as a local "file", local "dir", Hub "repo" id, or None.

    Local filesystem checks take precedence; anything else is accepted as a
    repo id only if it passes `validate_repo_id`.
    """
    candidate = Path(path)
    if candidate.is_file():
        return "file"
    if candidate.is_dir():
        return "dir"
    try:
        validate_repo_id(path)
    except HFValidationError:
        return None
    return "repo"
@dataclass
class AutoConfig:
    """Model configuration resolved from a local directory or a Hugging Face repo."""

    # Generation parameters forwarded to the underlying LLM.
    config: Config
    # Model type read from `config.json` (e.g. "llama"); None if unknown.
    model_type: Optional[str] = None

    @classmethod
    def from_pretrained(
        cls,
        model_path_or_repo_id: str,
        local_files_only: bool = False,
        revision: Optional[str] = None,
        **kwargs,
    ) -> "AutoConfig":
        """Load config from a local directory or Hub repo.

        Args:
            model_path_or_repo_id: Path to a model file/directory or a Hub repo id.
            local_files_only: If True, never hit the network; use cached files only.
            revision: Branch name, tag name, or commit id to download.
            **kwargs: Overrides applied to the resulting `Config` attributes.

        Returns:
            A populated `AutoConfig`.

        Raises:
            ValueError: If the path/repo id doesn't exist.
            TypeError: If a kwarg is not an existing `Config` attribute.
        """
        path_type = get_path_type(model_path_or_repo_id)
        if not path_type:
            raise ValueError(f"Model path '{model_path_or_repo_id}' doesn't exist.")

        config = Config()
        # Fix: use `cls` (not the hard-coded class name) so subclasses of
        # AutoConfig get instances of themselves from this constructor.
        auto_config = cls(config=config)
        if path_type == "dir":
            cls._update_from_dir(model_path_or_repo_id, auto_config)
        elif path_type == "repo":
            cls._update_from_repo(
                model_path_or_repo_id,
                auto_config,
                local_files_only=local_files_only,
                revision=revision,
            )

        # Validate each override before applying so invalid keys fail loudly.
        for k, v in kwargs.items():
            if not hasattr(config, k):
                raise TypeError(
                    f"'{k}' is an invalid keyword argument for from_pretrained()"
                )
            setattr(config, k, v)

        return auto_config

    @classmethod
    def _update_from_repo(
        cls,
        repo_id: str,
        auto_config: "AutoConfig",
        local_files_only: bool,
        revision: Optional[str] = None,
    ) -> None:
        """Fetch only `config.json` from the repo and merge it into `auto_config`."""
        path = snapshot_download(
            repo_id=repo_id,
            allow_patterns="config.json",
            local_files_only=local_files_only,
            revision=revision,
        )
        cls._update_from_dir(path, auto_config)

    @classmethod
    def _update_from_dir(cls, path: str, auto_config: "AutoConfig") -> None:
        """Merge `<path>/config.json` into `auto_config` if it exists (no-op otherwise)."""
        config_file = (Path(path) / "config.json").resolve()
        if config_file.is_file():
            cls._update_from_file(config_file, auto_config)

    @classmethod
    def _update_from_file(cls, path, auto_config: "AutoConfig") -> None:
        """Read a config.json (str or Path) and copy known generation params.

        Only the keys under `task_specific_params.text-generation` listed below
        are copied; absent keys leave the defaults untouched.
        """
        # config.json is JSON, which is UTF-8 by specification.
        with open(path, encoding="utf-8") as f:
            config = json.load(f)

        auto_config.model_type = config.get("model_type")
        params = config.get("task_specific_params", {})
        params = params.get("text-generation", {})
        for name in [
            "top_k",
            "top_p",
            "temperature",
            "repetition_penalty",
            "last_n_tokens",
        ]:
            value = params.get(name)
            if value is not None:
                setattr(auto_config.config, name, value)
class AutoModelForCausalLM:
    """Factory for loading GGML/GGUF causal language models from disk or the Hub."""

    @classmethod
    def from_pretrained(
        cls,
        model_path_or_repo_id: str,
        *,
        model_type: Optional[str] = None,
        model_file: Optional[str] = None,
        config: Optional[AutoConfig] = None,
        lib: Optional[str] = None,
        local_files_only: bool = False,
        revision: Optional[str] = None,
        hf: bool = False,
        **kwargs,
    ) -> LLM:
        """Loads the language model from a local file or remote repo.

        Args:
            model_path_or_repo_id: The path to a model file or directory or the
                name of a Hugging Face Hub model repo.
            model_type: The model type.
            model_file: The name of the model file in repo or directory.
            config: `AutoConfig` object.
            lib: The path to a shared library or one of `avx2`, `avx`, `basic`.
            local_files_only: Whether or not to only look at local files
                (i.e., do not try to download the model).
            revision: The specific model version to use. It can be a branch
                name, a tag name, or a commit id.
            hf: Whether to create a Hugging Face Transformers model.

        Returns:
            `LLM` object.
        """
        # Heuristic: names containing "gptq" are delegated to the GPTQ backend.
        if model_type is None and "gptq" in str(model_path_or_repo_id).lower():
            model_type = "gptq"
        if model_type == "gptq":
            from . import gptq

            return gptq.AutoModelForCausalLM.from_pretrained(
                model_path_or_repo_id,
                local_files_only=local_files_only,
                revision=revision,
                **kwargs,
            )

        config = config or AutoConfig.from_pretrained(
            model_path_or_repo_id,
            local_files_only=local_files_only,
            revision=revision,
            **kwargs,
        )
        # An explicit model_type wins over the one found in config.json.
        model_type = model_type or config.model_type

        path_type = get_path_type(model_path_or_repo_id)
        model_path = None
        if path_type == "file":
            model_path = model_path_or_repo_id
        elif path_type == "dir":
            model_path = cls._find_model_path_from_dir(
                model_path_or_repo_id, model_file
            )
        elif path_type == "repo":
            model_path = cls._find_model_path_from_repo(
                model_path_or_repo_id,
                model_file,
                local_files_only=local_files_only,
                revision=revision,
            )

        llm = LLM(
            model_path=model_path,
            model_type=model_type,
            config=config.config,
            lib=lib,
        )
        if not hf:
            return llm

        from .transformers import CTransformersConfig, CTransformersModel

        config = CTransformersConfig(name_or_path=str(model_path_or_repo_id))
        return CTransformersModel(config=config, llm=llm)

    @classmethod
    def _find_model_path_from_repo(
        cls,
        repo_id: str,
        filename: Optional[str],
        local_files_only: bool,
        revision: Optional[str] = None,
    ) -> str:
        """Download (or locate in cache) a model file from a Hub repo.

        If no filename is given and the network is allowed, the repo is queried
        first so only the chosen file is downloaded instead of every match.
        """
        if not filename and not local_files_only:
            filename = cls._find_model_file_from_repo(
                repo_id=repo_id,
                revision=revision,
            )
        allow_patterns = filename or ["*.bin", "*.gguf"]
        path = snapshot_download(
            repo_id=repo_id,
            allow_patterns=allow_patterns,
            local_files_only=local_files_only,
            revision=revision,
        )
        return cls._find_model_path_from_dir(path, filename=filename)

    @classmethod
    def _find_model_file_from_repo(
        cls,
        repo_id: str,
        revision: Optional[str] = None,
    ) -> Optional[str]:
        """Return the name of the smallest `.bin`/`.gguf` file in the repo.

        Raises:
            ValueError: If the repo contains no model files.
        """
        api = HfApi()
        repo_info = api.repo_info(
            repo_id=repo_id,
            files_metadata=True,
            revision=revision,
        )
        files = [
            (f.size, f.rfilename)
            for f in repo_info.siblings
            if f.rfilename.endswith((".bin", ".gguf"))
        ]
        if not files:
            raise ValueError(f"No model file found in repo '{repo_id}'")
        # Smallest file wins (size-first tuple ordering; name breaks ties).
        return min(files)[1]

    @classmethod
    def _find_model_path_from_dir(
        cls,
        path: str,
        filename: Optional[str] = None,
    ) -> str:
        """Resolve a model file inside a local directory.

        If `filename` is given it must exist in the directory; otherwise the
        smallest `.bin`/`.gguf` file in the directory is chosen.

        Raises:
            ValueError: If the requested file is missing, or no model file exists.
        """
        path = Path(path).resolve()
        if filename:
            file = (path / filename).resolve()
            if not file.is_file():
                # Fix: the message previously interpolated the literal
                # placeholder '(unknown)' instead of the missing filename.
                raise ValueError(f"Model file '{filename}' not found in '{path}'")
            return str(file)
        files = [
            (f.stat().st_size, f)
            for f in path.iterdir()
            if f.is_file() and f.name.endswith((".bin", ".gguf"))
        ]
        if not files:
            raise ValueError(f"No model file found in directory '{path}'")
        # Smallest file wins (size-first tuple ordering; path breaks ties).
        file = min(files)[1]
        return str(file.resolve())
class AutoTokenizer:
    """Creates a tokenizer bound to an already-loaded `hf=True` model."""

    @classmethod
    def from_pretrained(cls, model):
        """Return a `CTransformersTokenizer` wrapping `model`'s underlying LLM.

        Raises:
            TypeError: If `model` is not a `CTransformersModel`.
        """
        from .transformers import CTransformersModel, CTransformersTokenizer

        if isinstance(model, CTransformersModel):
            return CTransformersTokenizer(model._llm)
        raise TypeError(
            f"Currently `AutoTokenizer.from_pretrained` only accepts a model object. Please use:\n\n"
            " model = AutoModelForCausalLM.from_pretrained(..., hf=True)\n"
            " tokenizer = AutoTokenizer.from_pretrained(model)"
        )