Commit: change billing to org
model_params.cfg
CHANGED
@@ -3,6 +3,8 @@ PROVIDER = huggingface
 MODEL = meta-llama/Meta-Llama-3-8B-Instruct
 MAX_TOKENS = 512
 TEMPERATURE = 0.2
+INFERENCE_PROVIDER = novita
+ORGANIZATION = GIZ
 
 [reader]
 TYPE = INF_PROVIDERS
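The two new keys drive the billing change: `INFERENCE_PROVIDER` picks the serverless inference provider and `ORGANIZATION` names the org to charge. A minimal sketch of how `utils/generator.py` consumes them via `configparser` (the body of `getconfig` is not shown in this diff, so the thin wrapper below is an assumption):

```python
# Hedged sketch: getconfig() exists in utils/generator.py, but its body is
# not part of this diff; a plain ConfigParser wrapper is assumed here.
from configparser import ConfigParser

def getconfig(configfile_path: str) -> ConfigParser:
    config = ConfigParser()
    config.read(configfile_path)
    return config

config = getconfig("model_params.cfg")
INFERENCE_PROVIDER = config.get("generator", "INFERENCE_PROVIDER")  # "novita"
ORGANIZATION = config.get("generator", "ORGANIZATION")              # "GIZ"
```

Note that `config.get` here is `ConfigParser.get`, so a missing key raises `NoOptionError`; older deployments of this config would need these two lines (or `fallback=` arguments) before pulling this commit.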
utils/__pycache__/generator.cpython-310.pyc
CHANGED
Binary files a/utils/__pycache__/generator.cpython-310.pyc and b/utils/__pycache__/generator.cpython-310.pyc differ
utils/__pycache__/retriever.cpython-310.pyc
CHANGED
Binary files a/utils/__pycache__/retriever.cpython-310.pyc and b/utils/__pycache__/retriever.cpython-310.pyc differ
utils/__pycache__/whisp_api.cpython-310.pyc
CHANGED
Binary files a/utils/__pycache__/whisp_api.cpython-310.pyc and b/utils/__pycache__/whisp_api.cpython-310.pyc differ
utils/generator.py
CHANGED
@@ -34,10 +34,10 @@ def getconfig(configfile_path: str):
 def get_auth(provider: str) -> dict:
     """Get authentication configuration for different providers"""
     auth_configs = {
-
+        "openai": {"api_key": os.getenv("OPENAI_API_KEY")},
         "huggingface": {"api_key": os.getenv("HF_TOKEN")},
-
-
+        "anthropic": {"api_key": os.getenv("ANTHROPIC_API_KEY")},
+        "cohere": {"api_key": os.getenv("COHERE_API_KEY")},
     }
 
     if provider not in auth_configs:
@@ -61,6 +61,8 @@ PROVIDER = config.get("generator", "PROVIDER")
 MODEL = config.get("generator", "MODEL")
 MAX_TOKENS = int(config.get("generator", "MAX_TOKENS"))
 TEMPERATURE = float(config.get("generator", "TEMPERATURE"))
+INFERENCE_PROVIDER = config.get("generator", "INFERENCE_PROVIDER")
+ORGANIZATION = config.get("generator", "ORGANIZATION")
 
 # Set up authentication for the selected provider
 auth_config = get_auth(PROVIDER)
@@ -71,7 +73,6 @@ def get_chat_model():
         "temperature": TEMPERATURE,
         "max_tokens": MAX_TOKENS,
     }
-    logging.info(f"provider is {PROVIDER}")
 
     # if PROVIDER == "openai":
     #     return ChatOpenAI(
@@ -90,13 +91,15 @@ def get_chat_model():
     #         model=MODEL,
     #         cohere_api_key=auth_config["api_key"],
     #         **common_params
-    #
+    #     )
     if PROVIDER == "huggingface":
         # Initialize HuggingFaceEndpoint with explicit parameters
         llm = HuggingFaceEndpoint(
             repo_id=MODEL,
             huggingfacehub_api_token=auth_config["api_key"],
             task="text-generation",
+            provider=INFERENCE_PROVIDER,
+            server_kwargs={"bill_to": ORGANIZATION},
             temperature=TEMPERATURE,
             max_new_tokens=MAX_TOKENS
         )
@@ -256,7 +259,9 @@ async def generate(query: str, context: Union[str, List[Dict[str, Any]]]) -> str
     try:
         messages = build_messages(query, formatted_context)
         answer = await _call_llm(messages)
+
         return answer
+
     except Exception as e:
         logging.exception("Generation failed")
-        return f"Error: {str(e)}"
+        return f"Error: {str(e)}"
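The substantive change is the two new `HuggingFaceEndpoint` arguments: `provider=INFERENCE_PROVIDER` routes requests through the configured inference provider (novita), and `server_kwargs={"bill_to": ORGANIZATION}` asks Hugging Face to bill the GIZ organization rather than the token owner, matching the commit title. A quick way to verify the same routing and billing outside LangChain, assuming a recent `huggingface_hub` release where `InferenceClient` accepts `provider` and `bill_to` (check your installed version if either keyword argument is rejected):

```python
import os
from huggingface_hub import InferenceClient

# Same provider/billing combination as the HuggingFaceEndpoint call above.
# provider= and bill_to= are assumptions about the installed huggingface_hub
# version; both appear in recent releases that support inference providers.
client = InferenceClient(
    provider="novita",              # INFERENCE_PROVIDER in model_params.cfg
    api_key=os.getenv("HF_TOKEN"),
    bill_to="GIZ",                  # ORGANIZATION: charge the org, not the user
)

completion = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=512,
    temperature=0.2,
)
print(completion.choices[0].message.content)
```

Keeping both values in `model_params.cfg` rather than hard-coding them means the Space can be redeployed under a different provider or organization without touching `utils/generator.py`.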