Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- src/routes/responses.ts +27 -3
src/routes/responses.ts
CHANGED
@@ -35,6 +35,23 @@ class StreamingError extends Error {
|
|
35 |
type IncompleteResponse = Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls">;
|
36 |
const SEQUENCE_NUMBER_PLACEHOLDER = -1;
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
export const postCreateResponse = async (
|
39 |
req: ValidatedRequest<CreateResponseParams>,
|
40 |
res: ExpressResponse
|
@@ -169,6 +186,11 @@ async function* innerRunStream(
|
|
169 |
return;
|
170 |
}
|
171 |
|
|
|
|
|
|
|
|
|
|
|
172 |
// Return early if not supported param
|
173 |
if (req.body.reasoning?.summary && req.body.reasoning?.summary !== "auto") {
|
174 |
throw new Error(`Not implemented: only 'auto' summary is supported. Got '${req.body.reasoning?.summary}'`);
|
@@ -429,7 +451,7 @@ async function* innerRunStream(
|
|
429 |
do {
|
430 |
previousMessageCount = currentMessageCount;
|
431 |
|
432 |
-
for await (const event of handleOneTurnStream(apiKey, payload, responseObject, mcpToolsMapping)) {
|
433 |
yield event;
|
434 |
}
|
435 |
|
@@ -499,14 +521,16 @@ async function* handleOneTurnStream(
|
|
499 |
apiKey: string | undefined,
|
500 |
payload: ChatCompletionCreateParamsStreaming,
|
501 |
responseObject: IncompleteResponse,
|
502 |
-
mcpToolsMapping: Record<string, McpServerParams
|
|
|
503 |
): AsyncGenerator<PatchedResponseStreamEvent> {
|
504 |
const client = new OpenAI({
|
505 |
baseURL: process.env.OPENAI_BASE_URL ?? "https://router.huggingface.co/v1",
|
506 |
apiKey: apiKey,
|
|
|
507 |
});
|
508 |
-
console.log(payload);
|
509 |
console.log("payload as JSON", JSON.stringify(payload, null, 2));
|
|
|
510 |
const stream = await client.chat.completions.create(payload);
|
511 |
let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
|
512 |
let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
|
|
|
35 |
type IncompleteResponse = Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls">;
|
36 |
const SEQUENCE_NUMBER_PLACEHOLDER = -1;
|
37 |
|
38 |
+
// All incoming request headers are forwarded by default, except the ones listed here (names are lowercase; lookups lowercase the header name first).
|
39 |
+
const NOT_FORWARDED_HEADERS = new Set([
|
40 |
+
"accept",
|
41 |
+
"accept-encoding",
|
42 |
+
"authorization",
|
43 |
+
"connection",
|
44 |
+
"content-length",
|
45 |
+
"content-type",
|
46 |
+
"host",
|
47 |
+
"keep-alive",
|
48 |
+
"te",
|
49 |
+
"trailer",
|
50 |
+
"trailers",
|
51 |
+
"transfer-encoding",
|
52 |
+
"upgrade",
|
53 |
+
]);
|
54 |
+
|
55 |
export const postCreateResponse = async (
|
56 |
req: ValidatedRequest<CreateResponseParams>,
|
57 |
res: ExpressResponse
|
|
|
186 |
return;
|
187 |
}
|
188 |
|
189 |
+
// Forward incoming request headers, skipping every name in NOT_FORWARDED_HEADERS (authorization is handled separately)
|
190 |
+
const defaultHeaders = Object.fromEntries(
|
191 |
+
Object.entries(req.headers).filter(([key]) => !NOT_FORWARDED_HEADERS.has(key.toLowerCase()))
|
192 |
+
) as Record<string, string>;
|
193 |
+
|
194 |
// Return early if not supported param
|
195 |
if (req.body.reasoning?.summary && req.body.reasoning?.summary !== "auto") {
|
196 |
throw new Error(`Not implemented: only 'auto' summary is supported. Got '${req.body.reasoning?.summary}'`);
|
|
|
451 |
do {
|
452 |
previousMessageCount = currentMessageCount;
|
453 |
|
454 |
+
for await (const event of handleOneTurnStream(apiKey, payload, responseObject, mcpToolsMapping, defaultHeaders)) {
|
455 |
yield event;
|
456 |
}
|
457 |
|
|
|
521 |
apiKey: string | undefined,
|
522 |
payload: ChatCompletionCreateParamsStreaming,
|
523 |
responseObject: IncompleteResponse,
|
524 |
+
mcpToolsMapping: Record<string, McpServerParams>,
|
525 |
+
defaultHeaders: Record<string, string>
|
526 |
): AsyncGenerator<PatchedResponseStreamEvent> {
|
527 |
const client = new OpenAI({
|
528 |
baseURL: process.env.OPENAI_BASE_URL ?? "https://router.huggingface.co/v1",
|
529 |
apiKey: apiKey,
|
530 |
+
defaultHeaders,
|
531 |
});
|
|
|
532 |
console.log("payload as JSON", JSON.stringify(payload, null, 2));
|
533 |
+
console.log("defaultHeaders", defaultHeaders);
|
534 |
const stream = await client.chat.completions.create(payload);
|
535 |
let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
|
536 |
let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
|