Wauplin HF Staff committed on
Commit
613001f
·
verified ·
1 Parent(s): dce90f2

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. src/routes/responses.ts +27 -3
src/routes/responses.ts CHANGED
@@ -35,6 +35,23 @@ class StreamingError extends Error {
35
  type IncompleteResponse = Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls">;
36
  const SEQUENCE_NUMBER_PLACEHOLDER = -1;
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  export const postCreateResponse = async (
39
  req: ValidatedRequest<CreateResponseParams>,
40
  res: ExpressResponse
@@ -169,6 +186,11 @@ async function* innerRunStream(
169
  return;
170
  }
171
 
 
 
 
 
 
172
  // Return early if not supported param
173
  if (req.body.reasoning?.summary && req.body.reasoning?.summary !== "auto") {
174
  throw new Error(`Not implemented: only 'auto' summary is supported. Got '${req.body.reasoning?.summary}'`);
@@ -429,7 +451,7 @@ async function* innerRunStream(
429
  do {
430
  previousMessageCount = currentMessageCount;
431
 
432
- for await (const event of handleOneTurnStream(apiKey, payload, responseObject, mcpToolsMapping)) {
433
  yield event;
434
  }
435
 
@@ -499,14 +521,16 @@ async function* handleOneTurnStream(
499
  apiKey: string | undefined,
500
  payload: ChatCompletionCreateParamsStreaming,
501
  responseObject: IncompleteResponse,
502
- mcpToolsMapping: Record<string, McpServerParams>
 
503
  ): AsyncGenerator<PatchedResponseStreamEvent> {
504
  const client = new OpenAI({
505
  baseURL: process.env.OPENAI_BASE_URL ?? "https://router.huggingface.co/v1",
506
  apiKey: apiKey,
 
507
  });
508
- console.log(payload);
509
  console.log("payload as JSON", JSON.stringify(payload, null, 2));
 
510
  const stream = await client.chat.completions.create(payload);
511
  let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
512
  let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
 
35
  type IncompleteResponse = Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls">;
36
  const SEQUENCE_NUMBER_PLACEHOLDER = -1;
37
 
38
+ // All headers are forwarded by default, except these ones.
39
+ const NOT_FORWARDED_HEADERS = new Set([
40
+ "accept",
41
+ "accept-encoding",
42
+ "authorization",
43
+ "connection",
44
+ "content-length",
45
+ "content-type",
46
+ "host",
47
+ "keep-alive",
48
+ "te",
49
+ "trailer",
50
+ "trailers",
51
+ "transfer-encoding",
52
+ "upgrade",
53
+ ]);
54
+
55
  export const postCreateResponse = async (
56
  req: ValidatedRequest<CreateResponseParams>,
57
  res: ExpressResponse
 
186
  return;
187
  }
188
 
189
+ // Forward all incoming request headers except those listed in NOT_FORWARDED_HEADERS (authorization is handled separately).
190
+ const defaultHeaders = Object.fromEntries(
191
+ Object.entries(req.headers).filter(([key]) => !NOT_FORWARDED_HEADERS.has(key.toLowerCase()))
192
+ ) as Record<string, string>;
193
+
194
  // Return early if not supported param
195
  if (req.body.reasoning?.summary && req.body.reasoning?.summary !== "auto") {
196
  throw new Error(`Not implemented: only 'auto' summary is supported. Got '${req.body.reasoning?.summary}'`);
 
451
  do {
452
  previousMessageCount = currentMessageCount;
453
 
454
+ for await (const event of handleOneTurnStream(apiKey, payload, responseObject, mcpToolsMapping, defaultHeaders)) {
455
  yield event;
456
  }
457
 
 
521
  apiKey: string | undefined,
522
  payload: ChatCompletionCreateParamsStreaming,
523
  responseObject: IncompleteResponse,
524
+ mcpToolsMapping: Record<string, McpServerParams>,
525
+ defaultHeaders: Record<string, string>
526
  ): AsyncGenerator<PatchedResponseStreamEvent> {
527
  const client = new OpenAI({
528
  baseURL: process.env.OPENAI_BASE_URL ?? "https://router.huggingface.co/v1",
529
  apiKey: apiKey,
530
+ defaultHeaders,
531
  });
 
532
  console.log("payload as JSON", JSON.stringify(payload, null, 2));
533
+ console.log("defaultHeaders", defaultHeaders);
534
  const stream = await client.chat.completions.create(payload);
535
  let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
536
  let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;