Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files - src/routes/responses.ts +27 -3
src/routes/responses.ts
CHANGED
|
@@ -35,6 +35,23 @@ class StreamingError extends Error {
|
|
| 35 |
type IncompleteResponse = Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls">;
|
| 36 |
const SEQUENCE_NUMBER_PLACEHOLDER = -1;
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
export const postCreateResponse = async (
|
| 39 |
req: ValidatedRequest<CreateResponseParams>,
|
| 40 |
res: ExpressResponse
|
|
@@ -169,6 +186,11 @@ async function* innerRunStream(
|
|
| 169 |
return;
|
| 170 |
}
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
// Return early if not supported param
|
| 173 |
if (req.body.reasoning?.summary && req.body.reasoning?.summary !== "auto") {
|
| 174 |
throw new Error(`Not implemented: only 'auto' summary is supported. Got '${req.body.reasoning?.summary}'`);
|
|
@@ -429,7 +451,7 @@ async function* innerRunStream(
|
|
| 429 |
do {
|
| 430 |
previousMessageCount = currentMessageCount;
|
| 431 |
|
| 432 |
-
for await (const event of handleOneTurnStream(apiKey, payload, responseObject, mcpToolsMapping)) {
|
| 433 |
yield event;
|
| 434 |
}
|
| 435 |
|
|
@@ -499,14 +521,16 @@ async function* handleOneTurnStream(
|
|
| 499 |
apiKey: string | undefined,
|
| 500 |
payload: ChatCompletionCreateParamsStreaming,
|
| 501 |
responseObject: IncompleteResponse,
|
| 502 |
-
mcpToolsMapping: Record<string, McpServerParams>
|
|
|
|
| 503 |
): AsyncGenerator<PatchedResponseStreamEvent> {
|
| 504 |
const client = new OpenAI({
|
| 505 |
baseURL: process.env.OPENAI_BASE_URL ?? "https://router.huggingface.co/v1",
|
| 506 |
apiKey: apiKey,
|
|
|
|
| 507 |
});
|
| 508 |
-
console.log(payload);
|
| 509 |
console.log("payload as JSON", JSON.stringify(payload, null, 2));
|
|
|
|
| 510 |
const stream = await client.chat.completions.create(payload);
|
| 511 |
let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
|
| 512 |
let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
|
|
|
|
| 35 |
type IncompleteResponse = Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls">;
|
| 36 |
const SEQUENCE_NUMBER_PLACEHOLDER = -1;
|
| 37 |
|
| 38 |
+
// All headers are forwarded by default, except these ones.
|
| 39 |
+
const NOT_FORWARDED_HEADERS = new Set([
|
| 40 |
+
"accept",
|
| 41 |
+
"accept-encoding",
|
| 42 |
+
"authorization",
|
| 43 |
+
"connection",
|
| 44 |
+
"content-length",
|
| 45 |
+
"content-type",
|
| 46 |
+
"host",
|
| 47 |
+
"keep-alive",
|
| 48 |
+
"te",
|
| 49 |
+
"trailer",
|
| 50 |
+
"trailers",
|
| 51 |
+
"transfer-encoding",
|
| 52 |
+
"upgrade",
|
| 53 |
+
]);
|
| 54 |
+
|
| 55 |
export const postCreateResponse = async (
|
| 56 |
req: ValidatedRequest<CreateResponseParams>,
|
| 57 |
res: ExpressResponse
|
|
|
|
| 186 |
return;
|
| 187 |
}
|
| 188 |
|
| 189 |
+
// Forward headers (except authorization handled separately)
|
| 190 |
+
const defaultHeaders = Object.fromEntries(
|
| 191 |
+
Object.entries(req.headers).filter(([key]) => !NOT_FORWARDED_HEADERS.has(key.toLowerCase()))
|
| 192 |
+
) as Record<string, string>;
|
| 193 |
+
|
| 194 |
// Return early if not supported param
|
| 195 |
if (req.body.reasoning?.summary && req.body.reasoning?.summary !== "auto") {
|
| 196 |
throw new Error(`Not implemented: only 'auto' summary is supported. Got '${req.body.reasoning?.summary}'`);
|
|
|
|
| 451 |
do {
|
| 452 |
previousMessageCount = currentMessageCount;
|
| 453 |
|
| 454 |
+
for await (const event of handleOneTurnStream(apiKey, payload, responseObject, mcpToolsMapping, defaultHeaders)) {
|
| 455 |
yield event;
|
| 456 |
}
|
| 457 |
|
|
|
|
| 521 |
apiKey: string | undefined,
|
| 522 |
payload: ChatCompletionCreateParamsStreaming,
|
| 523 |
responseObject: IncompleteResponse,
|
| 524 |
+
mcpToolsMapping: Record<string, McpServerParams>,
|
| 525 |
+
defaultHeaders: Record<string, string>
|
| 526 |
): AsyncGenerator<PatchedResponseStreamEvent> {
|
| 527 |
const client = new OpenAI({
|
| 528 |
baseURL: process.env.OPENAI_BASE_URL ?? "https://router.huggingface.co/v1",
|
| 529 |
apiKey: apiKey,
|
| 530 |
+
defaultHeaders,
|
| 531 |
});
|
|
|
|
| 532 |
console.log("payload as JSON", JSON.stringify(payload, null, 2));
|
| 533 |
+
console.log("defaultHeaders", defaultHeaders);
|
| 534 |
const stream = await client.chat.completions.create(payload);
|
| 535 |
let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
|
| 536 |
let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
|