Upload local.js
Browse files- reasoning/local.js +145 -0
reasoning/local.js
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// local.js
|
2 |
+
import { strictFormat } from '../utils/text.js';
|
3 |
+
import { log } from '../../logger.js';
|
4 |
+
|
5 |
+
export class Local {
    /**
     * Wrapper around a local Ollama-style HTTP endpoint.
     * @param {string} model_name - Model to request; falls back per-method when falsy.
     * @param {string} [url] - Base URL of the local server (defaults to Ollama's).
     */
    constructor(model_name, url) {
        this.model_name = model_name;
        this.url = url || 'http://127.0.0.1:11434';
        this.chat_endpoint = '/api/chat';
        this.embedding_endpoint = '/api/embeddings';
    }

    /**
     * Main method to handle chat requests.
     * Retries up to 5 times when a reasoning model ("deepseek-r1" / "Andy_3.5")
     * returns a mismatched <think> block; strips complete <think> blocks.
     * @param {Array<object>} turns - Conversation turns (role/content objects).
     * @param {string} systemMessage - System prompt injected at the front.
     * @returns {Promise<string>} The model's reply text, or a fallback message.
     */
    async sendRequest(turns, systemMessage) {
        // Choose the model name or default to 'llama3'.
        const model = this.model_name || 'llama3';

        // Format messages and inject the system message at the front.
        const messages = strictFormat(turns);
        messages.unshift({ role: 'system', content: systemMessage });
        console.log('Messages:', messages);

        // Up to 5 attempts in case reasoning models emit partial <think> tags.
        const maxAttempts = 5;
        let attempt = 0;
        let finalRes = null;

        while (attempt < maxAttempts) {
            attempt++;
            console.log(`Awaiting local response... (model: ${model}, attempt: ${attempt})`);

            // Perform the actual request (wrapped in a try/catch).
            let res;
            try {
                const responseData = await this.send(this.chat_endpoint, {
                    model: model,
                    messages: messages,
                    stream: false
                });
                // The local endpoint returns { message: { content: "..." } }.
                res = responseData?.message?.content || 'No response data.';
            } catch (err) {
                // If context length exceeded and we have turns to remove, retry
                // with the oldest turn dropped. Optional chaining guards errors
                // that carry no .message (e.g. non-Error throws).
                if (err.message?.toLowerCase().includes('context length') && turns.length > 1) {
                    console.log('Context length exceeded, trying again with shorter context.');
                    return await this.sendRequest(turns.slice(1), systemMessage);
                } else {
                    console.log(err);
                    res = 'My brain disconnected, try again.';
                }
            }

            // Reasoning models wrap chain-of-thought in <think>...</think>.
            // FIX: the || is parenthesized — previously `a && b || c` meant
            // `(a && b) || c`, so a null model_name crashed on .includes().
            if (this.model_name && (this.model_name.includes("deepseek-r1") || this.model_name.includes("Andy_3.5"))) {
                const hasOpenTag = res.includes("<think>");
                const hasCloseTag = res.includes("</think>");

                // A lone open or close tag means the generation was truncated
                // mid-thought — regenerate on the next loop iteration.
                if ((hasOpenTag && !hasCloseTag) || (!hasOpenTag && hasCloseTag)) {
                    console.warn("Partial <think> block detected. Re-generating...");
                    continue;
                }

                // If both tags appear, remove them (and everything inside)
                // before logging so the log never captures raw reasoning.
                if (hasOpenTag && hasCloseTag) {
                    res = res.replace(/<think>[\s\S]*?<\/think>/g, '');
                }
            }

            // LOGGING (single site — the original logged twice for reasoning
            // models, once before and once after tag handling):
            // only log if the response does not look like an error payload.
            if (res && !res.includes("Error:") && !res.includes("exception:")) {
                log(JSON.stringify(messages), res);
            } else {
                // If there’s a potential error in the content, skip logging.
                console.warn(`Not logging due to potential error in model response: ${res}`);
            }

            // Fully valid (or no <think> handling needed) — accept and exit.
            finalRes = res;
            break;
        }

        // All attempts ended in `continue` (persistent partial <think> tags),
        // so finalRes was never assigned.
        if (finalRes == null) {
            console.warn("Could not get a valid <think> block or normal response after max attempts.");
            finalRes = 'Response incomplete, please try again.';
        }

        return finalRes;
    }

    /**
     * Embedding method (unchanged behavior).
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} The embedding vector from the endpoint.
     */
    async embed(text) {
        const model = this.model_name || 'nomic-embed-text';
        const body = { model: model, prompt: text };
        const res = await this.send(this.embedding_endpoint, body);
        return res['embedding'];
    }

    /**
     * Generic POST helper for the local endpoint.
     * @param {string} endpoint - Path resolved against this.url.
     * @param {object} body - JSON-serializable request payload.
     * @returns {Promise<object|null>} Parsed JSON response.
     * @throws {Error} On network failure or a non-OK HTTP status.
     */
    async send(endpoint, body) {
        const url = new URL(endpoint, this.url);
        const method = 'POST';
        const headers = new Headers();
        const request = new Request(url, {
            method,
            headers,
            body: JSON.stringify(body)
        });

        let data = null;
        try {
            const res = await fetch(request);
            if (res.ok) {
                data = await res.json();
            } else {
                throw new Error(`Ollama Status: ${res.status}`);
            }
        } catch (err) {
            console.error('Failed to send Ollama request.');
            console.error(err);
            throw err; // rethrow so we can catch it in the calling method
        }
        return data;
    }
}