Sweaterdog committed on
Commit
9ad451d
·
verified ·
1 Parent(s): 22772ae

Upload local.js

Browse files
Files changed (1) hide show
  1. reasoning/local.js +145 -0
reasoning/local.js ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // local.js
2
+ import { strictFormat } from '../utils/text.js';
3
+ import { log } from '../../logger.js';
4
+
5
/**
 * Client for a local Ollama-compatible chat/embedding server.
 */
export class Local {
    /**
     * @param {string} model_name - Model identifier (e.g. 'llama3'); per-method defaults apply when falsy.
     * @param {string} [url] - Base URL of the local server; defaults to the standard Ollama address.
     */
    constructor(model_name, url) {
        this.model_name = model_name;
        this.url = url || 'http://127.0.0.1:11434';
        this.chat_endpoint = '/api/chat';
        this.embedding_endpoint = '/api/embeddings';
    }

    /**
     * Main method to handle chat requests.
     *
     * Formats the conversation, posts it to the chat endpoint, and — for
     * reasoning models ("deepseek-r1" / "Andy_3.5") — retries up to 5 times
     * when the response contains a mismatched <think> block, stripping
     * complete <think>...</think> spans from the final answer.
     *
     * @param {Array<object>} turns - Conversation turns ({role, content}).
     * @param {string} systemMessage - System prompt injected at the front.
     * @returns {Promise<string>} The model's reply text (or a fallback message).
     */
    async sendRequest(turns, systemMessage) {
        // Choose the model name or default to 'llama3'.
        const model = this.model_name || 'llama3';

        // Format messages and inject the system message at the front.
        const messages = strictFormat(turns);
        messages.unshift({ role: 'system', content: systemMessage });
        console.log('Messages:', messages);

        // BUG FIX: the original condition was `a && b || c`, where `&&` binds
        // tighter than `||`; with a null/undefined model_name the second
        // `.includes` threw a TypeError. Parenthesize the OR explicitly.
        const isReasoningModel =
            !!this.model_name &&
            (this.model_name.includes("deepseek-r1") || this.model_name.includes("Andy_3.5"));

        // Up to 5 attempts when <think> tags come back mismatched.
        const maxAttempts = 5;
        let finalRes = null;

        for (let attempt = 1; attempt <= maxAttempts; attempt++) {
            console.log(`Awaiting local response... (model: ${model}, attempt: ${attempt})`);

            let res;
            try {
                const responseData = await this.send(this.chat_endpoint, {
                    model: model,
                    messages: messages,
                    stream: false
                });
                // The local endpoint returns { message: { content: "..." } }.
                res = responseData?.message?.content || 'No response data.';
            } catch (err) {
                // If context length exceeded and we have turns to remove, retry
                // with one fewer turn. Guard err.message — it can be undefined.
                if (err.message?.toLowerCase().includes('context length') && turns.length > 1) {
                    console.log('Context length exceeded, trying again with shorter context.');
                    return await this.sendRequest(turns.slice(1), systemMessage);
                }
                console.log(err);
                res = 'My brain disconnected, try again.';
            }

            if (isReasoningModel) {
                const hasOpenTag = res.includes("<think>");
                const hasCloseTag = res.includes("</think>");

                // A partial <think> block means a truncated generation — retry.
                if (hasOpenTag !== hasCloseTag) {
                    console.warn("Partial <think> block detected. Re-generating...");
                    continue;
                }

                // If both tags appear, remove them (and everything inside).
                if (hasOpenTag && hasCloseTag) {
                    res = res.replace(/<think>[\s\S]*?<\/think>/g, '');
                }
            }

            // LOGGING: skip when the content looks like an error.
            // BUG FIX: the original logged twice for reasoning models (once
            // inside the <think> branch and once here); log exactly once,
            // after any <think> stripping.
            if (res && !res.includes("Error:") && !res.includes("exception:")) {
                log(JSON.stringify(messages), res);
            } else {
                console.warn(`Not logging due to potential error in model response: ${res}`);
            }

            finalRes = res;
            break; // valid (or non-reasoning) response obtained
        }

        // Every attempt produced a partial <think> block.
        if (finalRes == null) {
            console.warn("Could not get a valid <think> block or normal response after max attempts.");
            finalRes = 'Response incomplete, please try again.';
        }

        return finalRes;
    }

    /**
     * Request an embedding vector for the given text.
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} The embedding returned by the server.
     */
    async embed(text) {
        const model = this.model_name || 'nomic-embed-text';
        const body = { model: model, prompt: text };
        const res = await this.send(this.embedding_endpoint, body);
        return res['embedding'];
    }

    /**
     * Generic POST to the local endpoint; returns parsed JSON.
     * @param {string} endpoint - Path such as '/api/chat'.
     * @param {object} body - JSON-serializable request payload.
     * @returns {Promise<object|null>} Parsed response body.
     * @throws {Error} When the request fails or the server returns a non-OK status.
     */
    async send(endpoint, body) {
        const url = new URL(endpoint, this.url);
        const request = new Request(url, {
            method: 'POST',
            headers: new Headers(),
            body: JSON.stringify(body)
        });

        let data = null;
        try {
            const res = await fetch(request);
            if (res.ok) {
                data = await res.json();
            } else {
                throw new Error(`Ollama Status: ${res.status}`);
            }
        } catch (err) {
            console.error('Failed to send Ollama request.');
            console.error(err);
            throw err; // rethrow so we can catch it in the calling method
        }
        return data;
    }
}