File size: 4,882 Bytes
9ad451d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361291c
9ad451d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import { strictFormat } from '../utils/text.js';

export class Local {
    /**
     * Thin client for a locally hosted Ollama-compatible HTTP server.
     *
     * @param {string} model_name - Model identifier; per-method fallbacks apply when falsy
     *     ('llama3' for chat, 'nomic-embed-text' for embeddings).
     * @param {string} [url] - Base URL of the server; defaults to Ollama's standard port.
     */
    constructor(model_name, url) {
        this.model_name = model_name;
        this.url = url || 'http://127.0.0.1:11434';
        this.chat_endpoint = '/api/chat';
        this.embedding_endpoint = '/api/embeddings';
    }

    /**
     * Main method to handle chat requests.
     *
     * Retries up to 5 times when a "deepseek-r1" or "Andy_3.5" model returns a
     * mismatched <think>...</think> block; on context-length errors it recurses
     * with the oldest turn dropped.
     *
     * @param {Array<object>} turns - Conversation turns ({role, content} objects).
     * @param {string} systemMessage - System prompt injected at the front.
     * @returns {Promise<string>} The model's reply text, or a fallback message.
     */
    async sendRequest(turns, systemMessage) {
        // Choose the model name or default to 'llama3'
        const model = this.model_name || 'llama3';

        // Format messages and inject the system message at the front
        const messages = strictFormat(turns);
        messages.unshift({ role: 'system', content: systemMessage });
        console.log('Messages:', messages);

        // Up to 5 attempts for "deepseek-r1" / "Andy_3.5" if <think> tags are mismatched
        const maxAttempts = 5;
        let attempt = 0;
        let finalRes = null;

        while (attempt < maxAttempts) {
            attempt++;
            console.log(`Awaiting local response... (model: ${model}, attempt: ${attempt})`);

            let res;
            try {
                const responseData = await this.send(this.chat_endpoint, {
                    model: model,
                    messages: messages,
                    stream: false
                });
                // The local endpoint returns { message: { content: "..." } }
                res = responseData?.message?.content || 'No response data.';
            } catch (err) {
                // Guard err?.message: a caught value may not be an Error instance,
                // and the original `err.message.toLowerCase()` could itself throw.
                const errText = String(err?.message ?? '').toLowerCase();
                // If context length exceeded and we have turns to remove, retry shorter
                if (errText.includes('context length') && turns.length > 1) {
                    console.log('Context length exceeded, trying again with shorter context.');
                    return await this.sendRequest(turns.slice(1), systemMessage);
                }
                console.log(err);
                res = 'My brain disconnected, try again.';
            }

            // Handle the <think> block for reasoning models.
            // Fix: the original `a && b || c` evaluated `c` unguarded due to operator
            // precedence, throwing a TypeError whenever model_name was null/undefined.
            const isReasoningModel = Boolean(
                this.model_name &&
                (this.model_name.includes('deepseek-r1') || this.model_name.includes('Andy_3.5'))
            );
            if (isReasoningModel) {
                const hasOpenTag = res.includes('<think>');
                const hasCloseTag = res.includes('</think>');

                // Partial mismatch: regenerate the response on the next loop iteration
                if (hasOpenTag !== hasCloseTag) {
                    console.warn('Partial <think> block detected. Re-generating...');
                    continue;
                }
                // If both tags appear, remove them (and everything inside)
                if (hasOpenTag && hasCloseTag) {
                    res = res.replace(/<think>[\s\S]*?<\/think>/g, '');
                }
            }
            // Fully valid response, or no <think> handling needed
            finalRes = res;
            break;
        }

        // All attempts exhausted without a complete response
        if (finalRes == null) {
            console.warn('Could not get a valid <think> block or normal response after max attempts.');
            finalRes = 'Response incomplete, please try again.';
        }
        return finalRes;
    }

    /**
     * Request an embedding vector for the given text.
     *
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} The embedding array from the server response.
     */
    async embed(text) {
        const model = this.model_name || 'nomic-embed-text';
        const body = { model: model, prompt: text };
        const res = await this.send(this.embedding_endpoint, body);
        return res['embedding'];
    }

    /**
     * Generic POST helper for the local endpoint.
     *
     * @param {string} endpoint - Path resolved against this.url.
     * @param {object} body - JSON-serializable request payload.
     * @returns {Promise<object|null>} Parsed JSON response.
     * @throws {Error} On network failure or non-2xx status (rethrown to caller).
     */
    async send(endpoint, body) {
        const url = new URL(endpoint, this.url);
        const method = 'POST';
        // Declare the JSON payload explicitly (original sent no Content-Type).
        const headers = new Headers({ 'Content-Type': 'application/json' });
        const request = new Request(url, {
            method,
            headers,
            body: JSON.stringify(body)
        });

        let data = null;
        try {
            const res = await fetch(request);
            if (res.ok) {
                data = await res.json();
            } else {
                throw new Error(`Ollama Status: ${res.status}`);
            }
        } catch (err) {
            console.error('Failed to send Ollama request.');
            console.error(err);
            throw err; // rethrow so we can catch it in the calling method
        }
        return data;
    }
}