import { ChatCompletionRequestMessage } from "openai"
import { GPTTokens } from "gpt-tokens"
import { openai } from "./openai.mts"
import { runModerationCheck } from "./runModerationCheck.mts"
import { getUserContent } from "./getUserContent.mts"
import { getTextPrompt } from "./getTextPrompt.mts"
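
// Ask the model for a chat completion: screen the user-provided content with
// the moderation API, estimate the prompt's token usage, then call the chat
// completions endpoint with the remaining token budget.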
export const createChatCompletion = async (
messages: ChatCompletionRequestMessage[],
model = "gpt-4"
): Promise<string> => {
  // the user-supplied content is the only part we need to check against the moderation API
const userContent = getUserContent(messages)
const check = await runModerationCheck(userContent)
if (check.flagged) {
console.error("Thoughtcrime: content flagged by the AI police", {
userContent,
moderationResult: check,
})
return "Thoughtcrime: content flagged by the AI police"
}
const rawPrompt = getTextPrompt(messages)
  // docs: https://www.npmjs.com/package/gpt-tokens
  const usageInfo = new GPTTokens({
    // "plus" accounts for the 25% cost reduction for input tokens on GPT-3.5 Turbo (0.0015 per 1K input tokens)
    plus    : false,
    // use the caller-provided model for the estimate instead of hardcoding "gpt-4"
    model   : model as any,
    messages: messages as any,
  })
console.table({
"Tokens prompt": usageInfo.promptUsedTokens,
"Tokens completion": usageInfo.completionUsedTokens,
"Tokens total": usageInfo.usedTokens,
})
  // e.g. "Price USD:  0.000298"
  console.log("Price USD: ", usageInfo.usedUSD)

  // reserve whatever is left of the token budget for the completion
  const tokenLimit = 4000
  const maxTokens = tokenLimit - usageInfo.promptUsedTokens
  console.log("maxTokens:", maxTokens)
  /*
  console.log("settings:", {
    tokenLimit,
    promptLength: rawPrompt.length,
    promptTokenLength: rawPrompt.length / 1.9,
    maxTokens
  })
  console.log("createChatCompletion(): raw prompt length:", rawPrompt.length)
  console.log(
    `createChatCompletion(): requesting ${maxTokens} of the ${tokenLimit} tokens available`
  )
  */
console.log("query:", {
model,
// messages,
user: "Anonymous User",
temperature: 0.7,
max_tokens: maxTokens,
// stop: preset.stop?.length ? preset.stop : undefined,
})
const response = await openai.createChatCompletion({
model,
messages,
// TODO use the Hugging Face Login username here
user: "Anonymous User",
temperature: 0.7,
    // cap the completion at the remaining token budget: the longer the
    // completion, the longer the model takes to respond
    max_tokens: maxTokens,
// stop: preset.stop?.length ? preset.stop : undefined,
})
const { choices } = response.data
if (!choices.length) {
console.log("createChatCompletion(): no choice found in the LLM response..")
return ""
}
const firstChoice = choices[0]
if (firstChoice?.message?.role !== "assistant") {
console.log(
"createChatCompletion(): something went wrong, the model imagined the user response?!"
)
return ""
}
console.log("createChatCompletion(): response", firstChoice.message.content)
return firstChoice.message.content || ""
}
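
// Usage sketch (illustrative, not part of the original module); the messages
// follow the openai v3 ChatCompletionRequestMessage shape imported above:
//
//   const reply = await createChatCompletion(
//     [
//       { role: "system", content: "You are a helpful assistant." },
//       { role: "user", content: "Hello!" },
//     ],
//     "gpt-4"
//   )
//   console.log(reply)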