import { ChatCompletionRequestMessage } from "openai"
import { GPTTokens } from "gpt-tokens"
import { openai } from "./openai.mts"
import { runModerationCheck } from "./runModerationCheck.mts"
import { getUserContent } from "./getUserContent.mts"
import { getTextPrompt } from "./getTextPrompt.mts"
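
// Ask the model for a chat completion: screen the user-provided content with
// the moderation API, estimate the prompt's token usage, then call the chat
// completions endpoint with the remaining token budget.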
export const createChatCompletion = async (
messages: ChatCompletionRequestMessage[],
model = "gpt-4"
): Promise<string> => {
  // the user-supplied content is the only part we need to check against the moderation API
const userContent = getUserContent(messages)
const check = await runModerationCheck(userContent)
if (check.flagged) {
console.error("Thoughtcrime: content flagged by the AI police", {
userContent,
moderationResult: check,
})
return "Thoughtcrime: content flagged by the AI police"
}
const rawPrompt = getTextPrompt(messages)
  // docs: https://www.npmjs.com/package/gpt-tokens
  const usageInfo = new GPTTokens({
    // "plus" accounts for the 25% cost reduction for input tokens on GPT-3.5 Turbo (0.0015 per 1K input tokens)
    plus    : false,
    // use the caller-provided model for the estimate instead of hardcoding "gpt-4"
    model   : model as any,
    messages: messages as any,
  })
console.table({
"Tokens prompt": usageInfo.promptUsedTokens,
"Tokens completion": usageInfo.completionUsedTokens,
"Tokens total": usageInfo.usedTokens,
})
  // e.g. "Price USD:  0.000298"
  console.log("Price USD: ", usageInfo.usedUSD)

  // reserve whatever is left of the token budget for the completion
  const tokenLimit = 4000
  const maxTokens = tokenLimit - usageInfo.promptUsedTokens
  console.log("maxTokens:", maxTokens)
  /*
  console.log("settings:", {
    tokenLimit,
    promptLength: rawPrompt.length,
    promptTokenLength: rawPrompt.length / 1.9,
    maxTokens
  })
  console.log("createChatCompletion(): raw prompt length:", rawPrompt.length)
  console.log(
    `createChatCompletion(): requesting ${maxTokens} of the ${tokenLimit} tokens available`
  )
  */
console.log("query:", {
model,
// messages,
user: "Anonymous User",
temperature: 0.7,
max_tokens: maxTokens,
// stop: preset.stop?.length ? preset.stop : undefined,
})
const response = await openai.createChatCompletion({
model,
messages,
// TODO use the Hugging Face Login username here
user: "Anonymous User",
temperature: 0.7,
    // cap the completion at the remaining token budget: the longer the
    // completion, the longer the model takes to respond
    max_tokens: maxTokens,
// stop: preset.stop?.length ? preset.stop : undefined,
})
const { choices } = response.data
if (!choices.length) {
console.log("createChatCompletion(): no choice found in the LLM response..")
return ""
}
const firstChoice = choices[0]
if (firstChoice?.message?.role !== "assistant") {
console.log(
"createChatCompletion(): something went wrong, the model imagined the user response?!"
)
return ""
}
console.log("createChatCompletion(): response", firstChoice.message.content)
return firstChoice.message.content || ""
}
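
// Usage sketch (illustrative, not part of the original module); the messages
// follow the openai v3 ChatCompletionRequestMessage shape imported above:
//
//   const reply = await createChatCompletion(
//     [
//       { role: "system", content: "You are a helpful assistant." },
//       { role: "user", content: "Hello!" },
//     ],
//     "gpt-4"
//   )
//   console.log(reply)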