import { Bindings } from './extras';
import { AiModel } from './types/ai';
import { ChatCompletionRequestMessage, CreateCompletionResponseUsage } from 'openai';
import { encodingForModel } from 'js-tiktoken';

/**
 * Records usage data points in Analytics Engine datasets and provides a helper
 * for estimating OpenAI chat token consumption.
 */
export class UsageTracking {
	protected ctx: ExecutionContext;
	private readonly dataCloudflareRequest: AnalyticsEngineDataset;
	private readonly dataCloudflareKv: AnalyticsEngineDataset;
	private readonly dataCloudflareD1: AnalyticsEngineDataset;
	private readonly dataOpenaiLm: AnalyticsEngineDataset;

	/**
	 * @param env - Worker bindings providing the `TRACKING_*` Analytics Engine datasets.
	 * @param ctx - Execution context of the current invocation.
	 */
	constructor(env: Bindings, ctx: ExecutionContext) {
		this.ctx = ctx;
		this.dataCloudflareRequest = env.TRACKING_CLOUDFLARE_REQUEST;
		this.dataCloudflareKv = env.TRACKING_CLOUDFLARE_KV;
		this.dataCloudflareD1 = env.TRACKING_CLOUDFLARE_D1;
		this.dataOpenaiLm = env.TRACKING_OPENAI_LM;
	}

	/**
	 * Calculates the number of tokens used by OpenAI's model in processing the messages.
	 *
	 * @param messages - An array of ChatCompletionRequestMessages to be processed.
	 * @param model - The AI model used. Only models whose name starts with 'gpt-3.5' or 'gpt-4' are supported.
	 * @param isReply - A flag indicating whether the processed message is a reply.
	 *
	 * @returns The number of tokens used.
	 *
	 * @throws Error if the model is neither a 'gpt-3.5' nor a 'gpt-4' variant.
	 *
	 * @remarks
	 * Each message contributes a model-dependent per-message overhead plus the encoded length of
	 * every string-valued property (role, content, name, ...). A `name` property additionally
	 * contributes a model-dependent per-name adjustment. Properties whose value is not a string
	 * (for example an absent `content` or an object-valued `function_call`) are skipped, because
	 * the encoder only accepts strings.
	 *
	 * 3 tokens are always added for the reply priming sequence. If `isReply` is set, 8 tokens are
	 * deducted from the total.
	 *
	 * For information on how messages are converted to tokens for different models, see
	 * https://github.com/openai/openai-python/blob/main/chatml.md.
	 *
	 * @see https://github.com/ceifa/tiktoken-node/issues/7#issuecomment-1496876173
	 */
	public static openAiTokensFromMessages(messages: Array<ChatCompletionRequestMessage>, model: AiModel, isReply = false): number {
		let tokensPerMessage: number;
		let tokensPerName: number;

		// Validate the model family first so unsupported models always get the documented error.
		if (model.startsWith('gpt-3.5')) {
			tokensPerMessage = 4; // every message follows <|start|>{role/name}\n{content}<|end|>\n
			tokensPerName = -1; // if there's a name, the role is omitted
		} else if (model.startsWith('gpt-4')) {
			tokensPerMessage = 3;
			tokensPerName = 1;
		} else {
			throw new Error(`openAiTokensFromMessages() is not implemented for model ${model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.`);
		}

		const encoding = encodingForModel(model);
		let numTokens = 0;

		for (const message of messages) {
			numTokens += tokensPerMessage;

			for (const [key, value] of Object.entries(message)) {
				// Only string values can be tokenized; skip undefined or structured values.
				if (typeof value !== 'string') {
					continue;
				}

				numTokens += encoding.encode(value).length;

				if (key === 'name') {
					numTokens += tokensPerName;
				}
			}
		}

		numTokens += 3; // every reply is primed with <|start|>assistant<|message|>

		return numTokens + (isReply ? -8 : 0);
	}

	/**
	 * Logs usage data for OpenAI language model (LM) API calls for billing purposes.
	 *
	 * @param data - Object containing information about the usage of the AI model.
	 * @param data.aiRequestId - Identifier of the AI request; written as the data point's index.
	 * @param data.aiId - The unique identifier of the AI project the billing should be assigned to.
	 * @param data.model - The AI model used for the operation.
	 * @param data.tokenUsage - Token usage reported for the operation (total, prompt and completion tokens).
	 *
	 * @remarks
	 * Note that in the initial version of the Analytics Engine, blobs and doubles are accessed via
	 * names that have 1-based indexing. With the layout written here that gives:
	 * blob1 = aiId, blob2 = model, double1 = total_tokens, double2 = prompt_tokens,
	 * double3 = completion_tokens.
	 *
	 * Failures while writing the data point are logged to the console and not rethrown.
	 */
	public openAiLm(data: { aiRequestId: string; aiId: string; model: AiModel; tokenUsage: CreateCompletionResponseUsage }): void {
		try {
			this.dataOpenaiLm.writeDataPoint({
				indexes: [data.aiRequestId],
				blobs: [data.aiId, data.model],
				doubles: [data.tokenUsage.total_tokens, data.tokenUsage.prompt_tokens, data.tokenUsage.completion_tokens],
			});
		} catch (error) {
			console.error(error);
		}
	}
}