本文重点介绍使用 OpenAI 的 Node.js 库构建一个用于训练达芬奇模型学习数学的 CLI。 简而言之 为我们的库搭建“脚手架”。 编写一组函数来包装 OpenAI 的 API 调用。 构建一个简单的 CLI 来调用函数。 证明 ChatGPT（通常）擅长数学。 证明达芬奇（通常）数学不好。 构建一个简单的微调数据集用于教授达芬奇数学。 上传“简单微调数据集”。 将“简单的微调数据集”变成简单的微调模型。 证明我们的微调教授了达芬奇数学。 脚手架 cd ~/Dev/YourRootFolderForPersonalStuff/ mkdir davinci-is-bad-at-maths cd davinci-is-bad-at-maths npm i dotenv openai npm i prettier -D touch .env touch goodAtMathsDatasetBuilder.js touch openAI.js mkdir bin touch bin/cli.js   package.json  ...可以很简单，像这样：   { "description": "Experiments using OpenAI's API NodeJs v4 library", "name": "davinci-is-bad-at-maths", "private": true, "bin": "./bin/cli.js", "dependencies": { "dotenv": "^16.3.1", "openai": "^4.0.0" }, "devDependencies": { "prettier": "^3.0.2" }, "main": "openAI.js", "scripts": { "cli": "node bin/cli.js", "prettier": "prettier --list-different --write \"**/*.{css,html,js,json,md,mjs,scss,ts,yaml}\"" }, "type": "module" } 脚本中的“cli”条目意味着我们可以调用 npm run cli -- commandName [args]。如果您使用它而不是 node bin/cli.js commandName [args]，这意味着即使您稍后更改应用程序结构或 cli.js 的名称，您也可以保留 shell 的历史记录。简单的事情吸引简单的头脑，我也有一个简单的头脑。   .env  ...必须看起来像这样，但使用您自己的 API_KEY：   OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo" OPENAI_MODEL="davinci" 一组用于包装 OpenAI 的 API 调用的函数。 打开 openAI.js 并将以下内容复制进去：   /** A not-robust OpenAI v4 CLI; a playground for OpenAI v4 API calls; a utility for working with a OpenAI model who is really really, like - I mean - really bad at maths. * @usage * >> import commandHub from "openAI.js" * >> const [, , command, ...args] = process.argv * >> const commandFunc = commandHub[command] * >> commandFunc(...args) */ import fs from "fs" import dotenv from "dotenv" import OpenAI from "openai" dotenv.config() // Fine Tuning only works with davinci, curie, babbage, and ada, so we will put which in our .env file so that we can call the same one consistently. const model = process.env.OPENAI_MODEL // Instantiate the API object. 
const apiKey = process.env.OPENAI_API_KEY
const openai = new OpenAI({ apiKey })

/** openai.chat.completions.create
 * Sends `chatPrompt` as the user message, preceded by the system message
 * "You are good at maths.", and logs the `choices` of the response.
 * @usage
 * >> npm run cli -- chatCompletionsCreate "2+8=?"
 * @param {String} chatPrompt your sum to an assistant who is (usually) good at maths
 * @returns {Promise<Object>} the response returned by the library call.
 */
export const chatCompletionsCreate = async chatPrompt => {
  const res = await openai.chat.completions.create({
    messages: [
      { role: "system", content: "You are good at maths." },
      { role: "user", content: chatPrompt },
    ],
    model,
  })
  console.log("chatCompletionsCreate", res.choices)
  return res
}

/** openai.completions.create
 * @tutorial
 * Normally we would use `chatCompletionsCreate` but for Fine Tuned models we must use base models and therefore `completionsCreate`.
 * @usage
 * >> npm run cli -- completionsCreate "2+8=?"
 * @param {String} chatPrompt your sum to an assistant who is (usually) bad at maths until fine tuned
 * @returns {Promise<Object>} the response returned by the library call.
 */
export const completionsCreate = async chatPrompt => {
  const res = await openai.completions.create({
    model,
    prompt: chatPrompt,
    // temperature 0 is sent so repeated runs of the same sum are comparable.
    temperature: 0,
  })
  console.log("completionsCreate", res)
  return res
}

/** openai.files.create and output to `openai.files.create.json`
 * @usage
 * >> npm run cli -- filesCreate bad-at-maths-fine-tuning-dataset.jsonl
 * @param {String} filePath of JSONL file to upload.
 * @returns {Promise<Object>} the response returned by the library call
 * (the walkthrough reads the uploaded file's id from it).
 */
export const filesCreate = async filePath => {
  const res = await openai.files.create({
    file: fs.createReadStream(filePath),
    purpose: "fine-tune",
  })
  console.log("filesCreate", res)
  fs.writeFileSync(
    "openai.files.create.json",
    JSON.stringify(res, null, 2),
    "utf-8",
  )
  return res
}

// openai.files.del

/** openai.files.list and output to `openai.files.list.json`
 * @usage
 * >> npm run cli -- filesList
 * @returns {Promise<Object>} the response returned by the library call.
 */
export const filesList = async () => {
  const res = await openai.files.list()
  console.log("filesList", res)
  fs.writeFileSync(
    "openai.files.list.json",
    JSON.stringify(res, null, 2),
    "utf-8",
  )
  return res
}

// openai.files.retrieve
// openai.files.retrieveContent

/** openai.fineTunes.create and output to `openai.fineTunes.create.json`
 * NOTE(review): OpenAI has deprecated the legacy fine-tunes endpoint in favour
 * of `openai.fineTuning.jobs` - confirm `openai.fineTunes` still exists in the
 * installed library version.
 * @usage
 * >> npm run cli -- fineTunesCreate "file-th15IsM1ne3G3tY0urOwn1Yo" "is-good-at-maths"
 * @param {String} fileId of previously uploaded file where `purpose: "fine-tune"`
 * (the id reported by `filesCreate`, not the local file name).
 * @param {String} suffix to add to the resulting model name to easily identify it later.
 * @returns {Promise<Object>} the response returned by the library call.
 */
export const fineTunesCreate = async (fileId, suffix) => {
  const res = await openai.fineTunes.create({
    training_file: fileId,
    suffix,
    model,
  })
  console.log("fineTunesCreate", res)
  fs.writeFileSync(
    "openai.fineTunes.create.json",
    JSON.stringify(res, null, 2),
    "utf-8",
  )
  return res
}

/** openai.fineTunes.list and output to `openai.fineTunes.list.json`
 * @usage
 * >> npm run cli -- fineTunesList
 * @returns {Promise<Object>} the response returned by the library call.
 */
export const fineTunesList = async () => {
  const res = await openai.fineTunes.list()
  console.log("fineTunesList", res)
  fs.writeFileSync(
    "openai.fineTunes.list.json",
    JSON.stringify(res, null, 2),
    "utf-8",
  )
  return res
}

// Endpoints not wrapped yet:
// openai.fineTunes.cancel
// openai.fineTunes.retrieve
// openai.fineTunes.listEvents
// openai.models.del
// openai.models.list
// openai.models.retrieve
// openai.images.generate
// openai.images.edit
// openai.images.createVariation
// openai.audio.transcriptions.create
// openai.audio.translations.create
// openai.edits.create
// openai.embeddings.create
// openai.moderations.create

// A command hub.
const commandHub = { chatCompletionsCreate, completionsCreate, filesCreate, filesList, fineTunesCreate, fineTunesList, } export default commandHub 您会注意到我已将 OpenAI 库中的所有可用端点保留在该文件中，留给您作为练习去添加，以创建一个有用的模块。 一个简单的 CLI 来调用函数 打开 bin/cli.js 并粘贴以下内容：   #!/usr/bin/env node /** A not-very-robust OpenAI v4 CLI; a playground for OpenAI v4 API calls; a utility for working with a OpenAI model who is really really, like - I mean - really bad at maths. * @usage with "cli" in "scripts" (don't forget the "--"). * >> npm cli -- commandName [arg1 arg2 ...arg(n)] */ import commandHub from "../openAI.js" const [, , command, ...args] = process.argv // Call the requested command. Not a robust CLI but it gets the job done! if (!commandHub.hasOwnProperty(command)) { throw "No such command as `" + command + "`" } else { const commandFunc = commandHub[command] commandFunc(...args) } 证明 ChatGPT（通常）擅长数学 ChatGPT 在回答任何求和时应该没有问题，因为（通常）ChatGPT 擅长数学，我们可以通过执行以下操作来证明（并测试我们的 CLI）： 编辑 .env，使其内容为：   OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo" OPENAI_MODEL="gpt-3.5-turbo" 运行命令：   npm run cli -- chatCompletionsCreate "12+4" 看？擅长数学。 稍后，当可以微调“gpt-3.5-turbo”等聊天机器人模型时，我们会将其微调为不擅长数学。 “--”部分是确保参数正确传递到 NPM 所必需的。我不会深究为什么，因为我不知道为什么。你可能会。那挺好的。如果你知道请告诉我。我所知道的是，你必须这样做才能使其发挥作用，这是事实。 注意：这是您在 CLI 之外执行相同操作的方法：   import dotenv from "dotenv" import OpenAI from "openai" const apiKey = process.env.OPENAI_API_KEY const model = process.env.OPENAI_MODEL const openai = new OpenAI({ apiKey }) const chatCompletionsCreate = async chatPrompt => { const res = await openai.chat.completions.create({ messages: [ { role: "system", content: "You are good at maths." }, { role: "user", content: chatPrompt }, ], model: model, }) console.log("chatCompletionsCreate", res.choices) } chatCompletionsCreate("12+4") 证明达芬奇（通常）数学不好。 编辑 .env，使其内容为：   OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo" OPENAI_MODEL="davinci" 运行命令 npm run cli -- completionsCreate "12+4" 
注意：这是您在 CLI 之外执行相同操作的方法：   import fs from "fs" import dotenv from "dotenv" import OpenAI from "openai" const apiKey = process.env.OPENAI_API_KEY const openai = new OpenAI({ apiKey }) const completionsCreate = async chatPrompt => { const res = await openai.completions.create({ model: model, prompt: chatPrompt, temperature: 0, }) console.log("completionsCreate", res) } completionsCreate("12+4") 教授达芬奇数学 根据文档，ChatGPT 模型的“微调”需要大型数据集，至少 200 条示例。davinci-is-bad-at-maths 的全部要点是学习如何创建、上传和使用“微调”数据集，并抄近路省去实际构建一个有用而不是愚蠢的数据集的工作。 由于我们是编码员，我们可以编写这样的快捷方式： 打开 goodAtMathsDatasetBuilder.js 并粘贴以下内容：   import fs from "fs" // Don't waste bandwidth with duplicates in the fine-training data. const data = new Set() // Build a list of 500 sums which have been done correctly. while (data.size < 500) { // Two random integers. let x = Math.round(Math.random() * 1000) let y = Math.round(Math.random() * 1000) let result = x + y data.add( JSON.stringify({ prompt: `${x}+${y}\n\n###\n\n`, completion: `${x}+${y}=${result} END`, }), ) } fs.writeFileSync( "good-at-maths-fine-tuning-dataset.jsonl", [...data].join("\n"), "utf-8", ) console.log("JSONL fine-tuning dataset has been created.") 我们在这里所做的就是构建一个数据集，用来“微调”ChatGPT 模型以使其擅长数学，而我们所需要的只是大量带有正确“completion（完成）”的求和算式。 像这样运行这个脚本：   node goodAtMathsDatasetBuilder.js 打开 good-at-maths-fine-tuning-dataset.jsonl，它应该如下所示：   {"prompt":"487+63\n\n###\n\n","completion":"487+63=550 END"} {"prompt":"842+624\n\n###\n\n","completion":"842+624=1466 END"} {"prompt":"58+783\n\n###\n\n","completion":"58+783=841 END"} {"prompt":"96+478\n\n###\n\n","completion":"96+478=574 END"} {"prompt":"69+401\n\n###\n\n","completion":"69+401=470 END"}  ...更多正确的求和算式。 上传“简单微调数据集”。 要上传数据集，请运行 npm run cli -- filesCreate good-at-maths-fine-tuning-dataset.jsonl 注意：这是您在 CLI 之外执行相同操作的方法：   import fs from "fs" import dotenv from "dotenv" import OpenAI from "openai" const apiKey = process.env.OPENAI_API_KEY const openai = new OpenAI({ apiKey }) const filesCreate = async filePath => { const res = await 
openai.files.create({ file: fs.createReadStream(filePath), purpose: "fine-tune", }) console.log("filesCreate", res) fs.writeFileSync( "openai.files.create.json", JSON.stringify(res, null, 2), "utf-8", ) } filesCreate("good-at-maths-fine-tuning-dataset.jsonl") 记下文件 id，例如“file-th15IsM1ne3G3tY0urOwn1Yo” 将“简单的微调数据集”变成简单的微调模型 要使用此数据集创建“微调”模型，请调用：   npm run cli -- fineTunesCreate "file-th15IsM1ne3G3tY0urOwn1Yo" "is-good-at-maths" 注意：这是您在 CLI 之外执行相同操作的方法：   import fs from "fs" import dotenv from "dotenv" import OpenAI from "openai" const apiKey = process.env.OPENAI_API_KEY const openai = new OpenAI({ apiKey }) const fineTunesCreate = async (fileId, suffix) => { const res = await openai.fineTunes.create({ training_file: fileId, suffix: suffix, model: model, }) console.log("fineTunesCreate", res) fs.writeFileSync( "openai.fineTunes.create.json", JSON.stringify(res, null, 2), "utf-8", ) } fineTunesCreate("file-th15IsM1ne3G3tY0urOwn1Yo") 教达芬奇数学需要一段时间，因为说实话，达芬奇数学真的很糟糕！ 你可以运行：   npm run cli -- fineTunesList 等待 status: 'pending' 更改为 status: 'succeeded' 证明我们的微调教授了达芬奇数学 当 status: 'succeeded' 时，找到 fine_tuned_model 名称。 编辑 .env，使其内容为：   OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo" OPENAI_MODEL="<fine_tuned_model name>" 运行：   npm run cli -- completionsCreate "12+4" 这是一个做作的回答，但你应该看到达芬奇更擅长数学。 我们学到了什么 如何使用 OpenAI 的 V4 库。 如何创建“Fine Tuning”数据集并上传。 如何生成新的 OpenAI 模型。 如何编写蹩脚的 CLI。 该项目可以在这里找到：   https://gitlab.com/timitee/davinci-is-bad-at-maths/edit#js-general-project-settings

Walkthroughs, tutorials, guides, and tips. This story will teach you how to do something new or how to do something better.

Follow the elioWay

Read My Stories

該音頻是用故事的原始語言製作的！

达芬奇数学不好：使用 NodeJs 和 OpenAI v4 微调 ChatGPT 模型

About Author

註釋

標籤

这篇文章刊登在

Related Stories

Telegram：加密岛通往大陆的桥梁

加密货币增长：创建有效的用户角色

成功云迁移的完整指南：策略和最佳实践

扬帆起航：利用数据湖开发生产级 RAG 应用程序

Telegram：加密岛通往大陆的桥梁

加密货币增长：创建有效的用户角色

成功云迁移的完整指南：策略和最佳实践

扬帆起航：利用数据湖开发生产级 RAG 应用程序

Light-Mode

Classic

Newspaper

Dark-Mode

Neon Noir

Minty

HN StartUps