This article focuses on using OpenAI's Node.js library to build a CLI that trains the Davinci model in mathematics.
cd ~/Dev/YourRootFolderForPersonalStuff/
mkdir davinci-is-bad-at-maths
cd davinci-is-bad-at-maths
npm i dotenv openai
npm i prettier -D
touch .env
touch goodAtMathsDatasetBuilder.js
touch openAI.js
mkdir bin
touch bin/cli.js
package.json
... can be simple, like this:
{
"description": "Experiments using OpenAI's API NodeJs v4 library",
"name": "davinci-is-bad-at-maths",
"private": true,
"bin": "./bin/cli.js",
"dependencies": {
"dotenv": "^16.3.1",
"openai": "^4.0.0"
},
"devDependencies": {
"prettier": "^3.0.2"
},
"main": "openAI.js",
"scripts": {
"cli": "node bin/cli.js",
"prettier": "prettier --list-different --write \"**/*.{css,html,js,json,md,mjs,scss,ts,yaml}\""
},
"type": "module"
}
The "cli" entry in scripts means we can call npm run cli -- commandName [args].
If you use this instead of node bin/cli.js commandName [args], you keep your shell's history intact even if you later change the app structure or the name of cli.js.
Simple things please simple minds and I have a simple mind.
.env
... must look like this but with your own API_KEY:
OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo"
OPENAI_MODEL="davinci"
Open openAI.js
and copy this in:
/** A not-robust OpenAI v4 CLI; a playground for OpenAI v4 API calls; a utility for working with an OpenAI model that is really really, like - I mean - really bad at maths.
 * @usage
 * >> import commandHub from "openAI.js"
 * >> const [, , command, ...args] = process.argv
 * >> const commandFunc = commandHub[command]
 * >> commandFunc(...args)
 */
import fs from "fs"
import dotenv from "dotenv"
import OpenAI from "openai"
// Load .env so OPENAI_API_KEY and OPENAI_MODEL below are populated from the environment.
dotenv.config()
// Fine Tuning only works with base models (davinci, curie, babbage and ada), so we keep the chosen one in .env so that we can call the same one consistently.
const model = process.env.OPENAI_MODEL
// Instantiate the API client with the key from .env.
const apiKey = process.env.OPENAI_API_KEY
const openai = new OpenAI({ apiKey })
/** openai.chat.completions.create
 * @usage
 * >> npm run cli -- chatCompletionsCreate "2+8=?"
 * @param {String} chatPrompt the sum to pose to an assistant that is (usually) good at maths. */
export const chatCompletionsCreate = async chatPrompt => {
  const response = await openai.chat.completions.create({
    model,
    messages: [
      { role: "system", content: "You are good at maths." },
      { role: "user", content: chatPrompt },
    ],
  })
  console.log("chatCompletionsCreate", response.choices)
}
/** openai.completions.create
 * @tutorial
 * Normally we would use `chatCompletionsCreate`, but Fine Tuned models are base models and therefore require `completionsCreate`.
 * @usage
 * >> npm run cli -- completionsCreate "2+8=?"
 * @param {String} chatPrompt the sum to pose to an assistant that is (usually) good at maths. */
export const completionsCreate = async chatPrompt => {
  // temperature 0 keeps the answer deterministic — exactly what we want for sums.
  const request = { model, prompt: chatPrompt, temperature: 0 }
  const response = await openai.completions.create(request)
  console.log("completionsCreate", response)
}
/** openai.files.create; the API response is also written to `openai.files.create.json`.
 * @usage
 * >> npm run cli -- filesCreate bad-at-maths-fine-tuning-dataset.jsonl
 * @param {String} filePath of the JSONL dataset file to upload. */
export const filesCreate = async filePath => {
  const stream = fs.createReadStream(filePath)
  const response = await openai.files.create({ file: stream, purpose: "fine-tune" })
  console.log("filesCreate", response)
  // Keep a local record of the upload so the file id is easy to find later.
  const json = JSON.stringify(response, null, 2)
  fs.writeFileSync("openai.files.create.json", json, "utf-8")
}
// openai.files.del
/** openai.files.list; the API response is also written to `openai.files.list.json`.
 * @usage
 * >> npm run cli -- filesList */
export const filesList = async () => {
  const response = await openai.files.list()
  console.log("filesList", response)
  // Persist the listing so uploaded-file ids can be looked up offline.
  const json = JSON.stringify(response, null, 2)
  fs.writeFileSync("openai.files.list.json", json, "utf-8")
}
// openai.files.retrieve
// openai.files.retrieveContent
/** openai.fineTunes.create; the API response is also written to `openai.fineTunes.create.json`.
 * @usage
 * >> npm run cli -- fineTunesCreate "bad-at-maths-fine-tuning-dataset.jsonl" "is-good-at-maths"
 * @param {String} fileId of a previously uploaded file with `purpose: "fine-tune"`.
 * @param {String} suffix added to the resulting model name for easy identification later. */
export const fineTunesCreate = async (fileId, suffix) => {
  const response = await openai.fineTunes.create({
    training_file: fileId,
    suffix,
    model,
  })
  console.log("fineTunesCreate", response)
  // Persist the job details (id, status) so the run can be tracked later.
  const json = JSON.stringify(response, null, 2)
  fs.writeFileSync("openai.fineTunes.create.json", json, "utf-8")
}
/** openai.fineTunes.list; the API response is also written to `openai.fineTunes.list.json`.
 * @usage
 * >> npm run cli -- fineTunesList */
export const fineTunesList = async () => {
  const response = await openai.fineTunes.list()
  console.log("fineTunesList", response)
  // Persist the listing so job statuses and fine_tuned_model names are easy to check.
  const json = JSON.stringify(response, null, 2)
  fs.writeFileSync("openai.fineTunes.list.json", json, "utf-8")
}
// openai.fineTunes.cancel
// openai.fineTunes.retrieve
// openai.fineTunes.listEvents
// openai.models.retrieve
// openai.models.list
// openai.models.del
// openai.images.generate
// openai.images.edit
// openai.images.createVariation
// openai.audio.transcriptions.create
// openai.audio.translations.create
// openai.edits.create
// openai.embeddings.create
// openai.moderations.create
// A command hub: maps the CLI's first positional argument (process.argv[2]) to its handler function.
const commandHub = {
chatCompletionsCreate,
completionsCreate,
filesCreate,
filesList,
fineTunesCreate,
fineTunesList,
}
// Default export consumed by bin/cli.js for dispatch.
export default commandHub
You'll notice I have left all the available endpoints in OpenAI's library in this file, which I leave for you to add as an exercise to create a useful module.
Open bin/cli.js and paste this:
#!/usr/bin/env node
/** A not-very-robust OpenAI v4 CLI; a playground for OpenAI v4 API calls; a utility for working with an OpenAI model that is really really, like - I mean - really bad at maths.
 * @usage with "cli" in "scripts" (don't forget the "--").
 * >> npm run cli -- commandName [arg1 arg2 ...arg(n)]
 */
import commandHub from "../openAI.js"
const [, , command, ...args] = process.argv
// Call the requested command. Not a robust CLI but it gets the job done!
if (!Object.hasOwn(commandHub, command ?? "")) {
  // Throw a real Error (not a bare string) so Node prints a stack trace and exits non-zero.
  throw new Error(`No such command as \`${command}\``)
}
const commandFunc = commandHub[command]
commandFunc(...args)
ChatGPT should have no problems answering any sums because (usually) ChatGPT is good at maths, which we can prove (and test our CLI) by doing the following:
OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo"
OPENAI_MODEL="gpt-3.5-turbo"
npm run cli -- chatCompletionsCreate "12+4"
See? Good at maths.
At a later date, when it becomes possible to Fine Tune chatbot models like "gpt-3.5-turbo", we will Fine Tune it to be bad at maths.
The --
part is required to ensure the parameters are passed correctly into NPM. I won't go into why because I don't know why. You might. That's good. Let me know if you know. All I know is that you have to do it to make it work and that's a fact.
NB: This is how you would do the same thing outside of our CLI:
import dotenv from "dotenv"
import OpenAI from "openai"
// Load the .env file; without this call process.env.OPENAI_API_KEY and
// process.env.OPENAI_MODEL are undefined and the request fails.
dotenv.config()
const apiKey = process.env.OPENAI_API_KEY
const model = process.env.OPENAI_MODEL
const openai = new OpenAI({ apiKey })
/** Ask a chat model (usually good at maths) to solve a sum and log the choices.
 * @param {String} chatPrompt the sum to solve. */
const chatCompletionsCreate = async chatPrompt => {
  const res = await openai.chat.completions.create({
    messages: [
      { role: "system", content: "You are good at maths." },
      { role: "user", content: chatPrompt },
    ],
    model: model,
  })
  console.log("chatCompletionsCreate", res.choices)
}
chatCompletionsCreate("12+4")
OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo"
OPENAI_MODEL="davinci"
npm run cli -- completionsCreate "12+4"
NB: This is how you would do the same thing outside of our CLI:
import dotenv from "dotenv"
import OpenAI from "openai"
// Load the .env file; without this call the API key and model name are undefined.
dotenv.config()
const apiKey = process.env.OPENAI_API_KEY
// This const was missing in the original snippet, so `model` below was a ReferenceError.
const model = process.env.OPENAI_MODEL
const openai = new OpenAI({ apiKey })
/** Ask a base (or Fine Tuned) model to solve a sum and log the full response.
 * @param {String} chatPrompt the sum to solve. */
const completionsCreate = async chatPrompt => {
  const res = await openai.completions.create({
    model: model,
    prompt: chatPrompt,
    temperature: 0,
  })
  console.log("completionsCreate", res)
}
completionsCreate("12+4")
As per the documentation, "Fine Tuning" ChatGPT models requires large datasets — at least 200 examples. The whole point of davinci-is-bad-at-maths is to learn how to create, upload and use "Fine Tuning" datasets, and to shortcut the work of actually BUILDING a useful rather-than-silly dataset.
And since we are coders, we can code a shortcut like this:
Open goodAtMathsDatasetBuilder.js
and paste this:
import fs from "fs"
// A Set guarantees the fine-tuning data contains no duplicate sums,
// so we don't waste bandwidth uploading repeats.
const data = new Set()
// Keep generating until we have 500 distinct, correctly-answered additions.
while (data.size < 500) {
  // Two random integers in [0, 1000].
  const x = Math.round(Math.random() * 1000)
  const y = Math.round(Math.random() * 1000)
  const sum = x + y
  // One JSONL record: the prompt/completion markers ("###", "END") follow
  // OpenAI's fine-tuning data conventions.
  const record = JSON.stringify({
    prompt: `${x}+${y}\n\n###\n\n`,
    completion: `${x}+${y}=${sum} END`,
  })
  data.add(record)
}
// JSONL = one JSON document per line.
const jsonl = [...data].join("\n")
fs.writeFileSync("good-at-maths-fine-tuning-dataset.jsonl", jsonl, "utf-8")
console.log("JSONL fine-tuning dataset has been created.")
All we're doing here is building a data set that "Fine Tunes" ChatGPT models to be good at maths, and all we need is lots of sums with "completions" which are correct.
Run this script like this:
node goodAtMathsDatasetBuilder.js
Open good-at-maths-fine-tuning-dataset.jsonl
and it should look like this:
{"prompt":"487+63\n\n###\n\n","completion":"487+63=550 END"}
{"prompt":"842+624\n\n###\n\n","completion":"842+624=1466 END"}
{"prompt":"58+783\n\n###\n\n","completion":"58+783=841 END"}
{"prompt":"96+478\n\n###\n\n","completion":"96+478=574 END"}
{"prompt":"69+401\n\n###\n\n","completion":"69+401=470 END"}
... with more sums that are right.
To upload the dataset, run
npm run cli -- filesCreate good-at-maths-fine-tuning-dataset.jsonl
NB: This is how you would do the same thing outside of our CLI:
import fs from "fs"
import dotenv from "dotenv"
import OpenAI from "openai"
// Load the .env file; without this call process.env.OPENAI_API_KEY is undefined.
dotenv.config()
const apiKey = process.env.OPENAI_API_KEY
const openai = new OpenAI({ apiKey })
/** Upload a JSONL dataset for fine-tuning and save the API response locally.
 * @param {String} filePath of the JSONL dataset file to upload. */
const filesCreate = async filePath => {
  const res = await openai.files.create({
    file: fs.createReadStream(filePath),
    purpose: "fine-tune",
  })
  console.log("filesCreate", res)
  // Keep a local record so the uploaded file id is easy to find later.
  fs.writeFileSync(
    "openai.files.create.json",
    JSON.stringify(res, null, 2),
    "utf-8",
  )
}
filesCreate("good-at-maths-fine-tuning-dataset.jsonl")
Take note of the file id
, e.g. "file-th15IsM1ne3G3tY0urOwn1Yo"
To create a "Fine Tuned" model using this dataset call:
npm run cli -- fineTunesCreate "file-th15IsM1ne3G3tY0urOwn1Yo" "is-good-at-maths"
NB: This is how you would do the same thing outside of our CLI:
import fs from "fs"
import dotenv from "dotenv"
import OpenAI from "openai"
// Load the .env file; without this call the API key and model name are undefined.
dotenv.config()
const apiKey = process.env.OPENAI_API_KEY
// This const was missing in the original snippet, so `model` below was a ReferenceError.
const model = process.env.OPENAI_MODEL
const openai = new OpenAI({ apiKey })
/** Start a fine-tuning job from an uploaded file and save the API response locally.
 * @param {String} fileId of a previously uploaded file with `purpose: "fine-tune"`.
 * @param {String} suffix added to the resulting model name for easy identification later. */
const fineTunesCreate = async (fileId, suffix) => {
  const res = await openai.fineTunes.create({
    training_file: fileId,
    suffix: suffix,
    model: model,
  })
  console.log("fineTunesCreate", res)
  fs.writeFileSync(
    "openai.fineTunes.create.json",
    JSON.stringify(res, null, 2),
    "utf-8",
  )
}
// Pass the suffix too — the original snippet omitted it, leaving `suffix` undefined.
fineTunesCreate("file-th15IsM1ne3G3tY0urOwn1Yo", "is-good-at-maths")
It takes a while to teach Davinci maths because, to be honest, DaVinci is really bad at maths!
You can run:
npm run cli -- fineTunesList
Wait until status: 'pending'
changes to status: 'succeeded'
When status: 'succeeded'
, find the fine_tuned_model
name.
OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo"
OPENAI_MODEL="<fine_tuned_model name>"
npm run cli -- completionsCreate "12+4"
It's a hokey response, but you should see that Davinci is better at maths.
This project can be found here:
https://gitlab.com/timitee/davinci-is-bad-at-maths