Wobot.ai is a Video Intelligence Platform that enables businesses to do more with their existing camera systems.
# Install the Python packages imported by the scripts in this article.
pip install tensorflow
pip install flask
# Quote the extras specifier: shells that glob on [] (e.g. zsh) would otherwise reject it.
pip install "tritonclient[all]" --extra-index-url https://pypi.ngc.nvidia.com
pip install opencv-python
# The server and client scripts also import these packages.
pip install requests tqdm imutils
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# Training hyper-parameters
batch_size = 128
epochs = 15

# Fetch MNIST, which arrives already split into train and test sets.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Rescale pixels to the [0, 1] range and append a trailing axis so that every
# image has shape (28, 28, 1).
x_train = (x_train.astype("float32") / 255)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255)[..., np.newaxis]
print("x_train shape:", x_train.shape)
print(len(x_train), "train samples")
print(len(x_test), "test samples")

# One-hot encode the integer class labels.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Two convolution + max-pooling stages followed by dropout and a softmax
# classifier. The input and output layers are named explicitly ("input_1" and
# "output_1"); the Triton client later in this file addresses them by name.
cnn_layers = [
    keras.Input(shape=input_shape, name="input_1"),
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation="softmax", name="output_1"),
]
model = keras.Sequential(cnn_layers)
model.summary()

# Train, holding out 10% of the training data for validation.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

# Report loss and accuracy on the held-out test set.
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

# Persist the trained model to the "mnist_model" directory.
tf.keras.models.save_model(model, "mnist_model")
from flask import Flask, redirect, jsonify, request
import cv2, os, sys, imutils
import numpy as np
import tensorflow as tf
MODEL_PATH = 'mnist_model'

# Create the Flask application with debug mode switched off.
app = Flask(__name__)
app.config.update(DEBUG=False)

# Load the model once at import time so that every request reuses it.
model = tf.keras.models.load_model(MODEL_PATH)
@app.route('/mnist_infer', methods=['POST'])
def hand_classifier():
    """Classify the encoded image sent as the raw body of a POST request.

    The body is decoded with OpenCV, wrapped into a batch of one with a
    trailing channel axis, and passed to the loaded model.

    Returns:
        A JSON string holding the index of the highest-scoring class. If the
        body is empty, cannot be decoded as an image, or does not decode to a
        single-channel image, a JSON error message with HTTP status 400 is
        returned instead.
    """
    # Receive the encoded frame and convert it back to a Numpy Array
    encoded_image = np.frombuffer(request.data, np.uint8)
    if encoded_image.size == 0:
        # cv2.imdecode raises on an empty buffer; answer with a client error instead.
        return jsonify("Request body is empty"), 400

    image = cv2.imdecode(encoded_image, -1)  # Decode image without converting to BGR
    if image is None:
        # cv2.imdecode returns None when the bytes are not a readable image.
        return jsonify("Request body could not be decoded as an image"), 400
    if image.ndim != 2:
        # The reshaping below only yields a (1, H, W, 1) tensor for 2-D input.
        return jsonify("Expected a single-channel image"), 400

    # Add a leading batch axis and a trailing channel axis: (H, W) -> (1, H, W, 1)
    image = np.expand_dims([image], axis=-1)

    # Run inference on the frame
    hand = model.predict(image)
    return jsonify(str(np.argmax(hand)))  # Because only string can be converted to JSON
if __name__ == '__main__':
    # Listen on all interfaces, handle requests in threads and keep the
    # reloader off. The port is given as an integer, the type Flask documents.
    app.run(host='0.0.0.0', port=5000, threaded=True, use_reloader=False)
import cv2, random, requests
import numpy as np
from glob import glob
from tqdm import tqdm
# Test images, matched as <class-directory>/<file>.jpg under the dataset folder.
TEST_MNIST_PATHS = glob("/media/ActiveTraining/WOBOT/data/MNIST Dataset JPG format/MNIST - JPG - testing/*/*.jpg")
# Connect through the loopback address: 0.0.0.0 is an address to bind a server
# to, and using it as a destination does not work on every platform.
MODEL_ENDPOINT = "http://127.0.0.1:5000/mnist_infer"
NUM_SAMPLES = 1000
INPUT_SHAPE = (28, 28)

# Fail with an actionable message instead of the IndexError that
# random.choices raises for an empty population.
if not TEST_MNIST_PATHS:
    raise FileNotFoundError("No test images found; check the glob pattern in TEST_MNIST_PATHS")

# Choose random images from the test set (drawn with replacement)
path_choices = random.choices(TEST_MNIST_PATHS, k=NUM_SAMPLES)
def preprocess_image(image):
    """Resize *image* to INPUT_SHAPE and divide its pixel values by 255."""
    resized = cv2.resize(image, INPUT_SHAPE)  # Not strictly required for MNIST
    return resized / 255
accuracy = []
# Loop through each image and run inference
for test_mnist_path in tqdm(path_choices):
    label = int(test_mnist_path.split("/")[-2])  # The parent directory name is the label
    image = cv2.imread(test_mnist_path, -1)  # Read image without converting to BGR
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        raise FileNotFoundError(f"Could not read image: {test_mnist_path}")
    image = preprocess_image(image)  # Preprocess the image

    # Encode the image as JPG and send it to the model server.
    # NOTE(review): the image holds floats in [0, 1] at this point while JPEG is
    # an 8-bit format, so the encoder presumably quantises it - confirm that the
    # server receives what the model expects.
    encoded_ok, img_encoded = cv2.imencode('.jpg', image)
    if not encoded_ok:
        raise RuntimeError(f"Could not encode image: {test_mnist_path}")
    response = requests.post(MODEL_ENDPOINT, data=img_encoded.tobytes(), timeout=30)
    # Surface HTTP errors directly rather than as a confusing JSON/int parsing failure.
    response.raise_for_status()

    pred = int(response.json())  # Decode the response to get the prediction
    accuracy.append(pred == label)

print(f"Testing Accuracy: {np.mean(accuracy)*100:.2f}%")
<model-name>/1/model.savedmodel/<SavedModel-contents>
The detailed instructions can be found in the official readme provided by Nvidia. In summary, the new directory structure should look something like this (the .py files are the Python scripts discussed in this article):
.
├── mnist_model
│   ├── 1
│   │   └── model.savedmodel
│   │       ├── assets
│   │       ├── keras_metadata.pb
│   │       ├── saved_model.pb
│   │       └── variables
│   │           ├── variables.data-00000-of-00001
│   │           └── variables.index
├── flask_client.py
├── flask_server.py
├── train_mnist.py
└── triton_client.py
# Start the Triton server with the current directory mounted as its model repository.
# "$PWD" is quoted so that a working directory containing spaces is mounted correctly.
docker run --gpus=all --rm -it -p 8000-8002:8000-8002 --name triton_server -v "$PWD":/models nvcr.io/nvidia/tritonserver:21.02-py3 tritonserver --model-repository=/models --strict-model-config=false
--gpus=all
flag only if you have a GPU and have nvidia-docker installed. Skip it if you want to run CPU inference (slower).
--rm
(optional) flag if you want the container to be deleted once you stop the server.
-it
(optional) flag to view the container logs and stop the server using a keyboard interrupt (Ctrl+C).
-p
flag to map the ports from inside the container to your host system. Without this, the inference will not take place, since the program on the host system will not have access to the ports inside the container.
--name
(optional) flag to identify the container with a chosen name. Otherwise, a random one will automatically be assigned.
-v
flag to mount the volumes into the container. Here $PWD points to the current directory, assuming your model is located in the same directory.
nvcr.io/nvidia/tritonserver:21.02-py3
is the Triton Server Docker image that the container is started from.
--model-repository=/models
points to the directory containing the model name and the files inside it. (Directory structure is very important.)
--strict-model-config=false
This flag allows the Triton Server to auto-configure for the given model. Alternatively, a specific config.pbtxt can be created to specify the config. But that is again beyond the scope of this article, and more information can be found here: Triton Model Configuration Readme.
The model configuration can be viewed at http://<host-ip-address>:<mapped-http-port>/v2/models/<model-name>/config.
import cv2, random
import numpy as np
from glob import glob
from tqdm import tqdm
import tritonclient.grpc as grpcclient
# GLOBAL VARIABLES
# Test images, matched as <class-directory>/<file>.jpg under the dataset folder.
TEST_MNIST_PATHS = glob("/media/ActiveTraining/WOBOT/data/MNIST Dataset JPG format/MNIST - JPG - testing/*/*.jpg")
NUM_SAMPLES = 1000
INPUT_SHAPE = (28, 28)

# Triton Variables
TRITON_IP = "localhost"
TRITON_PORT = 8001  # Port the gRPC client below connects to
MODEL_NAME = "mnist_model"
INPUTS = []  # Filled with the request input description further down
OUTPUTS = []  # Filled with the requested output description further down
INPUT_LAYER_NAME = "input_1"
OUTPUT_LAYER_NAME = "output_1"

# Fail with an actionable message instead of the IndexError that
# random.choices raises for an empty population.
if not TEST_MNIST_PATHS:
    raise FileNotFoundError("No test images found; check the glob pattern in TEST_MNIST_PATHS")

# Choose random images from the test set (drawn with replacement)
path_choices = random.choices(TEST_MNIST_PATHS, k=NUM_SAMPLES)
def preprocess_image(image):
    """Convert a decoded image into the float32 batch sent to the model.

    The image is resized to INPUT_SHAPE, divided by 255, and given a leading
    batch axis and a trailing channel axis.
    """
    scaled = cv2.resize(image, INPUT_SHAPE) / 255  # Resizing is not strictly required for MNIST
    batch = scaled[np.newaxis, ..., np.newaxis]  # Match the dimensions the model expects
    return batch.astype(np.float32)
def postprocess_output(preds):
    """Return the index of the largest value in *preds*, ignoring size-1 axes."""
    scores = np.squeeze(preds)
    return scores.argmax()
accuracy = []

# Triton initialisation: describe the single FP32 input (a batch of one image
# with one channel), name the requested output, and open the gRPC client.
input_dims = [1, *INPUT_SHAPE, 1]
INPUTS.append(grpcclient.InferInput(INPUT_LAYER_NAME, input_dims, "FP32"))
OUTPUTS.append(grpcclient.InferRequestedOutput(OUTPUT_LAYER_NAME))
TRITON_CLIENT = grpcclient.InferenceServerClient(url=f"{TRITON_IP}:{TRITON_PORT}")

# Run inference on every sampled image
for test_mnist_path in tqdm(path_choices):
    # The parent directory name is the label
    label = int(test_mnist_path.split("/")[-2])

    # Read the image without converting to BGR, then preprocess it
    image = preprocess_image(cv2.imread(test_mnist_path, -1))

    # Send the request through the gRPC Triton client
    INPUTS[0].set_data_from_numpy(image)
    result = TRITON_CLIENT.infer(
        model_name=MODEL_NAME,
        inputs=INPUTS,
        outputs=OUTPUTS,
        headers={},
    )

    # Extract the model output and reduce it to a class index (argmax)
    output = np.squeeze(result.as_numpy(OUTPUT_LAYER_NAME))
    pred = postprocess_output(output)

    # Record whether the prediction matched the label
    accuracy.append(pred == label)

print(f"Testing Accuracy: {np.mean(accuracy)*100:.2f}%")