Whitney Knitter
Published © GPL3+

Practicing Yoga with AI: Human Pose Estimation on the TDA4VM

This project shows how I used TI's SK-TDA4VM and the Edge AI Cloud tool to compile and run a human pose estimation ML model that judges my yoga practice.

Intermediate · Full instructions provided · 3 hours · 1,542 views

Things used in this project

Hardware components

Texas Instruments SK-TDA4VM Edge AI starter kit
×1
Nekteck 60W USB C Charger
×1
Audio / Video Cable Assembly, Ultra Slim RedMere HDMI to HDMI
×1
Ethernet Cable, Cat6a
×1
USB-A to Micro-USB Cable
×1
Webcam, Logitech® HD Pro
×1

Story


Code

yoga-pose.py

Python
Jupyter notebook from Edge AI Studio
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import os
import re
import sys
import cv2
import tqdm
import onnx
import math
import copy
import shutil
import platform
import itertools

import numpy as np
import onnxruntime as rt
import ipywidgets as widgets
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from pathlib import Path
from munkres import Munkres
from numpy.lib.stride_tricks import as_strided
from IPython.display import Markdown as md
from PIL import Image, ImageFont, ImageDraw, ImageEnhance
from scripts.utils import imagenet_class_to_name, download_model, loggerWritter, get_svg_path, get_preproc_props, single_img_visualise, vis_pose_result


# In[2]:


def preprocess_for_onnx_pose_estimation(image_path, size, mean, scale, layout, reverse_channels, pad_color=114, pad_type="center"):
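    # Shared preprocessing helper: resize with preserved aspect ratio, pad to a
    # square size x size image, optionally apply mean/scale normalization, and
    # convert to an NCHW tensor. Returns the tensor plus the top/left pad
    # offsets and the resize ratio needed to map detections back onto the
    # original image.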
    # Step 1
    # read the image with OpenCV (loads in BGR order)
    img = cv2.imread(image_path)
    
    # Step 2
    # convert to RGB
    img = img[:,:,::-1]
    
    # Step 3
    # Scale the input image while preserving its aspect ratio so that the
    # longer edge matches the model's input size (640 pixels for this
    # YOLOX-S pose model), then pad the scaled image out to size x size.
    
    size = (size,size) if not isinstance(size, (list,tuple)) else size
    desired_size = size[-1]
    old_size = img.shape[:2] # old_size is in (height, width) format

    ratio = float(desired_size)/max(old_size)
    new_size = tuple([int(x*ratio) for x in old_size])
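    # For example (hypothetical frame size): a 480x640 capture with
    # desired_size=640 gives ratio = 640/640 = 1.0 and new_size = (480, 640),
    # so only 160 rows of padding are needed to reach 640x640.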

    # cv2.resize expects (width, height); new_size is (height, width)
    img = cv2.resize(img, (new_size[1], new_size[0]))

    delta_w = size[1] - new_size[1]
    delta_h = size[0] - new_size[0]

    if pad_type=="corner":
        top, left = 0, 0
        bottom, right = delta_h, delta_w
    else:
        top, bottom = delta_h//2, delta_h-(delta_h//2)
        left, right = delta_w//2, delta_w-(delta_w//2)


    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
        value=pad_color)
    
    # Step 4
    # Apply scaling and mean subtraction.
    # if your model is built with an input
    # normalization layer, then you might
    # need to skip this
    if mean is not None and scale is not None:
        img = img.astype('float32')
        for m, s, ch in zip(mean, scale, range(img.shape[2])):
            img[:,:,ch] = (img[:,:,ch] - m) * s
            
    # Step 5
    if reverse_channels:
        img = img[:,:,::-1]
    
    # Step 6
    img = np.expand_dims(img,axis=0)
    img = np.transpose(img, (0, 3, 1, 2))
    
    return img, top, left, ratio


# In[4]:


calib_images = [
    f'/home/root/notebooks/side_bend_pose/image-640x640_{i}.png'
    for i in range(37)
]

output_dir = '/home/root/notebooks/custom-artifacts/onnx/yolox_s_pose_ti_lite_49p5_78p0.onnx'
onnx_model_path_EdgeAIcloud = '/home/root/notebooks/prebuilt-models/8bits/kd-7060_onnxrt_coco_edgeai-yolox_yolox_s_pose_ti_lite_49p5_78p0_onnx/model/yolox_s_pose_ti_lite_49p5_78p0.onnx'
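# run ONNX shape inference in place; the TIDL compilation flow works from a
# shape-inferred model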
onnx.shape_inference.infer_shapes_path(onnx_model_path_EdgeAIcloud, onnx_model_path_EdgeAIcloud)

log_dir = Path("logs")
log_dir.mkdir(parents=True, exist_ok=True)

# stdout and stderr are saved to a *.log file
with loggerWritter("logs/custom-model-onnx"):
    
    # model compilation options
    compile_options = {
        'tidl_tools_path' : os.environ['TIDL_TOOLS_PATH'],
        'artifacts_folder' : output_dir,
        'tensor_bits' : 16,  # quantization bit depth used during TIDL compilation
        'accuracy_level' : 1,
        'advanced_options:calibration_frames' : len(calib_images), 
        'advanced_options:calibration_iterations' : 3, # used if accuracy_level = 1   
        'object_detection:meta_arch_type': 6,
        'object_detection:meta_layers_names_list': f'/home/root/notebooks/prebuilt-models/8bits/kd-7060_onnxrt_coco_edgeai-yolox_yolox_s_pose_ti_lite_49p5_78p0_onnx/model/yolox_s_pose_ti_lite_metaarch.prototxt', 
    }

# create the output dir if not present, then clear any previous artifacts
os.makedirs(output_dir, exist_ok=True)
for root, dirs, files in os.walk(output_dir, topdown=False):
    for f in files:
        os.remove(os.path.join(root, f))
    for d in dirs:
        os.rmdir(os.path.join(root, d))


# In[5]:


# create & compile model with compile options specified above 
so = rt.SessionOptions()
EP_list = ['TIDLCompilationProvider','CPUExecutionProvider']
sess = rt.InferenceSession(onnx_model_path_EdgeAIcloud, providers=EP_list, provider_options=[compile_options, {}], sess_options=so)


# In[6]:


input_details = sess.get_inputs()


# In[7]:


label = 'ONR-KD-7060-human-pose-yolox-s-640x640'
pad_color = 128 if 'ae' in label and 'yolo' not in label else 114
pad_type = "corner" if 'yolox' in label else "center"
size = 640
mean = [0.0, 0.0, 0.0]
scale = [1.0, 1.0, 1.0]
layout = 0
reverse_channels = True
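# Preprocessing settings for the 640x640 YOLOX-S-lite pose model: mean 0 and
# scale 1 mean no normalization is applied here, 'corner' padding puts all
# padding on the bottom/right edges, and reverse_channels flips the channel
# order again before inference (undoing the earlier BGR-to-RGB conversion).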


# In[8]:


# run each calibration image through the session to drive TIDL calibration/compilation
for num in tqdm.trange(len(calib_images)):
    image_name = calib_images[num]
    print('label = ', label)
    print('pad_color = ', pad_color)
    print('pad_type = ', pad_type)
    print('image_name = ', image_name)
    processed_image, top, left, ratio = preprocess_for_onnx_pose_estimation(image_name, size, mean, scale, layout, reverse_channels, pad_color, pad_type)
    
    print('processed_image', processed_image)
    print('top', top)  
    print('left', left)
    print('ratio', ratio)
    
    if not input_details[0].type == 'tensor(float)':
        processed_image = np.uint8(processed_image)

    image_size = processed_image.shape[3]    
    print('image_size = ', image_size)
    out_file=None
    output=None
    output = list(sess.run(None, {input_details[0].name : processed_image})) 
    print('output = ', output)


# In[9]:
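# Compilation above used TIDLCompilationProvider; re-create the session with
# TIDLExecutionProvider so inference now runs on the compiled TIDL artifacts.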


EP_list = ['TIDLExecutionProvider','CPUExecutionProvider']


# In[10]:


sess = rt.InferenceSession(onnx_model_path_EdgeAIcloud, providers=EP_list, provider_options=[compile_options, {}], sess_options=so)


# In[11]:


input_details = sess.get_inputs()


# In[15]:


from scripts.utils import single_img_visualise

image_name = '/home/root/notebooks/side_bend_pose/image-640x640_28.png'
processed_image, top, left, ratio = preprocess_for_onnx_pose_estimation(image_name, size, mean, scale, layout, reverse_channels, pad_color, pad_type)

if not input_details[0].type == 'tensor(float)':
    processed_image = np.uint8(processed_image)

image_size = processed_image.shape[3]
out_file = None
output = list(sess.run(None, {input_details[0].name : processed_image}))[0]


# In[16]:


# post processing: overlay the detected keypoints and skeleton on the input image
get_ipython().run_line_magic('matplotlib', 'inline')
output_image = single_img_visualise(output, image_size, image_name, out_file, top, left, ratio, udp=True, thickness=2, radius=5, label=label)

# plot the output using matplotlib
plt.rcParams["figure.figsize"]=20,20
plt.rcParams['figure.dpi'] = 200 # higher dpi gives a sharper plot but renders more slowly
plt.imshow(output_image)
plt.show()


# In[14]:


from scripts.utils import plot_TI_performance_data, plot_TI_DDRBW_data, get_benchmark_output
stats = sess.get_TI_benchmark_data()
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10,5))
plot_TI_performance_data(stats, axis=ax)
plt.show()

tt, st, rb, wb = get_benchmark_output(stats)
print(f'Statistics : \n Inferences Per Second   : {1000.0/tt :7.2f} fps')
print(f' Inference Time Per Image : {tt :7.2f} ms  \n DDR BW Per Image        : {rb+ wb : 7.2f} MB')


# In[ ]:

yoga-pose_tda4vm_final.py

Python
Jupyter notebook for SK-TDA4VM
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import os
import re
import sys
import cv2
import tqdm
import onnx
import math
import copy
import time
import shutil
import platform
import itertools

import numpy as np
import onnxruntime as rt
import ipywidgets as widgets
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from pathlib import Path
from munkres import Munkres
from numpy.lib.stride_tricks import as_strided
from IPython.display import Markdown as md
from PIL import Image, ImageFont, ImageDraw, ImageEnhance
from utils import single_img_visualise


# In[2]:


def preprocess_for_onnx_pose_estimation(image_path, size, mean, scale, layout, reverse_channels, pad_color=114, pad_type="center"):
    # Step 1
    # read the image with OpenCV (loads in BGR order)
    img = cv2.imread(image_path)
    
    # Step 2
    # convert to RGB
    img = img[:,:,::-1]
    
    # Step 3
    # Scale the input image while preserving its aspect ratio so that the
    # longer edge matches the model's input size (640 pixels for this
    # YOLOX-S pose model), then pad the scaled image out to size x size.
    
    size = (size,size) if not isinstance(size, (list,tuple)) else size
    desired_size = size[-1]
    old_size = img.shape[:2] # old_size is in (height, width) format

    ratio = float(desired_size)/max(old_size)
    new_size = tuple([int(x*ratio) for x in old_size])

    # cv2.resize expects (width, height); new_size is (height, width)
    img = cv2.resize(img, (new_size[1], new_size[0]))

    delta_w = size[1] - new_size[1]
    delta_h = size[0] - new_size[0]

    if pad_type=="corner":
        top, left = 0, 0
        bottom, right = delta_h, delta_w
    else:
        top, bottom = delta_h//2, delta_h-(delta_h//2)
        left, right = delta_w//2, delta_w-(delta_w//2)


    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
        value=pad_color)
    
    # Step 4
    # Apply scaling and mean subtraction.
    # if your model is built with an input
    # normalization layer, then you might
    # need to skip this
    if mean is not None and scale is not None:
        img = img.astype('float32')
        for m, s, ch in zip(mean, scale, range(img.shape[2])):
            img[:,:,ch] = (img[:,:,ch] - m) * s
            
    # Step 5
    if reverse_channels:
        img = img[:,:,::-1]
    
    # Step 6
    img = np.expand_dims(img,axis=0)
    img = np.transpose(img, (0, 3, 1, 2))
    
    return img, top, left, ratio


# In[3]:


tidl_tools_path = '/opt/edgeai-tidl-tools'
output_dir = '/opt/edgeai-gst-apps/yoga_pose_judge/model-artifacts/onnx/yolox_s_pose_ti_lite_49p5_78p0.onnx'
onnx_model_path_TDA4VM = '/opt/edgeai-gst-apps/yoga_pose_judge/model/yolox_s_pose_ti_lite_49p5_78p0.onnx'
onnx.shape_inference.infer_shapes_path(onnx_model_path_TDA4VM, onnx_model_path_TDA4VM)
    
# model compilation options
compile_options = {
    'tidl_tools_path' : tidl_tools_path,
    'artifacts_folder' : output_dir,
    'tensor_bits' : 8,
    'accuracy_level' : 1,
    'object_detection:meta_arch_type': 6,
    'object_detection:meta_layers_names_list': '/opt/edgeai-gst-apps/yoga_pose_judge/model/yolox_s_pose_ti_lite_metaarch.prototxt',
}


# In[4]:


label = 'ONR-KD-7060-human-pose-yolox-s-640x640'
pad_color = 128 if 'ae' in label and 'yolo' not in label else 114
pad_type = "corner" if 'yolox' in label else "center"
size = 640
mean = [0.0, 0.0, 0.0]
scale = [1.0, 1.0, 1.0]
layout = 0
reverse_channels = True


# In[5]:


so = rt.SessionOptions()


# In[6]:


EP_list = ['TIDLExecutionProvider','CPUExecutionProvider']


# In[7]:


sess = rt.InferenceSession(onnx_model_path_TDA4VM, providers=EP_list, provider_options=[compile_options, {}], sess_options=so)


# In[8]:


input_details = sess.get_inputs()


# In[9]:


# grab a single frame from the USB webcam (V4L2 device index 2 here)
capture = cv2.VideoCapture(2)
time.sleep(0.1)
(success, reference) = capture.read()

cv2.imwrite('/opt/edgeai-gst-apps/yoga_pose_judge/captured_images/live_image.jpg',reference)

capture.release()


# In[10]:


im = Image.open(r"/opt/edgeai-gst-apps/yoga_pose_judge/captured_images/live_image.jpg")
width, height = im.size

# crop the widescreen frame to a centered square, then resize to 640x640
left = (width/2) - (height/2)
right = (width/2) + (height/2)
top = 0
bottom = height

im1 = im.crop((left, top, right, bottom))
newsize = (640, 640)
im1 = im1.resize(newsize)
im1.save("/opt/edgeai-gst-apps/yoga_pose_judge/captured_images/live_image_resized.jpg")


# In[13]:


#image_name = '/opt/edgeai-gst-apps/yoga_pose_judge/side_bend_pose/image-640x640_28.png'
image_name = '/opt/edgeai-gst-apps/yoga_pose_judge/captured_images/live_image_resized.jpg'
processed_image, top, left, ratio = preprocess_for_onnx_pose_estimation(image_name, size, mean, scale, layout, reverse_channels, pad_color, pad_type)

if not input_details[0].type == 'tensor(float)':
    processed_image = np.uint8(processed_image)

image_size = processed_image.shape[3]    
output = list(sess.run(None, {input_details[0].name : processed_image}))[0]


# In[14]:


# post processing: overlay the detected keypoints and skeleton on the captured image
out_file=None
get_ipython().run_line_magic('matplotlib', 'inline')
#print(output)
output_image = single_img_visualise(output, image_size, image_name, out_file, top, left, ratio, udp=True, thickness=2, radius=5, label=label)

# plot the output using matplotlib
plt.rcParams["figure.figsize"]=20,20
plt.rcParams['figure.dpi'] = 200 # higher dpi gives a sharper plot but renders more slowly
plt.imshow(output_image)
plt.show()


# In[ ]:
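
# A minimal, hypothetical sketch of the "judging" step (not part of the
# original notebook): once keypoints have been extracted from the model
# output as (x, y) pairs, the live pose can be scored against a reference
# pose with a simple normalized-keypoint distance. Extracting keypoints from
# the raw YOLOX output is assumed to have been done already, and the helper
# name and arguments below are illustrative only.

def score_pose(live_kpts, ref_kpts):
    """Return a 0..1 similarity score between two (N, 2) keypoint arrays."""
    live = np.asarray(live_kpts, dtype=np.float32)
    ref = np.asarray(ref_kpts, dtype=np.float32)
    # normalize each pose: subtract its centroid and divide by its spread,
    # so the score ignores where the person stands and how large they appear
    live = (live - live.mean(axis=0)) / (live.std() + 1e-6)
    ref = (ref - ref.mean(axis=0)) / (ref.std() + 1e-6)
    # mean per-keypoint distance, squashed into a 0..1 "closeness" score
    mean_dist = np.linalg.norm(live - ref, axis=1).mean()
    return float(1.0 / (1.0 + mean_dist))

# example usage with hypothetical keypoint arrays:
# print(score_pose(live_keypoints, reference_keypoints))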

Credits

Whitney Knitter

All thoughts/opinions are my own and do not reflect those of any company/entity I currently/previously associate with.