GOWTHAM S, Balamurugan.K, 21CS004 Annalakshmi
Created July 29, 2024

Gallaudet Accessible AI

In today's world, many people rely on AI tools for daily tasks, yet accessibility remains a challenge for people with speech and hearing impairments.


Things used in this project

Hardware components

Seeed Studio Grove Vision AI Module V2 ×1
Seeed Studio XIAO ESP32C3 ×1

Software apps and online services

AMD Accelerator Cloud
GitHub
Roboflow
Microsoft Windows 10
Linux
Blender
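
The detector loaded later in the real-time script ('best1.pt') is a custom YOLO model. As a minimal sketch, a hand-sign dataset labelled in Roboflow and exported in YOLO format could be trained like this; the dataset path, base checkpoint, and epoch count are illustrative assumptions, not the project's actual settings.

Python
# Minimal sketch: training a custom hand-sign detector with Ultralytics YOLO.
# Assumes a dataset exported from Roboflow in YOLO format with a data.yaml file;
# path, base model, and epoch count are placeholders.
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # start from a small pretrained checkpoint
model.train(data="hand-signs/data.yaml", epochs=50, imgsz=640)
# Training writes weights such as runs/detect/train/weights/best.pt,
# which the real-time script below loads (renamed to best1.pt).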

Story


Custom parts and enclosures

Hand Sign (Sketchfab model)
Hand Sign2 (Sketchfab model)
Hand 3 (Sketchfab model)
Future Design (Sketchfab model)
Future product (Sketchfab model)

Schematics

Process of the Entire System

Future Development Chart

Code

Real-time Running Code

Python
This code is for demo purposes only; it will be updated in the future.
import cv2
from ultralytics import YOLO
import sys
import pygame
import tkinter as tk
from tkinter import messagebox
import os
import subprocess
import psutil
import time
from gpt import gptvoice


pygame.mixer.init()
alarm_sound = pygame.mixer.Sound("D:/f1.mp3")  # preloaded audio cue
class_id = None  # most recently detected class index; None until a detection occurs

mask_model_path = 'D:/best1.pt'  # custom-trained YOLO hand-sign weights
mask_model = YOLO(mask_model_path)
tracker = cv2.TrackerMIL_create()  # MIL tracker for following the selected hand region

def play_media(file_path):
    # Launch the file in Windows Media Player (Windows-only playback path).
    player_path = "C:\\Program Files (x86)\\Windows Media Player\\wmplayer.exe"

    if not os.path.exists(player_path):
        print("Windows Media Player not found at the specified path.")
        return

    cmd = f'"{player_path}" "{file_path}"'

    try:
        subprocess.Popen(cmd, shell=True)
        print(f"Playing {file_path} in Windows Media Player...")
        time.sleep(5)  # give the player a moment to start

    except Exception as e:
        print(f"Error occurred: {e}")


def start_detection_and_tracking():
    video = cv2.VideoCapture(0)  # default webcam
    global class_id
    if not video.isOpened():
        messagebox.showerror("Error", "Could not open camera")
        return

    ok, frame = video.read()

    if not ok:
        messagebox.showerror("Error", "Cannot read camera frame")
        return

    # Let the user draw a box around the hand, then initialise the tracker on it.
    bbox = cv2.selectROI(frame, False)
    ok = tracker.init(frame, bbox)
    initial_center = (bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2)
    moved = False

    mask_threshold = 0.5     # minimum detection confidence
    movement_threshold = 10  # pixels of drift needed to count as movement

    while True:
       
        ok, frame = video.read()
        if not ok:
            break

        
        # Run the YOLO hand-sign detector on the current frame.
        mask_results = mask_model(frame)[0]
        mask_detected = False

        for result in mask_results.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = result

            if score > mask_threshold:
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 4)
                cv2.putText(frame, mask_results.names[int(class_id)].upper(), (int(x1), int(y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3, cv2.LINE_AA)

                mask_detected = True


        
        # Update the tracker and measure drift from the initial box position.
        ok, bbox = tracker.update(frame)

        current_center = (bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2)
        distance = ((current_center[0] - initial_center[0]) ** 2 + (current_center[1] - initial_center[1]) ** 2) ** 0.5

        if distance > movement_threshold:
            moved = True

        if (mask_detected or moved) and class_id is not None:
            label = mask_results.names[int(class_id)].upper()

            

            
            # Hand the detected label to the media/story triggers via a small text file.
            with open('detect.txt', 'w') as f:
                f.write(label + '\n')

            with open('detect.txt', 'r') as f:
                content = f.read()

            if 'SONG' in content:
                media_file = "D:/f1.mp3"
                play_media(media_file)
                with open('detect.txt', 'w'):
                    pass  # opening in 'w' mode clears the trigger file
            if 'MOVIE' in content:
                media_file = "D:/Future Man - 01x01 - Pilot.mkv"
                play_media(media_file)
                with open('detect.txt', 'w'):
                    pass  # clear the trigger file
            if 'STORY' in content:
                delay = 10
                time.sleep(delay)  # short pause before narration begins
                gptvoice(content)  # generate and speak a short story (see gpt.py below)
                with open('detect.txt', 'w'):
                    pass  # clear the trigger file

        
        cv2.imshow("Combined Detection and Tracking", frame)

       
        k = cv2.waitKey(1) & 0xff
        if k == 27:  # Esc key exits the loop
            break

    
    video.release()
    cv2.destroyAllWindows()


# Minimal Tkinter launcher with Start and Exit buttons.
root = tk.Tk()
root.title("Detection")

start_button = tk.Button(root, text="Start Detection", command=start_detection_and_tracking)
start_button.pack()

exit_button = tk.Button(root, text="Exit", command=root.destroy)
exit_button.pack()

root.mainloop()
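
The script above launches Windows Media Player for playback, which ties it to Windows. Since pygame.mixer is already initialised, audio could also be played cross-platform; a minimal sketch, reusing the same MP3 path the script uses:

Python
import time
import pygame

def play_audio(file_path):
    # Cross-platform audio playback via pygame.mixer.music, as an
    # alternative to launching Windows Media Player.
    pygame.mixer.init()
    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():  # block until playback finishes
        time.sleep(0.1)

play_audio("D:/f1.mp3")  # same song file the detection script triggers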

Getting results from the generative model (gpt.py)

Python
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import pyttsx3

def gptvoice(prompt):
    # Load the pretrained GPT-2 model and its tokenizer.
    model_name = 'gpt2'
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)

    # Prefix the detected label with "tell" to form the generation prompt.
    inputs = tokenizer.encode("tell " + prompt, return_tensors='pt')

    
    # Sample one continuation with nucleus sampling for a varied short story.
    outputs = model.generate(
        inputs,
        max_length=200,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        early_stopping=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        do_sample=True
    )

    
    # Speak each generated story aloud with the offline pyttsx3 engine.
    engine = pyttsx3.init()

    for i, output in enumerate(outputs):
        story = tokenizer.decode(output, skip_special_tokens=True)

        print(f"Story {i + 1}:\n{story}\n")

        engine.say(story)

    engine.runAndWait()
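
A quick way to test this module on its own, assuming it is saved as gpt.py to match the `from gpt import gptvoice` import in the real-time script:

Python
# Standalone test of the story generator and speech output.
from gpt import gptvoice

gptvoice("STORY")  # generates a continuation of "tell STORY" and speaks it aloud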

Sample code for matching a 3D model and creating a video based on the GPT model's result

Python
import bpy
import os

def load_fbx(filepath):
    # Import the FBX animation into the current Blender scene.
    bpy.ops.import_scene.fbx(filepath=filepath)

def set_output_path(output_path):
    bpy.context.scene.render.filepath = output_path

def set_render_settings():
    # Render 250 PNG frames (Blender replaces '####' with the frame number).
    bpy.context.scene.render.image_settings.file_format = 'PNG'
    bpy.context.scene.frame_start = 1
    bpy.context.scene.frame_end = 250
    bpy.context.scene.frame_step = 1

def render_animation():
    bpy.ops.render.render(animation=True)

def create_video_from_fbx(fbx_file, output_dir, output_video):
    # Start from an empty scene so only the imported FBX is rendered.
    bpy.ops.wm.read_factory_settings(use_empty=True)

    load_fbx(fbx_file)

    set_render_settings()
    set_output_path(os.path.join(output_dir, 'frame_####.png'))

    render_animation()

    # Stitch the rendered frames into an H.264 video with ffmpeg.
    os.system(f'ffmpeg -r 30 -i "{output_dir}/frame_%04d.png" -vcodec libx264 -crf 25 -pix_fmt yuv420p "{output_video}"')

if __name__ == "__main__":
    import sys
    # When run via `blender --background --python render_fbx.py -- <args>`,
    # Blender's own options come before "--"; take only the arguments after it.
    argv = sys.argv[sys.argv.index("--") + 1:]
    fbx_file, output_dir, output_video = argv[:3]
    create_video_from_fbx(fbx_file, output_dir, output_video)

Second part of the above code: a driver that runs the render script headlessly in Blender and plays back the result

Python
import subprocess
import cv2

def generate_video(fbx_file, output_dir, output_video):
    # Run the Blender render script headlessly; arguments after '--' are
    # passed through to render_fbx.py.
    subprocess.run([
        'blender', '--background', '--python', 'render_fbx.py', '--', fbx_file, output_dir, output_video
    ])

def match_text_to_model(generated_text):
    # Placeholder: map the generated text to a matching FBX hand-sign model.
    return "/path/to/matched_model.fbx"

def main():
    generated_text = "Your GPT-generated text here"
    fbx_file = match_text_to_model(generated_text)
    output_dir = "/path/to/output_frames"
    output_video = "/path/to/output_video.mp4"

    generate_video(fbx_file, output_dir, output_video)

    # Play the rendered video back with OpenCV; press 'q' to stop.
    cap = cv2.VideoCapture(output_video)
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            cv2.imshow('Video', frame)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
        else:
            break
    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()
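
The match_text_to_model stub above always returns a single path. A minimal keyword-lookup sketch of the matching step follows; the keyword table and file names are hypothetical placeholders, not the project's actual assets.

Python
# Hypothetical mapping from keywords in the generated text to FBX animations.
MODEL_LIBRARY = {
    "HELLO": "/models/hello_sign.fbx",
    "SONG": "/models/song_sign.fbx",
    "STORY": "/models/story_sign.fbx",
}

def match_text_to_model(generated_text, default="/models/default_sign.fbx"):
    # Return the first model whose keyword appears in the generated text.
    text = generated_text.upper()
    for keyword, fbx_path in MODEL_LIBRARY.items():
        if keyword in text:
            return fbx_path
    return default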

Credits

GOWTHAM S
2 projects • 3 followers
Balamurugan.K
0 projects • 1 follower
21CS004 Annalakshmi
0 projects • 0 followers
