Difference between revisions of "2021SummerTeam6"

From MAE/ECE 148 - Introduction to Autonomous Vehicles
Jump to navigation Jump to search
 
(9 intermediate revisions by the same user not shown)
Line 4: Line 4:




'''From Left to Right'''
From Left to Right


'''Kevin Bishara (MAE) | William Lynch (ECE) | Anwar Hsu (ECE)'''
Kevin Bishara (MAE) | William Lynch (ECE) | Anwar Hsu (ECE)


= '''Robot & 3D Modeling Designs''' =
= '''Robot & 3D Modeling Designs''' =
Line 17: Line 17:
'''Electronics Plate'''
'''Electronics Plate'''


[[File:cad1.png|200px]]
[[File:cad1.png|250px]]


'''Camera Mount'''
'''Camera Mount'''
[[File:cad4.png|350px]]
[[File:cad5.png|350px]]


'''Jetson Nano Case'''
'''Jetson Nano Case'''
Line 37: Line 40:


= '''Final Project Overview''' =
= '''Final Project Overview''' =
   
We wanted to implement a speech-to-text method to control the car. We tried using the Picovoice API but had no luck, as the documentation was too vague to understand in the short few days the summer session had to offer. If we had more time with this approach, we would implement a voice node for ROS and make sure the topics are correctly linked to allow the speech control to work. Instead, for a quick demo solution, we decided to use IBM Cloud Watson software, which constantly listens and translates. The issue with this approach is that it picks up all ambient noise, so having the car running would make this API less than ideal.
==YoloV5 Object Detection Code ==
 
== '''Our Python Code!''' ==
 
  <nowiki>
  <nowiki>
import rospy
# -*- coding: utf-8 -*-
import cv2
"""
import numpy as np
Created on Mon Mar 29 14:31:14 2021
from std_msgs.msg import Int32, Int32MultiArray
 
from sensor_msgs.msg import Image
@author: Anwar
from decoder import decodeImage
"""
import time
## You need to install pyaudio to run this example
from cv_bridge import CvBridge
# pip install pyaudio
from elements.yolo import OBJ_DETECTION
 
# When using a microphone, the AudioSource `input` parameter would be
# initialised as a queue. The pyaudio stream would be continuosly adding
# recordings to the queue, and the websocket client would be sending the
# recordings to the speech to text service
 
 
import pyaudio
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from threading import Thread
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
 
try:
    from Queue import Queue, Full
except ImportError:
    from queue import Queue, Full
 
###############################################
#### Initalize queue to store the recordings ##
###############################################
CHUNK = 1024
# Note: It will discard if the websocket client can't consumme fast enough
# So, increase the max size as per your choice
BUF_MAX_SIZE = CHUNK * 10
# Buffer to store audio
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))
 
# Create an instance of AudioSource
audio_source = AudioSource(q, True, True)
 
###############################################
#### Prepare Speech to Text Service ########
###############################################
 
# initialize speech to text service
authenticator = IAMAuthenticator('your API key')
speech_to_text = SpeechToTextV1(authenticator=authenticator)
 
# define callback for the speech to text service
class MyRecognizeCallback(RecognizeCallback):
    def __init__(self):
        RecognizeCallback.__init__(self)
 
    def on_transcription(self, transcript):
        print(transcript)
 
    def on_connected(self):
        print('Connection was successful')
 
    def on_error(self, error):
        print('Error received: {}'.format(error))
 
    def on_inactivity_timeout(self, error):
        print('Inactivity timeout: {}'.format(error))
 
    def on_listening(self):
        print('Service is listening')
 
    def on_hypothesis(self, hypothesis):
        print(hypothesis)


# Give names for nodes and topics for ROS
    def on_data(self, data):
STOPSIGN_NODE_NAME = 'stopsign_node'
        print(data)
STOPSIGN_TOPIC_NAME = 'StopSign'
CAMERA_TOPIC_NAME = 'camera_rgb'


# types of objects that can be detected
    def on_close(self):
        print("Connection closed")


Object_classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',                'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',                'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',                'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',                'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',                'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',                'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',                'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',                'hair drier', 'toothbrush' ]
# this function will initiate the recognize service and pass in the AudioSource
def recognize_using_weboscket(*args):
    mycallback = MyRecognizeCallback()
    speech_to_text.recognize_using_websocket(audio=audio_source,
                                            content_type='audio/l16; rate=44100',
                                            recognize_callback=mycallback,
                                            interim_results=True)


###############################################
#### Prepare the for recording using Pyaudio ##
###############################################


Object_colors = list(np.random.rand(80,3)*255)
# Variables for recording the speech
Object_detector = OBJ_DETECTION('weights/yolov5s.pt', Object_classes)
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100


# define callback for pyaudio to store the recording in queue
def pyaudio_callback(in_data, frame_count, time_info, status):
    try:
        q.put(in_data)
    except Full:
        pass # discard
    return (None, pyaudio.paContinue)


class StopSignDetection:
# instantiate pyaudio
        def __init__(self):
audio = pyaudio.PyAudio()
            self.init_node = rospy.init_node(STOPSIGN_NODE_NAME, anonymous=False)                # initialize the node
            self.StopSign_publisher = rospy.Publisher(STOPSIGN_TOPIC_NAME,Int32, queue_size=1)    # make this node a publisher
            self.camera_subscriber = rospy.Subscriber(CAMERA_TOPIC_NAME,Image,self.detect_stop)  # subscribe to the camera feed
            self.bridge =CvBridge()
            self.stopsign = Int32()


        def detect_stop(self,data):
# open stream using callback
                frame = self.bridge.imgmsg_to_cv2(data)         # get frame from camera feed data
stream = audio.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    stream_callback=pyaudio_callback,
    start=False
)


                        # detection process
#########################################################################
                objs = Object_detector.detect(frame)            # detect the object
#### Start the recording and start service to recognize the stream ######
#########################################################################


                        # plotting
print("Enter CTRL+C to end recording...")
                for obj in objs:
stream.start_stream()
                                # print(obj)
                                label = obj['label']
                                score = obj['score']
                                [(xmin,ymin),(xmax,ymax)] = obj['bbox']
                                color = Object_colors[Object_classes.index(label)]
                                frame = cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), color, 2)
                                frame = cv2.putText(frame, f'{label} ({str(score)})', (xmin,ymin),
                cv2.FONT_HERSHEY_SIMPLEX , 0.75, color, 1, cv2.LINE_AA)
                                cv2.imshow('stopsign',frame)                   # create window to show objects detected
                                cv2.waitKey(1)
                          # if a stop sign is detected send out a 1 else send out 0


                                if label == 'stop sign' and  score > 0.1:
try:
                                          self.stopsign.data = 1
    recognize_thread = Thread(target=recognize_using_weboscket, args=())
                                          self.StopSign_publisher.publish(self.stopsign)
     recognize_thread.start()
                                else:
                                          self.stopsign.data = 0
                                          self.StopSign_publisher.publish(self.stopsign)
def main():
    StopSign_detector = StopSignDetection()
    rate = rospy.Rate(15)
     while not rospy.is_shutdown():
      rospy.spin()
      rate.sleep()


if __name__=='__main__':
    while True:
     main()
        pass
except KeyboardInterrupt:
     # stop recording
    stream.stop_stream()
    stream.close()
    audio.terminate()
    audio_source.completed_recording()


</nowiki>
</nowiki>

Latest revision as of 05:49, 6 September 2021

Team 6 Members

P1.jpg


From Left to Right

Kevin Bishara (MAE) | William Lynch (ECE) | Anwar Hsu (ECE)

Robot & 3D Modeling Designs

Our Robot

P2.png

Electronics Plate

Cad1.png

Camera Mount

Cad4.png Cad5.png

Jetson Nano Case

Cad2.png Cad3.png

Autonomous Laps

    DonkeyCar Laps

Our autonomous laps for DonkeyCar can be found here.

    OpenCV/ROS Laps

Our OpenCV/ROS autonomous laps can be found here.

Final Project Overview

We wanted to implement a speech-to-text method to control the car. We tried using the Picovoice API but had no luck, as the documentation was too vague to understand in the short few days the summer session had to offer. If we had more time with this approach, we would implement a voice node for ROS and make sure the topics are correctly linked to allow the speech control to work. Instead, for a quick demo solution, we decided to use IBM Cloud Watson software, which constantly listens and translates. The issue with this approach is that it picks up all ambient noise, so having the car running would make this API less than ideal.

Our Python Code!

# -*- coding: utf-8 -*-
"""
Created on Mon Mar 29 14:31:14 2021

@author: Anwar
"""
## You need to install pyaudio to run this example
# pip install pyaudio

# When using a microphone, the AudioSource `input` parameter would be
# initialised as a queue. The pyaudio stream would be continuously adding
# recordings to the queue, and the websocket client would be sending the
# recordings to the speech-to-text service.


import pyaudio
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from threading import Thread
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

try:
    from Queue import Queue, Full
except ImportError:
    from queue import Queue, Full

###############################################
#### Initialize queue to store the recordings #
###############################################
CHUNK = 1024  # frames per PyAudio buffer
# NOTE: audio chunks should be discarded when the websocket client can't
# consume fast enough; increase the max size as per your choice.
BUF_MAX_SIZE = CHUNK * 10
# Bounded buffer of raw audio chunks (maxsize = BUF_MAX_SIZE / CHUNK = 10 chunks).
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))

# Wrap the queue as a Watson AudioSource (positional flags presumably
# is_recording=True, is_buffer=True — confirm against ibm_watson docs).
audio_source = AudioSource(q, True, True)

###############################################
#### Prepare Speech to Text Service ###########
###############################################

# Initialize the speech-to-text service.
# SECURITY NOTE(review): replace the placeholder with a real IAM API key,
# preferably loaded from an environment variable rather than hardcoded.
authenticator = IAMAuthenticator('your API key')
speech_to_text = SpeechToTextV1(authenticator=authenticator)

# define callback for the speech to text service
class MyRecognizeCallback(RecognizeCallback):
    """Watson speech-to-text callback that logs every service event to stdout."""

    def __init__(self):
        super().__init__()

    def on_connected(self):
        # Websocket handshake with the service succeeded.
        print('Connection was successful')

    def on_listening(self):
        # Service is ready to receive audio.
        print('Service is listening')

    def on_hypothesis(self, hypothesis):
        # Interim (not-yet-final) transcription guess.
        print(hypothesis)

    def on_transcription(self, transcript):
        # Finalized transcript for an utterance.
        print(transcript)

    def on_data(self, data):
        # Raw response payload from the service.
        print(data)

    def on_inactivity_timeout(self, error):
        print('Inactivity timeout: {}'.format(error))

    def on_error(self, error):
        print('Error received: {}'.format(error))

    def on_close(self):
        print("Connection closed")

# this function will initiate the recognize service and pass in the AudioSource
def recognize_using_weboscket(*args):
    """Open a websocket recognize session streaming `audio_source` to Watson.

    Blocks for the lifetime of the session; intended to run in its own
    thread. (Name keeps the original misspelling: it is referenced as a
    thread target elsewhere in the script.)
    """
    callback = MyRecognizeCallback()
    speech_to_text.recognize_using_websocket(
        audio=audio_source,
        content_type='audio/l16; rate=44100',
        recognize_callback=callback,
        interim_results=True,
    )

###############################################
#### Prepare for recording using PyAudio ######
###############################################

# Recording parameters: 16-bit signed PCM, mono, 44.1 kHz — these must match
# the 'audio/l16; rate=44100' content type passed to the recognize call.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100

# define callback for pyaudio to store the recording in queue
def pyaudio_callback(in_data, frame_count, time_info, status):
    """PyAudio stream callback: enqueue each recorded chunk for the websocket sender.

    Runs on PyAudio's internal thread for every captured buffer; must never
    block, or audio capture stalls.
    """
    try:
        # BUG FIX: the original used q.put(in_data), which blocks by default
        # and can never raise Full — so the "discard when the consumer is
        # slow" behavior promised by the buffer comments never happened and
        # the audio callback thread would stall instead. put_nowait raises
        # Full immediately when the queue is at capacity.
        q.put_nowait(in_data)
    except Full:
        pass  # discard this chunk — the websocket client can't keep up
    return (None, pyaudio.paContinue)

# Instantiate the PyAudio host interface.
audio = pyaudio.PyAudio()

# Open a microphone input stream in callback mode: each captured chunk is
# handed to pyaudio_callback, which enqueues it for the websocket sender.
# start=False — capture begins only when stream.start_stream() is called.
stream = audio.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    stream_callback=pyaudio_callback,
    start=False
)

#########################################################################
#### Start the recording and start service to recognize the stream ######
#########################################################################

import time  # used only by the idle loop below

print("Enter CTRL+C to end recording...")
stream.start_stream()

try:
    # daemon=True so the process can exit after Ctrl+C cleanup even though
    # the websocket recognize call blocks indefinitely inside this thread.
    recognize_thread = Thread(target=recognize_using_weboscket, args=(), daemon=True)
    recognize_thread.start()

    # Idle until Ctrl+C. Sleeping (instead of the original busy
    # `while True: pass`) avoids pinning a CPU core at 100%.
    while True:
        time.sleep(0.1)
except KeyboardInterrupt:
    # Stop recording, release audio resources, then tell the websocket
    # client the recording is finished so it can flush and close.
    stream.stop_stream()
    stream.close()
    audio.terminate()
    audio_source.completed_recording()