Difference between revisions of "2021SummerTeam6"
(44 intermediate revisions by the same user not shown) | |||
Line 4: | Line 4: | ||
From Left to Right | |||
Kevin Bishara (MAE) | William Lynch (ECE) | Anwar Hsu (ECE) | |||
= '''Robot | = '''Robot & 3D Modeling Designs''' = | ||
[[File:p2. | |||
'''Our Robot''' | |||
[[File:p2.png|350px]] | |||
'''Electronics Plate''' | |||
[[File:cad1.png|250px]] | |||
'''Camera Mount''' | |||
[[File:cad4.png|350px]] | |||
[[File:cad5.png|350px]] | |||
'''Jetson Nano Case''' | |||
[[File:cad2.png|350px]] | |||
[[File:cad3.png|350px]] | |||
= '''Autonomous Laps''' = | |||
'''DonkeyCar Laps''' | |||
Our [https://www.youtube.com/watch?v=nPVh0jeVX9o&ab autonomous laps for DonkeyCar] can be found here. | |||
'''OpenCV/ROS Laps''' | |||
Our [https://www.youtube.com/watch?v=R92musCCoJM&ab OpenCV/ROS autonomous laps ] can be found here. | |||
= '''Final Project Overview''' = | |||
We wanted to implement a speech-to-text method to control the car. We first tried the Picovoice API but had no luck, as its documentation was too vague to work through in the few days the short summer session allowed. With more time, we would have implemented a voice node for ROS and made sure the topics were correctly linked so that spoken commands could reach the car. Instead, for a quick demo, we used IBM Cloud's Watson Speech to Text service, which listens continuously and transcribes what it hears. The issue with this approach is that it picks up all surrounding noise, so the sound of a running car makes this API less than ideal. | |||
== '''Our Python Code!''' == | |||
<nowiki> | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Mon Mar 29 14:31:14 2021 | |||
@author: Anwar | |||
""" | |||
## You need to install pyaudio to run this example | |||
# pip install pyaudio | |||
# When using a microphone, the AudioSource `input` parameter would be | |||
# initialised as a queue. The pyaudio stream would be continuously adding | |||
# recordings to the queue, and the websocket client would be sending the | |||
# recordings to the speech to text service | |||
import pyaudio | |||
from ibm_watson import SpeechToTextV1 | |||
from ibm_watson.websocket import RecognizeCallback, AudioSource | |||
from threading import Thread | |||
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator | |||
try: | |||
from Queue import Queue, Full | |||
except ImportError: | |||
from queue import Queue, Full | |||
###############################################
#### Initialize queue to store the recordings ##
###############################################

# Size handed to PyAudio as frames_per_buffer when the stream is opened.
CHUNK = 1024

# Note: the buffer is bounded. If the websocket client can't consume fast
# enough, raise this limit to suit your needs.
BUF_MAX_SIZE = CHUNK * 10

# Buffer to store audio; capacity is expressed in CHUNK-sized entries.
q = Queue(maxsize=BUF_MAX_SIZE // CHUNK)

# AudioSource wrapping the queue, flagged as a live recording backed by a
# buffer rather than a file.
audio_source = AudioSource(q, is_recording=True, is_buffer=True)
###############################################
#### Prepare Speech to Text Service ########
###############################################

# Credentials for the service: replace the placeholder with your own
# IBM Cloud IAM API key before running.
authenticator = IAMAuthenticator('your API key')

# Speech to Text client that every recognition request goes through.
speech_to_text = SpeechToTextV1(authenticator=authenticator)
# Callback handlers for the speech to text service.
class MyRecognizeCallback(RecognizeCallback):
    """Print every event reported by the speech to text websocket session."""

    def __init__(self):
        RecognizeCallback.__init__(self)

    # --- connection lifecycle -------------------------------------------

    def on_connected(self):
        """Report that the connection has been established."""
        print('Connection was successful')

    def on_listening(self):
        """Report that the service is ready to receive audio."""
        print('Service is listening')

    def on_close(self):
        """Report that the connection has been closed."""
        print("Connection closed")

    # --- recognition results --------------------------------------------

    def on_hypothesis(self, hypothesis):
        """Print the hypothesis passed in by the service."""
        print(hypothesis)

    def on_transcription(self, transcript):
        """Print the transcript passed in by the service."""
        print(transcript)

    def on_data(self, data):
        """Print the data payload passed in by the service."""
        print(data)

    # --- failures -------------------------------------------------------

    def on_error(self, error):
        """Print an error reported for the session."""
        message = 'Error received: {}'.format(error)
        print(message)

    def on_inactivity_timeout(self, error):
        """Print an inactivity timeout reported for the session."""
        message = 'Inactivity timeout: {}'.format(error)
        print(message)
# Starts the recognize service and hands it the shared AudioSource.
def recognize_using_weboscket(*args):
    """Run websocket speech recognition on the module-level ``audio_source``.

    Any positional arguments are accepted and ignored, so the function can
    be used directly as a ``Thread`` target. Events are reported through a
    fresh ``MyRecognizeCallback`` instance, with interim results enabled.
    """
    speech_to_text.recognize_using_websocket(
        audio=audio_source,
        content_type='audio/l16; rate=44100',
        recognize_callback=MyRecognizeCallback(),
        interim_results=True,
    )
###############################################
#### Prepare for recording using Pyaudio ######
###############################################

# Variables for recording the speech
FORMAT = pyaudio.paInt16  # sample format passed to the PyAudio stream
CHANNELS = 1              # number of input channels
RATE = 44100              # sampling rate; same value as the "rate=44100" content type
# Callback PyAudio invokes with each recorded buffer; stores it in the queue.
def pyaudio_callback(in_data, frame_count, time_info, status):
    """Queue one recorded buffer, dropping it if the queue is full.

    Args:
        in_data: The recorded audio buffer delivered by PyAudio.
        frame_count: Unused; part of the PyAudio callback signature.
        time_info: Unused; part of the PyAudio callback signature.
        status: Unused; part of the PyAudio callback signature.

    Returns:
        ``(None, pyaudio.paContinue)`` so the input stream keeps running.
    """
    try:
        # put_nowait raises Full immediately. A plain put() would block the
        # audio callback until the consumer catches up and never raise Full,
        # so the intended "discard" branch below could not run.
        q.put_nowait(in_data)
    except Full:
        pass  # discard: the consumer is not keeping up
    return (None, pyaudio.paContinue)
# PyAudio instance that owns the recording stream.
audio = pyaudio.PyAudio()

# Input stream in callback mode. It is created stopped (start=False) and is
# started explicitly once everything else is ready.
stream_options = dict(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    stream_callback=pyaudio_callback,
    start=False,
)
stream = audio.open(**stream_options)
#########################################################################
#### Start the recording and start service to recognize the stream ######
#########################################################################

print("Enter CTRL+C to end recording...")
stream.start_stream()

# Recognition runs on its own thread so the main thread can watch for CTRL+C.
recognize_thread = Thread(target=recognize_using_weboscket, args=())
try:
    recognize_thread.start()
    # Wait with a short join timeout instead of spinning in `while True: pass`,
    # which pins a CPU core. The timeout keeps the main thread responsive to
    # CTRL+C, and the loop ends by itself if the recognition thread stops.
    while recognize_thread.is_alive():
        recognize_thread.join(timeout=0.5)
except KeyboardInterrupt:
    pass  # the user asked to stop; cleanup happens below
finally:
    # Stop recording and release the audio device on every exit path, then
    # tell the audio source that no more data will arrive.
    stream.stop_stream()
    stream.close()
    audio.terminate()
    audio_source.completed_recording()
</nowiki> |
Latest revision as of 05:49, 6 September 2021
Team 6 Members
From Left to Right
Kevin Bishara (MAE) | William Lynch (ECE) | Anwar Hsu (ECE)
Robot & 3D Modeling Designs
Our Robot
Electronics Plate
Camera Mount
Jetson Nano Case
Autonomous Laps
DonkeyCar Laps
Our autonomous laps for DonkeyCar can be found here.
OpenCV/ROS Laps
Our OpenCV/ROS autonomous laps can be found here.
Final Project Overview
We wanted to implement a speech-to-text method to control the car. We first tried the Picovoice API but had no luck, as its documentation was too vague to work through in the few days the short summer session allowed. With more time, we would have implemented a voice node for ROS and made sure the topics were correctly linked so that spoken commands could reach the car. Instead, for a quick demo, we used IBM Cloud's Watson Speech to Text service, which listens continuously and transcribes what it hears. The issue with this approach is that it picks up all surrounding noise, so the sound of a running car makes this API less than ideal.
Our Python Code!
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 29 14:31:14 2021
@author: Anwar

Stream microphone audio to the IBM Watson Speech to Text service over a
websocket and print everything the service reports.
"""
## You need to install pyaudio to run this example
# pip install pyaudio

# When using a microphone, the AudioSource `input` parameter would be
# initialised as a queue. The pyaudio stream would be continuously adding
# recordings to the queue, and the websocket client would be sending the
# recordings to the speech to text service

import pyaudio
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from threading import Thread
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

try:
    from Queue import Queue, Full
except ImportError:
    from queue import Queue, Full

###############################################
#### Initialize queue to store the recordings ##
###############################################
CHUNK = 1024
# Note: It will discard if the websocket client can't consume fast enough
# So, increase the max size as per your choice
BUF_MAX_SIZE = CHUNK * 10
# Buffer to store audio
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))

# Create an instance of AudioSource
audio_source = AudioSource(q, True, True)

###############################################
#### Prepare Speech to Text Service ########
###############################################

# initialize speech to text service
authenticator = IAMAuthenticator('your API key')
speech_to_text = SpeechToTextV1(authenticator=authenticator)


# define callback for the speech to text service
class MyRecognizeCallback(RecognizeCallback):
    """Print every event reported by the speech to text websocket session."""

    def __init__(self):
        RecognizeCallback.__init__(self)

    def on_transcription(self, transcript):
        print(transcript)

    def on_connected(self):
        print('Connection was successful')

    def on_error(self, error):
        print('Error received: {}'.format(error))

    def on_inactivity_timeout(self, error):
        print('Inactivity timeout: {}'.format(error))

    def on_listening(self):
        print('Service is listening')

    def on_hypothesis(self, hypothesis):
        print(hypothesis)

    def on_data(self, data):
        print(data)

    def on_close(self):
        print("Connection closed")


# this function will initiate the recognize service and pass in the AudioSource
def recognize_using_weboscket(*args):
    """Run websocket recognition on ``audio_source``; ``args`` are ignored."""
    mycallback = MyRecognizeCallback()
    speech_to_text.recognize_using_websocket(audio=audio_source,
                                             content_type='audio/l16; rate=44100',
                                             recognize_callback=mycallback,
                                             interim_results=True)


###############################################
#### Prepare for recording using Pyaudio ######
###############################################

# Variables for recording the speech
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100


# define callback for pyaudio to store the recording in queue
def pyaudio_callback(in_data, frame_count, time_info, status):
    """Queue one recorded buffer, dropping it if the queue is full."""
    try:
        # put_nowait raises Full immediately; a plain put() would block the
        # audio callback instead of discarding.
        q.put_nowait(in_data)
    except Full:
        pass  # discard
    return (None, pyaudio.paContinue)


# instantiate pyaudio
audio = pyaudio.PyAudio()

# open stream using callback
stream = audio.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    stream_callback=pyaudio_callback,
    start=False
)

#########################################################################
#### Start the recording and start service to recognize the stream ######
#########################################################################

print("Enter CTRL+C to end recording...")
stream.start_stream()

recognize_thread = Thread(target=recognize_using_weboscket, args=())
try:
    recognize_thread.start()
    # Wait with a join timeout rather than a busy `while True: pass` loop, so
    # the main thread stays responsive to CTRL+C without pinning a CPU core.
    while recognize_thread.is_alive():
        recognize_thread.join(timeout=0.5)
except KeyboardInterrupt:
    pass  # the user asked to stop; cleanup happens below
finally:
    # stop recording
    stream.stop_stream()
    stream.close()
    audio.terminate()
    audio_source.completed_recording()