1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
import os
import azure.cognitiveservices.speech as speechsdk
from openai import AzureOpenAI
# This example requires environment variables named
# "OPEN_AI_KEY", "OPEN_AI_ENDPOINT" and "OPEN_AI_DEPLOYMENT_NAME".
# The endpoint should look like the following:
# https://YOUR_OPEN_AI_RESOURCE_NAME.openai.azure.com/
client = AzureOpenAI(
    azure_endpoint=os.environ.get('OPEN_AI_ENDPOINT'),
    api_key=os.environ.get('OPEN_AI_KEY'),
    api_version="2024-05-01-preview",
)

# This corresponds to the custom name chosen for the deployment
# when the model was deployed.
deployment_id = os.environ.get('OPEN_AI_DEPLOYMENT_NAME')

# This example requires environment variables
# named "SPEECH_KEY" and "SPEECH_REGION".
speech_config = speechsdk.SpeechConfig(
    subscription=os.environ.get('SPEECH_KEY'),
    region=os.environ.get('SPEECH_REGION'),
)
audio_output_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)

# Should be the locale for the speaker's language.
speech_config.speech_recognition_language = "zh-CN"
speech_recognizer = speechsdk.SpeechRecognizer(
    speech_config=speech_config,
    audio_config=audio_config,
)

# The voice that responds on behalf of Azure OpenAI.
speech_config.speech_synthesis_voice_name = 'zh-CN-YunyiMultilingualNeural'
speech_synthesizer = speechsdk.SpeechSynthesizer(
    speech_config=speech_config,
    audio_config=audio_output_config,
)

# Marks that end a sentence for text-to-speech purposes. The CJK marks are
# written as escapes so they cannot be silently folded into their ASCII
# look-alikes (which would leave the list with duplicate entries and make
# Chinese "!", "?" and ";" go undetected).
tts_sentence_end = [
    ".", "!", "?", ";",
    "\u3002",  # ideographic full stop
    "\uff01",  # fullwidth exclamation mark
    "\uff1f",  # fullwidth question mark
    "\uff1b",  # fullwidth semicolon
    "\n",
]
# Prompts Azure OpenAI with a request and synthesizes the response.
def ask_openai(prompt):
    """Send ``prompt`` to Azure OpenAI and speak the streamed reply.

    The reply is requested in streaming mode. Streamed text is buffered
    until a sentence-end mark from ``tts_sentence_end`` arrives, then the
    buffered sentence is handed to ``speech_synthesizer`` so speech can
    start before the full reply has been received. Any text still buffered
    when the stream ends is spoken as well.

    Args:
        prompt: The user's request text, sent as a single "user" message.

    Returns:
        None. The function blocks until the last synthesis request it
        issued has completed.
    """
    # Ask Azure OpenAI in a streaming way.
    response = client.chat.completions.create(
        model=deployment_id,
        max_tokens=200,
        stream=True,
        messages=[{"role": "user", "content": prompt}],
    )

    # str.endswith needs a tuple; converting also accepts any iterable.
    sentence_end_marks = tuple(tts_sentence_end)
    collected_messages = []
    last_tts_request = None

    def synthesize_collected():
        """Speak the buffered text, empty the buffer, return the request.

        Returns None when the buffer holds only whitespace.
        """
        text = ''.join(collected_messages).strip()
        collected_messages.clear()
        if not text:
            # Nothing but newlines or spaces: skip synthesis.
            return None
        print(f"Speech synthesized to speaker for: {text}")
        return speech_synthesizer.speak_text_async(text)

    # Iterate through the streamed response.
    for chunk in response:
        if not chunk.choices:
            continue
        chunk_message = chunk.choices[0].delta.content
        if chunk_message is None:
            continue
        collected_messages.append(chunk_message)
        # A chunk may carry more than the bare mark (for example ".\n\n"),
        # so test how the chunk ends rather than requiring equality.
        if chunk_message.endswith(sentence_end_marks):
            request = synthesize_collected()
            if request is not None:
                last_tts_request = request

    # The reply may stop without a closing mark (for example when it is cut
    # off by max_tokens); speak whatever is left instead of dropping it.
    request = synthesize_collected()
    if request is not None:
        last_tts_request = request

    # Wait for the most recently issued synthesis request to finish.
    if last_tts_request is not None:
        last_tts_request.get()
# Continuously listens for speech input to recognize
# and send as text to Azure OpenAI
def chat_with_open_ai():
    """Run the voice conversation loop until it is ended.

    Each iteration recognizes one utterance from the microphone and, when
    speech was recognized, forwards the text to ``ask_openai``. The loop
    ends when the recognized text is "Stop." / "Stop。", when no speech
    could be recognized, when recognition is canceled because of an error,
    or when ``EOFError`` is raised.
    """
    while True:
        print("Azure OpenAI is listening.\n"
              "Say 'Stop' or press Ctrl-Z to end the conversation.")
        try:
            # Get audio from the microphone and send it to the
            # speech recognition service.
            speech_recognition_result = (
                speech_recognizer.recognize_once_async().get()
            )
            reason = speech_recognition_result.reason

            # If speech is recognized, send it to Azure OpenAI
            # and listen for the response.
            if reason == speechsdk.ResultReason.RecognizedSpeech:
                if speech_recognition_result.text in ("Stop.", "Stop。"):
                    print("Conversation ended.")
                    break
                print("Recognized speech: {}".format(
                    speech_recognition_result.text))
                ask_openai(speech_recognition_result.text)
            elif reason == speechsdk.ResultReason.NoMatch:
                print("No speech could be recognized: {}".format(
                    speech_recognition_result.no_match_details))
                break
            elif reason == speechsdk.ResultReason.Canceled:
                cancellation_details = (
                    speech_recognition_result.cancellation_details
                )
                print("Speech Recognition canceled: {}".format(
                    cancellation_details.reason))
                if (cancellation_details.reason
                        == speechsdk.CancellationReason.Error):
                    print("Error details: {}".format(
                        cancellation_details.error_details))
                    # Stop instead of immediately retrying: without an exit
                    # here an error that keeps recurring would make this
                    # loop spin and print forever.
                    break
        except EOFError:
            break
# Main
# Start the conversation loop; report any exception that escapes it
# instead of letting the script end with a traceback.
try:
    chat_with_open_ai()
except Exception as err:
    print(f"Encountered exception. {err}")
|