File size: 1,940 Bytes
9ecdb48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import openai
import soundfile

# import whisper
from gtts import gTTS

# Maps the language name given to CahtBOT.setup() to the language code that
# is later passed to gTTS as ``lang``.
dic = dict(Japanese="ja", English="en")


class CahtBOT:
    """Voice chat helper that combines the OpenAI API with gTTS.

    Call :meth:`setup` first: it stores the API key, selects the reply
    language and seeds the conversation. After that, :meth:`transcribe`
    turns recorded audio into text and :meth:`answer_by_chat` sends a
    question to the chat model and returns the reply as synthesized speech.
    """

    def __init__(self):
        # Conversation sent to the chat API on every request; stays None
        # until setup() builds the initial list.
        self.messages = None

    def setup(
        self,
        role1,
        content1,
        role2,
        content2,
        role3,
        content3,
        role4,
        content4,
        role5,
        content5,
        api_key,
        language,
    ):
        """Store credentials, pick the reply language and seed the chat.

        Args:
            role1, content1, ..., role5, content5: Up to five role/content
                pairs used as the initial conversation. A pair is skipped
                when its role or its content is the empty string.
            api_key: Assigned to ``openai.api_key``.
            language: Key of the module-level ``dic`` table
                (for example ``"Japanese"`` or ``"English"``).

        Raises:
            KeyError: If ``language`` is not a key of ``dic``.
        """
        openai.api_key = api_key
        self.language = dic[language]
        seed_pairs = [
            (role1, content1),
            (role2, content2),
            (role3, content3),
            (role4, content4),
            (role5, content5),
        ]
        self.messages = [
            {"role": role, "content": content}
            for role, content in seed_pairs
            if role != "" and content != ""
        ]

    def transcribe(self, audio):
        """Transcribe a recorded clip with the ``whisper-1`` model.

        Args:
            audio: A ``(sample_rate, data)`` pair. It is written to
                ``tmp.wav`` in the working directory via ``soundfile.write``
                before being uploaded.

        Returns:
            The ``text`` attribute of the transcription response.
        """
        sample_rate, data = audio
        soundfile.write(file="tmp.wav", data=data, samplerate=sample_rate)
        # The context manager closes the handle even when the API call
        # raises; a bare open() here leaked one descriptor per call.
        with open("tmp.wav", "rb") as audio_file:
            transcript = openai.Audio.transcribe("whisper-1", audio_file)
        return transcript.text

    def answer_by_chat(self, history, question):
        """Ask the chat model a question and append the spoken reply.

        The question and the model's reply are both appended to
        ``self.messages`` so later calls carry the full conversation.

        Args:
            history: Sequence of ``(user, bot)`` pairs shown to the user.
                It is extended with ``+=``, so a list passed in is modified
                in place.
            question: The user's message text.

        Returns:
            ``history`` with two new entries: ``(question, None)`` and
            ``(None, (audio_path,))``.
        """
        self.messages.append({"role": "user", "content": question})
        history += [(question, None)]
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo", messages=self.messages
        )
        reply = response["choices"][0]["message"]
        response_text = reply["content"]
        response_role = reply["role"]
        response_audio = self.speech_synthesis(response_text)
        self.messages.append({"role": response_role, "content": response_text})
        # The reply is surfaced as an audio file rather than as text.
        # NOTE(review): the one-element tuple is presumably the UI's
        # convention for a file message -- confirm against the caller.
        history += [(None, (response_audio,))]
        return history

    def speech_synthesis(self, sentence):
        """Synthesize ``sentence`` with gTTS and return the output path.

        Uses the language code selected in :meth:`setup`. The audio is saved
        to ``tmp.wav``, the same path :meth:`transcribe` writes to, so each
        call overwrites the previous file.

        NOTE(review): gTTS is believed to emit MP3 data regardless of the
        ``.wav`` extension -- confirm that the consumer of this path does not
        rely on the extension.
        """
        tts = gTTS(sentence, lang=self.language)
        tts.save("tmp.wav")
        return "tmp.wav"