<!DOCTYPE html>
<html>

<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width">
<title>Sample Chat AI VTuber</title>
<link rel="stylesheet" href="aivtuber.css">
<script src="./aivtuber.js"></script>
<!-- Live2D sample (was a second <title>Live2Dサンプル</title>; a document may only have one title) -->

<script src="./live2dcubismcore.js"></script>
<script src="//cdn.jsdelivr.net/gh/dylanNew/live2d/webgl/Live2D/lib/live2d.min.js"></script>

<script src="//cdnjs.cloudflare.com/ajax/libs/pixi.js/5.1.3/pixi.min.js"></script>
<script src="//cdn.jsdelivr.net/npm/pixi-live2d-display/dist/index.min.js"></script>

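<!-- NOTE(review): the package spec in this URL had been mangled to "[email protected]" by e-mail
     obfuscation. "kalidokit@1.1" is the spec shown in the Kalidokit README; confirm the intended version. -->
<script src="//cdn.jsdelivr.net/npm/kalidokit@1.1/dist/kalidokit.umd.js"></script>
</head>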

<body>

<video id="my-video" autoplay></video>

<script>
// The directive only takes effect when it is the first statement of the script,
// so it must precede every declaration.
'use strict';

// Set to 1 to enable the step-by-step alert() tracing used throughout this script.
let _debug = 0;
/**
 * Speaks the given text with the browser's default Japanese voice.
 *
 * @param {string} input - Text to read aloud.
 */
function speaks(input) {
  const speech = new SpeechSynthesisUtterance(input);
  speech.lang = "ja-JP";
  window.speechSynthesis.speak(speech);
}
/**
 * Speaks the given text in Japanese, preferring a specific installed voice.
 *
 * Voice preference order:
 *   1. a voice whose name contains "Google 日本語";
 *   2. a Japanese-language voice whose name contains "Male";
 *   3. otherwise no explicit voice is set and the engine picks one for "ja-JP".
 *
 * A "Male" voice in another language is deliberately not selected: an explicit
 * `voice` takes precedence over `lang`, so e.g. "Google UK English Male" would
 * be asked to read Japanese text.
 *
 * @param {string} input - Text to read aloud.
 */
function speak(input) {
  const speech = new SpeechSynthesisUtterance(input);

  // getVoices() may legitimately return an empty list (voices load lazily);
  // in that case we simply fall through to the language default.
  const voices = window.speechSynthesis.getVoices() || [];
  const isJapanese = (voice) =>
    String(voice.lang ?? "").toLowerCase().replace("_", "-").startsWith("ja");

  const preferredVoice =
    voices.find((voice) => voice.name.includes("Google 日本語")) ??
    voices.find((voice) => voice.name.includes("Male") && isJapanese(voice));

  if (preferredVoice) {
    speech.voice = preferredVoice;
  }

  speech.lang = "ja-JP";
  speech.pitch = 1;
  speech.rate = 1;
  speech.volume = 1;

  window.speechSynthesis.speak(speech);
}
/**
 * Browser-side controller for a conversational avatar.
 *
 * Responsibilities visible in this class:
 *  - drives a <video> element (idle clip, loading clip, or a remote WebRTC stream);
 *  - opens a D-ID streaming session over WebRTC (`createNewStream`);
 *  - captures the user's utterance via the Web Speech API (`autoRecognize`),
 *    typed text (`autoRecognizeMessage`) or a recorder + Google Speech-to-Text
 *    (`startRecording` / `stopRecording`);
 *  - sends the utterance to the miibo API and voices the streamed reply
 *    (`getMiiboResponse` -> `startTalk`).
 *
 * External globals used at call time: `speak`, `startTyping`, `Recorder`,
 * `alert`, and the optional `_debug` flag.
 */
class MiiboAvatar {
  /**
   * @param {object} config
   * @param {string} config.container - id of the <video> element to drive.
   * @param {object} config.option
   * @param {object} [config.option.speech_to_text] - `{ api_key }` for Google Speech-to-Text.
   * @param {object} config.option.miibo - `{ api_key, agent_id, user_id }`.
   * @param {object} config.option.d_id - `{ key, service, priority, presenter }`.
   */
  constructor(config) {
    this.container = config.container;
    this.speechToTextOptions = config.option.speech_to_text;
    this.miiboOptions = config.option.miibo;
    this.didOptions = config.option.d_id;
    this.initialize();
  }

  /**
   * Sets up state, the video element, the speech recogniser and kicks off the
   * D-ID streaming session.
   *
   * @throws {Error} when the browser offers no SpeechRecognition implementation.
   */
  initialize() {
    // State is initialised first so that nothing started below can observe it undefined.
    this.speaching = false;
    this.processing = false;
    this.streams = [];
    this.statsIntervalIds = [];

    this.videoElement = document.getElementById(this.container);
    this.videoElement.setAttribute('playsinline', '');
    this.playIdleVideo();

    const SpeechRecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognitionCtor) {
      throw new Error('SpeechRecognition is not supported by this browser');
    }
    this.rec = new SpeechRecognitionCtor();

    // Fire-and-forget: createNewStream() handles and logs its own failures.
    this.createNewStream();
  }

  /**
   * Shows `message` in an alert() only while the page-level `_debug` flag is 1.
   *
   * @param {string} message
   */
  debugAlert(message) {
    if (typeof _debug !== 'undefined' && _debug == 1) {
      alert(message);
    }
  }

  /**
   * Builds a D-ID streams endpoint URL for the configured service.
   *
   * @param {string} [path=''] - Suffix appended after `/streams` (e.g. `/<id>/sdp`).
   * @returns {string}
   */
  didStreamsUrl(path = '') {
    return `https://api.d-id.com/${this.didOptions.service}/streams${path}`;
  }

  /**
   * @returns {{Authorization: string, 'Content-Type': string}} Headers sent with every D-ID request.
   */
  didHeaders() {
    return {
      Authorization: `Basic ${this.didOptions.key}`,
      'Content-Type': 'application/json',
    };
  }

  /**
   * Tears down any existing session, creates a new D-ID stream, answers its
   * WebRTC offer and posts the SDP answer back. All failures are logged, never thrown.
   */
  async createNewStream() {
    this.debugAlert("53");
    try {
      this.stopAllStreams();
      this.closePC();

      const presenter = { source_url: this.didOptions.presenter.image_url };
      const sessionResponse = await this.fetchWithRetries(this.didStreamsUrl(), {
        method: 'POST',
        headers: this.didHeaders(),
        body: JSON.stringify(presenter),
      });
      if (!sessionResponse.ok) {
        throw new Error(`Stream creation failed with HTTP ${sessionResponse.status}`);
      }

      const {
        id: newStreamId,
        offer,
        ice_servers: iceServers,
        session_id: newSessionId,
      } = await sessionResponse.json();
      this.streamId = newStreamId;
      this.sessionId = newSessionId;

      try {
        this.sessionClientAnswer = await this.createPeerConnection(offer, iceServers);
      } catch (e) {
        console.log('Error creating peer connection:', e);
        this.stopAllStreams();
        this.closePC();
        return;
      }

      const sdpResponse = await fetch(this.didStreamsUrl(`/${this.streamId}/sdp`), {
        method: 'POST',
        headers: this.didHeaders(),
        body: JSON.stringify({
          answer: this.sessionClientAnswer,
          session_id: this.sessionId,
        }),
      });
      if (!sdpResponse.ok) {
        console.log('Error sending SDP answer: HTTP', sdpResponse.status);
      }
    } catch (error) {
      console.log('Error creating new stream:', error);
    }
  }

  /**
   * Requests microphone access and prepares `this.recorder` (a `Recorder`
   * instance from an externally loaded script) for push-to-talk recording.
   */
  speechRecogInit() {
    this.audioContext = new (window.AudioContext || window.webkitAudioContext)();

    if (!navigator.mediaDevices?.getUserMedia) return;

    navigator.mediaDevices
      .getUserMedia({ audio: true })
      .then((stream) => {
        const input = this.audioContext.createMediaStreamSource(stream);
        this.audioContext.resume();
        this.recorder = new Recorder(input);
      })
      .catch((error) => {
        console.log('Error accessing microphone:', error);
      });
  }

  /** Starts recording if a recorder has been initialised. */
  startRecording() {
    this.recorder?.record();
  }

  /**
   * Stops recording, sends the captured audio for recognition and clears the
   * recorder buffer. Does nothing when no recorder has been initialised.
   */
  stopRecording() {
    if (!this.recorder) return;

    this.playLoadingVideo();
    this.recorder.stop();
    this.audioRecognize();
    this.recorder.clear();
  }

  /**
   * Exports the recording as WAV, sends it to Google Speech-to-Text and passes
   * the best transcript to `ask()`. When nothing was recognised or the request
   * fails, the avatar is returned to its idle state instead of hanging on the
   * loading clip.
   */
  audioRecognize() {
    if (!this.recorder) return;

    this.recorder.exportWAV((blob) => {
      const reader = new FileReader();
      reader.onload = () => {
        const payload = {
          "config": {
            "encoding": "LINEAR16",
            "languageCode": "ja-JP",
            "alternativeLanguageCodes": ["en-US"],
            "audio_channel_count": 2,
          },
          "audio": {
            "content": this.arrayBufferToBase64(reader.result),
          },
        };

        fetch('https://speech.googleapis.com/v1/speech:recognize?key=' + this.speechToTextOptions.api_key, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json; charset=utf-8',
          },
          body: JSON.stringify(payload),
        })
          .then((response) => response.json())
          .then((resultJson) => {
            // `results` is absent/empty when no speech was detected.
            const best = resultJson.results?.[0];
            const transcript = best?.alternatives?.[0]?.transcript;
            if (!transcript) {
              console.log('Speech recognition returned no transcript:', resultJson);
              this.finishTalk();
              return;
            }
            this.languageCode = best.languageCode;
            this.ask(transcript);
          })
          .catch((error) => {
            console.log('Error recognizing speech:', error);
            this.finishTalk();
          });
      };
      reader.readAsArrayBuffer(blob);
    });
  }

  /** Applies the recogniser settings shared by both recognition entry points. */
  configureRecognition() {
    this.rec.continuous = false;
    this.rec.interimResults = false;
    this.rec.lang = 'ja-JP';
  }

  /**
   * Starts the recogniser, tolerating the InvalidStateError that `start()`
   * raises while a previous session is still active (for example immediately
   * after `stop()`). In that case the `onend` handler restarts recognition once
   * the old session has actually ended. Any other error is rethrown.
   */
  startRecognition() {
    try {
      this.rec.start();
    } catch (error) {
      if (error?.name !== 'InvalidStateError') {
        throw error;
      }
    }
  }

  /**
   * Submits a typed message as if it had been recognised from speech.
   *
   * @param {string} message - The user's utterance.
   */
  autoRecognizeMessage(message) {
    this.debugAlert("start word test");
    this.configureRecognition();
    this.processing = true;
    this.playLoadingVideo();

    this.rec.stop();

    this.ask(message);

    this.rec.onend = () => {
      this.autoRecognizeRestart();
    };
    this.startRecognition();
  }

  /**
   * Starts continuous listening: each final recognition result is sent to
   * `ask()`, and recognition restarts after every session end.
   */
  autoRecognize() {
    this.configureRecognition();

    this.rec.onresult = (e) => {
      this.processing = true;
      this.playLoadingVideo();

      this.rec.stop();
      this.debugAlert("start");

      for (let i = e.resultIndex; i < e.results.length; i++) {
        if (!e.results[i].isFinal) continue;

        const { transcript } = e.results[i][0];
        this.ask(transcript);
      }
    };

    this.rec.onend = () => {
      this.autoRecognizeRestart();
    };
    this.startRecognition();
  }

  /**
   * Restarts recognition once the current reply has been fully handled;
   * re-checks every second while `processing` is still set.
   */
  autoRecognizeRestart() {
    if (this.processing) {
      setTimeout(() => {
        this.autoRecognizeRestart();
      }, 1000);
    } else {
      this.startRecognition();
    }
  }

  /**
   * Base64-encodes binary data.
   *
   * @param {ArrayBuffer|ArrayLike<number>} buffer - Bytes to encode.
   * @returns {string} Base64 text.
   */
  arrayBufferToBase64(buffer) {
    const bytes = new Uint8Array(buffer);
    let binary = '';
    for (let i = 0; i < bytes.byteLength; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    return window.btoa(binary);
  }

  /**
   * Sends the user's message to the conversation backend.
   *
   * @param {string} message
   */
  ask(message) {
    this.debugAlert("203 ask ========================== " + message);
    this.getMiiboResponse(message);
  }

  /**
   * Drops blank lines from `text` and cuts it at sentence punctuation.
   *
   * Only the FIRST punctuation mark is normalised to "." (the pattern has no
   * global flag), so the text is cut after its first clause and the remainder
   * keeps its own punctuation; any literal "." in the remainder still splits.
   *
   * @param {string} text
   * @returns {string[]}
   */
  splitSentences(text) {
    const withoutBlankLines = text
      .split("\n")
      .filter((line) => line.trim() !== "")
      .join("\n");
    return withoutBlankLines.replace(/[。、.,!?!?]/, ".").split(".");
  }

  /**
   * Streams the miibo reply for `utterance` and hands it to `startTalk()`.
   *
   * Each received chunk is expected to end with a JSON line carrying the reply
   * so far; chunks that do not parse are logged and skipped. With
   * `priority === "speed"` the first sentence is voiced as soon as a second one
   * has started; the rest is voiced when the stream ends.
   *
   * @param {string} utterance
   */
  async getMiiboResponse(utterance) {
    this.debugAlert("209 getMiiboResponse" + utterance);
    const params = {
      api_key: this.miiboOptions.api_key,
      agent_id: this.miiboOptions.agent_id,
      uid: this.miiboOptions.user_id,
      stream: true,
      utterance: utterance,
    };

    try {
      const res = await fetch("https://api-mebo.dev/api", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(params),
      });
      if (!res.ok) {
        throw new Error(`AI response request failed with HTTP ${res.status}`);
      }

      const reader = res.body.getReader();
      const decoder = new TextDecoder();
      let sentences = [];
      let currentIndex = 0;

      try {
        for (;;) {
          const { done, value } = await reader.read();
          if (done) break;

          // stream:true keeps a multi-byte character that straddles two chunks intact.
          const lines = decoder
            .decode(value, { stream: true })
            .split("\n")
            .filter((line) => line !== "");

          try {
            const responseData = JSON.parse(lines[lines.length - 1]);
            sentences = this.splitSentences(responseData.bestResponse.utterance);
            if (this.didOptions.priority == "speed" && currentIndex === 0 && currentIndex + 1 < sentences.length) {
              this.startTalk(sentences[currentIndex++]);
            }
          } catch (e) {
            console.log(e);
          }
        }
      } finally {
        reader.releaseLock();
      }

      this.startTalk(sentences.slice(currentIndex).join("。"));
    } catch (error) {
      console.log("Error fetching AI response: ", error);
      // Without this the recogniser would wait on `processing` forever.
      this.finishTalk();
    }
  }

  /**
   * Picks the voice for a D-ID talk request: the configured
   * `presenter.voice_id` if present, otherwise a default chosen from the
   * presenter's gender and the last recognised language (compared
   * case-insensitively; Japanese is the fallback).
   *
   * @returns {string}
   */
  resolveVoiceId() {
    const { gender, voice_id: configuredVoiceId } = this.didOptions.presenter;
    if (configuredVoiceId) return configuredVoiceId;

    const isMale = gender == "male";
    switch (String(this.languageCode ?? "").toLowerCase()) {
      case "en-us":
        return isMale ? "en-US-GuyNeural" : "en-US-AriaNeural";
      case "ko-kr":
        return isMale ? "ko-KR-InJoonNeural" : "ko-KR-YuJinNeural";
      case "cmn-cn":
        return isMale ? "zh-CN-YunjianNeural" : "zh-CN-XiaohanNeural";
      default:
        return isMale ? "ja-JP-KeitaNeural" : "ja-JP-NanamiNeural";
    }
  }

  /**
   * Builds the fetch options for a D-ID "talk" request voicing `input`.
   *
   * NOTE: `startTalk()` currently voices replies locally and does not send this
   * request; the builder is kept so the payload stays in one reviewed place.
   * The payload is assembled as an object and serialised last, so the
   * `clips`-only `background` field actually ends up in the body.
   *
   * @param {string} input - Text to be spoken.
   * @returns {{method: string, headers: object, body: string}}
   */
  buildTalkRequestOptions(input) {
    const payload = {
      script: {
        type: "text",
        subtitles: false,
        provider: {
          type: "microsoft",
          voice_id: this.resolveVoiceId(),
        },
        ssml: false,
        input: input,
      },
      config: {
        fluent: false,
        pad_audio: 0,
        align_driver: false,
        stitch: false,
        auto_match: false,
        sharpen: false,
        normalization_factor: 0,
      },
      session_id: this.sessionId,
    };

    if (this.didOptions.service === 'clips') {
      payload.background = { color: '#FFFFFF' };
    }

    return {
      method: 'POST',
      headers: this.didHeaders(),
      body: JSON.stringify(payload),
    };
  }

  /**
   * Resolves once the browser's speech synthesiser reports neither a speaking
   * nor a pending utterance, or after `maxWaitMs` at the latest. The first
   * check happens after one poll interval so a just-queued utterance has time
   * to register.
   *
   * @param {number} [pollMs=200]
   * @param {number} [maxWaitMs=60000]
   */
  async waitForSpeechToFinish(pollMs = 200, maxWaitMs = 60000) {
    const synth = window.speechSynthesis;
    if (!synth) return;

    const deadline = Date.now() + maxWaitMs;
    do {
      await new Promise((resolve) => setTimeout(resolve, pollMs));
    } while ((synth.speaking || synth.pending) && Date.now() < deadline);
  }

  /** Returns the avatar to idle and lets `autoRecognizeRestart()` resume listening. */
  finishTalk() {
    this.speaching = false;
    this.processing = false;
    this.playIdleVideo();
  }

  /**
   * Voices `input` through the page-level `speak()` helper and types it into
   * `#aiResponseUtterance` via `startTyping()`, provided the peer connection is
   * up. Afterwards (or when the connection is not up) the avatar is returned to
   * idle so speech recognition can resume; no remote video arrives to do that,
   * because no talk request is sent to D-ID here.
   *
   * @param {string} input - Text to present.
   */
  async startTalk(input) {
    this.debugAlert("start streaming id===============" + input);

    const pc = this.peerConnection;
    const isConnected = pc?.signalingState === 'stable' || pc?.iceConnectionState === 'connected';
    if (!isConnected) {
      this.finishTalk();
      return;
    }

    this.debugAlert("start tolak 330");
    speak(input);

    try {
      startTyping({
        el: "#aiResponseUtterance",
        string: input,
        speed: 50,
      });
    } catch (error) {
      console.error('Error starting talk:', error);
    }

    await this.waitForSpeechToFinish();
    this.finishTalk();
  }

  /**
   * Deletes the D-ID stream and releases local WebRTC resources.
   * (The method name's spelling is kept for existing callers.)
   *
   * @param {*} [input] - Unused.
   */
  async destoryTalk(input) {
    try {
      await fetch(this.didStreamsUrl(`/${this.streamId}`), {
        method: 'DELETE',
        headers: this.didHeaders(),
        body: JSON.stringify({ session_id: this.sessionId }),
      });

      this.stopAllStreams();
      this.closePC();
    } catch (error) {
      console.error('Error destroying talk:', error);
    }
  }

  /**
   * Forwards a locally gathered ICE candidate to D-ID.
   *
   * @param {RTCPeerConnectionIceEvent} event
   */
  onIceCandidate(event) {
    if (!event.candidate) return;

    const { candidate, sdpMid, sdpMLineIndex } = event.candidate;

    fetch(this.didStreamsUrl(`/${this.streamId}/ice`), {
      method: 'POST',
      headers: this.didHeaders(),
      body: JSON.stringify({
        candidate,
        sdpMid,
        sdpMLineIndex,
        session_id: this.sessionId,
      }),
    }).catch((error) => {
      console.error('Error sending ICE candidate:', error);
    });
  }

  /** Tears the session down when the ICE connection has failed or closed. */
  onIceConnectionStateChange() {
    const state = this.peerConnection?.iceConnectionState;
    if (state === 'failed' || state === 'closed') {
      this.stopAllStreams();
      this.closePC();
    }
  }

  /**
   * Starts polling the connection's stats every 100 ms for the received track
   * and reports transitions between "video bytes arriving" and "no new bytes"
   * to `onVideoStatusChange()`. Every poller id is remembered so `closePC()`
   * can stop all of them.
   *
   * @param {RTCTrackEvent} event
   */
  onTrack(event) {
    if (!event.track || !event.streams || event.streams.length === 0) return;

    const intervalId = setInterval(async () => {
      const pc = this.peerConnection;
      if (!pc) return; // connection closed while a tick was pending

      try {
        const stats = await pc.getStats(event.track);
        stats.forEach((report) => {
          const isVideo = report.kind === 'video' || report.mediaType === 'video';
          if (report.type !== 'inbound-rtp' || !isVideo) return;

          const receiving = report.bytesReceived > this.lastBytesReceived;
          if (this.videoIsPlaying !== receiving) {
            this.videoIsPlaying = receiving;
            this.onVideoStatusChange(this.videoIsPlaying, event.streams[0]);
          }
          this.lastBytesReceived = report.bytesReceived;
        });
      } catch (error) {
        console.log('Error reading stream stats:', error);
      }
    }, 100);

    this.statsIntervalIds = [...(this.statsIntervalIds ?? []), intervalId];
    this.statsIntervalId = intervalId;
  }

  /**
   * Reacts to the remote video starting or stopping.
   *
   * @param {boolean} videoIsPlaying
   * @param {MediaStream} stream - Remote stream to show while playing.
   */
  onVideoStatusChange(videoIsPlaying, stream) {
    if (videoIsPlaying) {
      this.streams.push(stream);
      this.checkSpeaching();
    } else {
      this.finishTalk();
    }
  }

  /** Shows the next queued stream as soon as `speaching` is clear (polls every 20 ms). */
  checkSpeaching() {
    if (this.speaching) {
      setTimeout(() => {
        this.checkSpeaching();
      }, 20);
    } else {
      this.setVideoElement(this.streams.shift());
    }
  }

  /**
   * Returns the peer-connection listeners, bound once. The same function
   * objects must be passed to add- and removeEventListener, otherwise removal
   * silently does nothing.
   *
   * @returns {{icecandidate: Function, iceconnectionstatechange: Function, track: Function}}
   */
  peerConnectionHandlers() {
    if (!this.pcHandlers) {
      this.pcHandlers = {
        icecandidate: this.onIceCandidate.bind(this),
        iceconnectionstatechange: this.onIceConnectionStateChange.bind(this),
        track: this.onTrack.bind(this),
      };
    }
    return this.pcHandlers;
  }

  /**
   * Creates the peer connection if needed, applies the remote offer and
   * produces the local answer.
   *
   * @param {RTCSessionDescriptionInit} offer
   * @param {RTCIceServer[]} iceServers
   * @returns {Promise<RTCSessionDescriptionInit>} The local SDP answer.
   */
  async createPeerConnection(offer, iceServers) {
    if (!this.peerConnection) {
      const PeerConnection =
        window.RTCPeerConnection || window.webkitRTCPeerConnection || window.mozRTCPeerConnection;
      this.peerConnection = new PeerConnection({ iceServers });
      for (const [type, handler] of Object.entries(this.peerConnectionHandlers())) {
        this.peerConnection.addEventListener(type, handler, true);
      }
    }

    await this.peerConnection.setRemoteDescription(offer);
    const sessionClientAnswer = await this.peerConnection.createAnswer();
    await this.peerConnection.setLocalDescription(sessionClientAnswer);
    return sessionClientAnswer;
  }

  /**
   * Shows a remote stream in the video element.
   *
   * @param {MediaStream} [stream] - Ignored when missing.
   */
  setVideoElement(stream) {
    if (!stream) return;

    this.videoElement.srcObject = stream;
    this.videoElement.loop = false;

    if (this.videoElement.paused) {
      this.videoElement.play().catch((e) => {
        // Typically an autoplay-policy rejection; not fatal, but worth seeing.
        console.log('Error playing remote stream:', e);
      });
    }
  }

  /** Loops the presenter's idle clip. */
  playIdleVideo() {
    this.videoElement.srcObject = null;
    this.videoElement.src = this.didOptions.presenter.idle_movie;
    this.videoElement.loop = true;
  }

  /** Plays the presenter's loading clip once. */
  playLoadingVideo() {
    this.videoElement.srcObject = null;
    this.videoElement.src = this.didOptions.presenter.loading_movie;
    this.videoElement.loop = false;
  }

  /** Stops every track of the stream currently attached to the video element. */
  stopAllStreams() {
    if (this.videoElement.srcObject) {
      this.videoElement.srcObject.getTracks().forEach((track) => track.stop());
      this.videoElement.srcObject = null;
    }
  }

  /**
   * Closes a peer connection, detaches this instance's listeners and stops all
   * stats pollers.
   *
   * @param {RTCPeerConnection} [pc=this.peerConnection]
   */
  closePC(pc = this.peerConnection) {
    if (!pc) return;

    pc.close();
    for (const [type, handler] of Object.entries(this.peerConnectionHandlers())) {
      pc.removeEventListener(type, handler, true);
    }

    for (const intervalId of this.statsIntervalIds ?? []) {
      clearInterval(intervalId);
    }
    this.statsIntervalIds = [];
    clearInterval(this.statsIntervalId);

    if (pc === this.peerConnection) {
      this.peerConnection = null;
    }
  }

  /**
   * `fetch` with up to three retries on network failure, backing off
   * exponentially with jitter (capped at 4 s). HTTP error statuses are not
   * retried; they resolve like any other response.
   *
   * @param {string} url
   * @param {RequestInit} options
   * @param {number} [retries=1] - Current attempt number (used by the recursion).
   * @returns {Promise<Response>}
   * @throws {Error} once the retries are exhausted.
   */
  async fetchWithRetries(url, options, retries = 1) {
    const maxRetryCount = 3;
    const maxDelaySec = 4;
    try {
      return await fetch(url, options);
    } catch (err) {
      if (retries > maxRetryCount) {
        throw new Error(`Max retries exceeded. error: ${err}`, { cause: err });
      }
      const delay = Math.min(Math.pow(2, retries) / 4 + Math.random(), maxDelaySec) * 1000;
      await new Promise((resolve) => setTimeout(resolve, delay));
      return this.fetchWithRetries(url, options, retries + 1);
    }
  }
}
// SECURITY(review): the miibo and D-ID credentials below are shipped to every
// visitor in page source. They should be rotated and served from a backend
// proxy rather than embedded here; values are left unchanged so the page keeps working.
//
// NOTE(review): no `speech_to_text` option is supplied, so the avatar's
// `speechToTextOptions` is undefined and the recorder-based recognition path
// (which reads `speechToTextOptions.api_key`) cannot work with this config.
const miiboAvatar = new MiiboAvatar({
  container: "my-video",
  option: {
    miibo: {
      api_key: "9206bb9d-be50-46b9-8ba2-4441fee3fe7b190cb511a0c78",
      agent_id: "3cfb2749-a1fa-4a5b-a2c8-eb17aee77808190cb500dd1357",
      user_id: "user",
    },
    d_id: {
      key: "bWl5YXRha2VuOTk3QGdtYWlsLmNvbQ:m3WhwvxpDS6nrr4YdW9wX",
      service: "talks",
      priority: "cost",
      presenter: {
        gender: "male",
        image_url: "https://kenken999-fastapi-django-main-live.hf.space/static/sugi.jpg",
        idle_movie: "https://kenken999-fastapi-django-main-live.hf.space/static/miiboi.mp4",
        loading_movie: "https://kenken999-fastapi-django-main-live.hf.space/static/miibo.mp4",
      },
    },
  },
});

// Begin listening for speech as soon as the page script runs.
miiboAvatar.autoRecognize();
/**
 * Click handler for the send button: forwards the text typed into
 * `#utterances` to the avatar and clears the field.
 *
 * Blank input is ignored. The field is cleared through the queried element
 * (not through an implicit window-by-id global) and is cleared even if
 * submitting throws.
 */
const onClickSends = () => {
  const utteranceInput = document.querySelector("#utterances");
  const message = utteranceInput.value;

  if (message.trim() === "") return;

  try {
    miiboAvatar.autoRecognizeMessage(message);
  } finally {
    utteranceInput.value = "";
  }
};
</script>

<canvas id="my-live2d"></canvas>
<script src="live2d.js"></script>
<script src="index.js"></script>

<div id="vtubers">
</div>

<div id="aiResponse" class="aiResponseBox">
<p class="ai-response" id="aiResponseUtterance"></p>
</div>

<div class="bottomBox">
<p id="userComment"></p>
<button id="startLiveButton" onclick="startLive();">LIVE開始</button>
<div id="submit_forms">
<input type="text" id="utterances" />
<button id="sendButton" onclick="onClickSends();">送信</button>
</div>
</div>

</body>

</html>