|
<html> |
|
<head> |
|
<meta charset="UTF-8"> |
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
<title>Miibo Avatar Sample</title> |
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/recorderjs/0.1.0/recorder.min.js" integrity="sha512-Dc8aBUPSsnAiEtyqTYZrldxDfs2FnS8cU7BVHIJ1m5atjKrtQCoPRIn3gsVbKm2qY8NwjpTVTnawoC4XBvEZiQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script> |
|
<style> |
|
|
|
|
|
|
|
|
|
|
|
|
|
</style> |
|
</head> |
|
<body> |
|
<video id="my-video" autoplay></video> |
|
|
|
<script> |
|
|
|
'use strict'; |
|
class MiiboAvatar { |
|
constructor(config) { |
|
this.container = config.container; |
|
this.speechToTextOptions = config.option.speech_to_text; |
|
this.miiboOptions = config.option.miibo; |
|
this.didOptions = config.option.d_id; |
|
this.initialize(); |
|
} |
|
|
|
initialize() { |
|
const RTCPeerConnection = window.RTCPeerConnection || window.webkitRTCPeerConnection || window.mozRTCPeerConnection; |
|
|
|
this.videoElement = document.getElementById(this.container); |
|
this.videoElement.setAttribute('playsinline', ''); |
|
this.playIdleVideo(); |
|
|
|
this.createNewStream(); |
|
this.rec = new webkitSpeechRecognition() |
|
|
|
this.speaching = false; |
|
this.processing = false; |
|
this.streams = []; |
|
} |
|
|
|
async createNewStream() { |
|
try { |
|
this.stopAllStreams(); |
|
this.closePC(); |
|
|
|
let presenter = {"source_url": this.didOptions.presenter.image_url} |
|
const sessionResponse = await this.fetchWithRetries(`https://api.d-id.com/${this.didOptions.service}/streams`, { |
|
method: 'POST', |
|
headers: { |
|
Authorization: `Basic ${this.didOptions.key}`, |
|
'Content-Type': 'application/json', |
|
}, |
|
body: JSON.stringify(presenter), |
|
}); |
|
|
|
const { id: newStreamId, offer, ice_servers: iceServers, session_id: newSessionId } = await sessionResponse.json(); |
|
this.streamId = newStreamId; |
|
this.sessionId = newSessionId; |
|
|
|
try { |
|
this.sessionClientAnswer = await this.createPeerConnection(offer, iceServers); |
|
} catch (e) { |
|
console.log('Error creating peer connection:', e); |
|
this.stopAllStreams(); |
|
this.closePC(); |
|
return; |
|
} |
|
|
|
const sdpResponse = await fetch(`https://api.d-id.com/${this.didOptions.service}/streams/${this.streamId}/sdp`, { |
|
method: 'POST', |
|
headers: { |
|
Authorization: `Basic ${this.didOptions.key}`, |
|
'Content-Type': 'application/json', |
|
}, |
|
body: JSON.stringify({ |
|
answer: this.sessionClientAnswer, |
|
session_id: this.sessionId, |
|
}), |
|
}); |
|
|
|
|
|
} catch (error) { |
|
console.log('Error creating new stream:', error); |
|
|
|
} |
|
} |
|
|
|
speechRecogInit() { |
|
this.audioContext = new (window.AudioContext || window.webkitAudioContext)() |
|
|
|
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) { |
|
navigator.mediaDevices.getUserMedia({audio: true}).then((stream) => { |
|
var input = this.audioContext.createMediaStreamSource(stream) |
|
this.audioContext.resume() |
|
this.recorder = new Recorder(input) |
|
}) |
|
} |
|
} |
|
|
|
|
|
startRecording() { |
|
this.recorder && this.recorder.record(); |
|
} |
|
|
|
stopRecording() { |
|
this.playLoadingVideo(); |
|
this.recorder && this.recorder.stop(); |
|
this.audioRecognize(); |
|
this.recorder.clear(); |
|
} |
|
|
|
audioRecognize() { |
|
this.recorder && this.recorder.exportWAV((blob) => { |
|
let reader = new FileReader() |
|
reader.onload = () => { |
|
let result = new Uint8Array(reader.result) |
|
|
|
let data = { |
|
"config": { |
|
"encoding": "LINEAR16", |
|
"languageCode": "ja-JP", |
|
"alternativeLanguageCodes": ["en-US"], |
|
"audio_channel_count": 2 |
|
}, |
|
"audio": { |
|
"content": this.arrayBufferToBase64(result) |
|
} |
|
} |
|
fetch('https://speech.googleapis.com/v1/speech:recognize?key=' + this.speechToTextOptions.api_key, { |
|
method: 'POST', |
|
headers: { |
|
'Content-Type': 'application/json; charset=utf-8' |
|
}, |
|
body: JSON.stringify(data) |
|
}).then((response) => { |
|
return response.text() |
|
}).then((text) => { |
|
let result_json = JSON.parse(text) |
|
text = result_json.results[0].alternatives[0].transcript; |
|
this.languageCode = result_json.results[0].languageCode; |
|
this.ask(text) |
|
}) |
|
} |
|
reader.readAsArrayBuffer(blob) |
|
}) |
|
} |
|
|
|
|
|
autoRecognize() { |
|
this.rec.continuous = false |
|
this.rec.interimResults = false |
|
this.rec.lang = 'ja-JP' |
|
|
|
this.rec.onresult = (e) => { |
|
this.processing = true |
|
this.playLoadingVideo(); |
|
|
|
this.rec.stop() |
|
|
|
for (var i = e.resultIndex; i < e.results.length; i++) { |
|
if (!e.results[i].isFinal) continue |
|
|
|
const { transcript } = e.results[i][0] |
|
this.ask(transcript); |
|
} |
|
} |
|
|
|
this.rec.onend = () => { this.autoRecognizeRestart() } |
|
this.rec.start() |
|
} |
|
|
|
autoRecognizeRestart() { |
|
if (this.processing) { |
|
setTimeout(() => {this.autoRecognizeRestart()}, 1000) |
|
} else { |
|
this.rec.start() |
|
} |
|
} |
|
|
|
arrayBufferToBase64(buffer) { |
|
let binary = '' |
|
let bytes = new Uint8Array(buffer); |
|
let len = bytes.byteLength |
|
for (let i = 0; i < len; i++) { |
|
binary += String.fromCharCode(bytes[i]) |
|
} |
|
return window.btoa(binary) |
|
} |
|
|
|
ask(message) { |
|
this.getMiiboResponse(message); |
|
} |
|
|
|
async getMiiboResponse(utterance) { |
|
const params = { |
|
api_key: this.miiboOptions.api_key, |
|
agent_id: this.miiboOptions.agent_id, |
|
uid: this.miiboOptions.user_id, |
|
stream: true, |
|
utterance: utterance |
|
}; |
|
|
|
try { |
|
const res = await fetch("https://api-mebo.dev/api", { |
|
method: "POST", |
|
headers: { "Content-Type": "application/json" }, |
|
body: JSON.stringify(params), |
|
}); |
|
|
|
const reader = res.body.getReader(); |
|
const decoder = new TextDecoder(); |
|
let output = ""; |
|
let sentences = []; |
|
let current_index = 0; |
|
|
|
const read = async () => { |
|
const { done, value } = await reader.read(); |
|
if (done) return; |
|
|
|
let dataString = decoder.decode(value).split("\n").filter(x => x != ""); |
|
|
|
try { |
|
let responseData = JSON.parse(dataString[dataString.length - 1]); |
|
|
|
output = responseData.bestResponse.utterance.split("\n").filter(x => x.trim() != "").join("\n"); |
|
sentences = output.replace(/[。、\.\,\!\?!?]/,".").split(".") |
|
if (this.didOptions.priority == "speed" && current_index == 0 && current_index + 1 < sentences.length) { |
|
this.startTalk(sentences[current_index++]) |
|
} |
|
} catch(e) { |
|
console.log(e); |
|
} |
|
|
|
return read(); |
|
}; |
|
|
|
await read(); |
|
reader.releaseLock(); |
|
|
|
this.startTalk(sentences.slice(current_index).join("。")); |
|
} catch(error) { |
|
console.log("Error fetching AI response: ", error); |
|
} |
|
} |
|
|
|
async startTalk(input) { |
|
if (this.peerConnection?.signalingState === 'stable' || this.peerConnection?.iceConnectionState === 'connected') { |
|
|
|
const gender = this.didOptions.presenter.gender; |
|
let voice_id = this.didOptions.presenter.voice_id || ""; |
|
|
|
if (voice_id == "") { |
|
switch (this.languageCode) { |
|
case "en-us": |
|
voice_id = gender == "male" ? "en-US-GuyNeural" : "en-US-AriaNeural" |
|
break; |
|
case "ko-kr": |
|
voice_id = gender == "male" ? "ko-KR-InJoonNeural" : "ko-KR-YuJinNeural" |
|
break; |
|
case "cmn-CN": |
|
voice_id = gender == "male" ? "zh-CN-YunjianNeural" : "zh-CN-XiaohanNeural" |
|
break; |
|
default: |
|
voice_id = gender == "male" ? "ja-JP-KeitaNeural" : "ja-JP-NanamiNeural" |
|
} |
|
} |
|
|
|
const requestOptions = { |
|
method: 'POST', |
|
headers: { |
|
Authorization: `Basic ${this.didOptions.key}`, |
|
'Content-Type': 'application/json', |
|
}, |
|
body: JSON.stringify({ |
|
script: { |
|
type: "text", |
|
subtitles: false, |
|
provider: { |
|
type: "microsoft", |
|
voice_id: voice_id |
|
}, |
|
ssml: false, |
|
input: input |
|
}, |
|
config: { |
|
fluent: false, |
|
pad_audio: 0, |
|
align_driver: false, |
|
stitch: false, |
|
auto_match: false, |
|
sharpen: false, |
|
normalization_factor: 0 |
|
}, |
|
session_id: this.sessionId, |
|
}), |
|
}; |
|
|
|
if (this.didOptions.service === 'clips') { |
|
requestOptions.body.background = { color: '#FFFFFF' }; |
|
} |
|
|
|
try { |
|
const playResponse = await this.fetchWithRetries(`https://api.d-id.com/${this.didOptions.service}/streams/${this.streamId}`, requestOptions); |
|
|
|
} catch (error) { |
|
console.error('Error starting talk:', error); |
|
|
|
} |
|
} |
|
} |
|
|
|
async destoryTalk(input) { |
|
try { |
|
await fetch(`https://api.d-id.com/${this.didOptions.service}/streams/${this.streamId}`, { |
|
method: 'DELETE', |
|
headers: { |
|
Authorization: `Basic ${this.didOptions.key}`, |
|
'Content-Type': 'application/json', |
|
}, |
|
body: JSON.stringify({ session_id: this.sessionId }), |
|
}); |
|
|
|
await this.stopAllStreams(); |
|
await this.closePC(); |
|
} catch (error) { |
|
console.error('Error destroying talk:', error); |
|
|
|
} |
|
} |
|
|
|
onIceCandidate(event) { |
|
if (event.candidate) { |
|
const { candidate, sdpMid, sdpMLineIndex } = event.candidate; |
|
|
|
fetch(`https://api.d-id.com/${this.didOptions.service}/streams/${this.streamId}/ice`, { |
|
method: 'POST', |
|
headers: { |
|
Authorization: `Basic ${this.didOptions.key}`, |
|
'Content-Type': 'application/json', |
|
}, |
|
body: JSON.stringify({ |
|
candidate, |
|
sdpMid, |
|
sdpMLineIndex, |
|
session_id: this.sessionId, |
|
}), |
|
}).catch((error) => { |
|
console.error('Error sending ICE candidate:', error); |
|
|
|
}); |
|
} |
|
} |
|
|
|
onIceConnectionStateChange() { |
|
if (this.peerConnection.iceConnectionState === 'failed' || this.peerConnection.iceConnectionState === 'closed') { |
|
this.stopAllStreams(); |
|
this.closePC(); |
|
} |
|
} |
|
|
|
onTrack(event) { |
|
if (!event.track || !event.streams || event.streams.length === 0) return; |
|
|
|
this.statsIntervalId = setInterval(async () => { |
|
const stats = await this.peerConnection.getStats(event.track); |
|
stats.forEach((report) => { |
|
if (report.type === 'inbound-rtp' && report.mediaType === 'video') { |
|
const videoStatusChanged = this.videoIsPlaying !== report.bytesReceived > this.lastBytesReceived; |
|
|
|
if (videoStatusChanged) { |
|
this.videoIsPlaying = report.bytesReceived > this.lastBytesReceived; |
|
this.onVideoStatusChange(this.videoIsPlaying, event.streams[0]); |
|
} |
|
this.lastBytesReceived = report.bytesReceived; |
|
} |
|
}); |
|
}, 100); |
|
} |
|
|
|
onVideoStatusChange(videoIsPlaying, stream) { |
|
let status; |
|
if (videoIsPlaying) { |
|
status = 'streaming'; |
|
const remoteStream = stream; |
|
this.streams.push(remoteStream); |
|
this.checkSpeaching(); |
|
} else { |
|
status = 'empty'; |
|
this.speaching = false; |
|
this.processing = false; |
|
this.playIdleVideo(); |
|
} |
|
} |
|
|
|
checkSpeaching() { |
|
if (this.speaching) { |
|
setTimeout(() => {this.checkSpeaching()}, 20) |
|
} else { |
|
this.setVideoElement(this.streams.shift()); |
|
} |
|
} |
|
|
|
async createPeerConnection(offer, iceServers) { |
|
if (!this.peerConnection) { |
|
this.peerConnection = new RTCPeerConnection({ iceServers }); |
|
this.peerConnection.addEventListener('icecandidate', this.onIceCandidate.bind(this), true); |
|
this.peerConnection.addEventListener('iceconnectionstatechange', this.onIceConnectionStateChange.bind(this), true); |
|
this.peerConnection.addEventListener('track', this.onTrack.bind(this), true); |
|
} |
|
|
|
await this.peerConnection.setRemoteDescription(offer); |
|
const sessionClientAnswer = await this.peerConnection.createAnswer(); |
|
await this.peerConnection.setLocalDescription(sessionClientAnswer); |
|
return sessionClientAnswer; |
|
} |
|
|
|
|
|
setVideoElement(stream) { |
|
if (!stream) return; |
|
this.videoElement.srcObject = stream; |
|
this.videoElement.loop = false; |
|
|
|
|
|
if (this.videoElement.paused) { |
|
this.videoElement |
|
.play() |
|
.then((_) => {}) |
|
.catch((e) => {}); |
|
} |
|
} |
|
|
|
playIdleVideo() { |
|
this.videoElement.srcObject = undefined; |
|
this.videoElement.src = this.didOptions.presenter.idle_movie; |
|
this.videoElement.loop = true; |
|
} |
|
|
|
playLoadingVideo() { |
|
this.videoElement.srcObject = undefined; |
|
this.videoElement.src = this.didOptions.presenter.loading_movie; |
|
this.videoElement.loop = false; |
|
} |
|
|
|
stopAllStreams() { |
|
if (this.videoElement.srcObject) { |
|
this.videoElement.srcObject.getTracks().forEach((track) => track.stop()); |
|
this.videoElement.srcObject = null; |
|
} |
|
} |
|
|
|
closePC(pc = this.peerConnection) { |
|
if (!pc) return; |
|
pc.close(); |
|
pc.removeEventListener('icecandidate', this.onIceCandidate.bind(this), true); |
|
pc.removeEventListener('iceconnectionstatechange', this.onIceConnectionStateChange.bind(this), true); |
|
pc.removeEventListener('track', this.onTrack.bind(this), true); |
|
clearInterval(this.statsIntervalId); |
|
if (pc === this.peerConnection) { |
|
this.peerConnection = null; |
|
} |
|
} |
|
|
|
async fetchWithRetries(url, options, retries = 1) { |
|
const maxRetryCount = 3; |
|
const maxDelaySec = 4; |
|
try { |
|
return await fetch(url, options); |
|
} catch (err) { |
|
if (retries <= maxRetryCount) { |
|
const delay = Math.min(Math.pow(2, retries) / 4 + Math.random(), maxDelaySec) * 1000; |
|
await new Promise((resolve) => setTimeout(resolve, delay)); |
|
return this.fetchWithRetries(url, options, retries + 1); |
|
} else { |
|
throw new Error(`Max retries exceeded. error: ${err}`); |
|
} |
|
} |
|
} |
|
} |
|
|
|
const miiboAvatar = new MiiboAvatar({ |
|
container: "my-video", |
|
option: { |
|
miibo: { |
|
api_key: "ed212f05-a0f2-4838-9bb3-26d318504a76190c9e5bc19f0", |
|
agent_id: "3e83a6b2-3c03-42b1-abc7-8a2dd97a8999190c7d1806120c", |
|
user_id: "user", |
|
}, |
|
d_id: { |
|
key: "bWl5YXRha2VuOTk4QGdtYWlsLmNvbQ:RlKS_8GI5oxwQ7PVyQuQF", |
|
service: "talks", |
|
priority: "cost", |
|
presenter:{ |
|
gender: "male", |
|
image_url: "https://kenken999-fastapi-django-main-live.hf.space/static/sugi.jpg", |
|
idle_movie: "https://github.com/miibo-takumori/resorces/raw/main/portorate-idle.mp4", |
|
loading_movie: "https://github.com/miibo-takumori/resorces/raw/main/portorate-loading.mp4", |
|
} |
|
} |
|
} |
|
}) |
|
|
|
miiboAvatar.autoRecognize(); |
|
</script> |
|
</body> |
|
</html> |