Spaces:
Running
Running
Yurii Paniv
committed on
Commit
·
ecc051b
1
Parent(s):
6452277
Add multi-language support
Browse files
- .github/workflows/publish-docker.yml +9 -2
- README.md +6 -2
- client.py +10 -2
- main.py +2 -1
- static/main.js +4 -1
- templates/hello.html +14 -2
.github/workflows/publish-docker.yml
CHANGED
|
@@ -8,13 +8,20 @@ jobs:
|
|
| 8 |
steps:
|
| 9 |
- name: Check out the repo
|
| 10 |
uses: actions/checkout@v2
|
| 11 |
-
- name: Download model file
|
| 12 |
uses: dsaltares/fetch-gh-release-asset@master
|
| 13 |
with:
|
| 14 |
repo: "robinhad/voice-recognition-ua"
|
| 15 |
-
version: "
|
| 16 |
file: "uk.tflite"
|
| 17 |
token: ${{ secrets.YOUR_TOKEN }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
- name: Push to GitHub Packages
|
| 19 |
uses: docker/build-push-action@v1
|
| 20 |
with:
|
|
|
|
| 8 |
steps:
|
| 9 |
- name: Check out the repo
|
| 10 |
uses: actions/checkout@v2
|
| 11 |
+
- name: Download Ukrainian model file
|
| 12 |
uses: dsaltares/fetch-gh-release-asset@master
|
| 13 |
with:
|
| 14 |
repo: "robinhad/voice-recognition-ua"
|
| 15 |
+
version: "v0.1"
|
| 16 |
file: "uk.tflite"
|
| 17 |
token: ${{ secrets.YOUR_TOKEN }}
|
| 18 |
+
- name: Download English model file
|
| 19 |
+
uses: dsaltares/fetch-gh-release-asset@master
|
| 20 |
+
with:
|
| 21 |
+
repo: "mozilla/DeepSpeech"
|
| 22 |
+
version: "v0.7.3"
|
| 23 |
+
file: "deepspeech-0.7.3-models.tflite"
|
| 24 |
+
token: ${{ secrets.YOUR_TOKEN }}
|
| 25 |
- name: Push to GitHub Packages
|
| 26 |
uses: docker/build-push-action@v1
|
| 27 |
with:
|
README.md
CHANGED
|
@@ -1,6 +1,10 @@
|
|
| 1 |
# voice-recognition-ua
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
```
|
| 5 |
export FLASK_APP=main.py
|
| 6 |
flask run
|
|
|
|
| 1 |
# voice-recognition-ua
|
| 2 |
+
How to run:
|
| 3 |
+
1. Make sure to download:
|
| 4 |
+
2. https://github.com/robinhad/voice-recognition-ua/releases/download/0.1/uk.tflite
|
| 5 |
+
3. https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.tflite
|
| 6 |
+
|
| 7 |
+
How to launch:
|
| 8 |
```
|
| 9 |
export FLASK_APP=main.py
|
| 10 |
flask run
|
client.py
CHANGED
|
@@ -89,10 +89,18 @@ class VersionAction(argparse.Action):
|
|
| 89 |
exit(0)
|
| 90 |
|
| 91 |
|
| 92 |
-
def client(audio_file):
|
| 93 |
model_load_start = timer()
|
| 94 |
# sphinx-doc: python_ref_model_start
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
# sphinx-doc: python_ref_model_stop
|
| 97 |
model_load_end = timer() - model_load_start
|
| 98 |
print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)
|
|
|
|
| 89 |
exit(0)
|
| 90 |
|
| 91 |
|
| 92 |
+
def client(audio_file, lang="uk"):
|
| 93 |
model_load_start = timer()
|
| 94 |
# sphinx-doc: python_ref_model_start
|
| 95 |
+
model_path = "uk.tflite"
|
| 96 |
+
if lang not in ["en", "uk"]:
|
| 97 |
+
lang = "uk"
|
| 98 |
+
if lang == "uk":
|
| 99 |
+
model_path = "./uk.tflite"
|
| 100 |
+
if lang == "en":
|
| 101 |
+
model_path = "./deepspeech-0.7.3-models.tflite"
|
| 102 |
+
print(lang)
|
| 103 |
+
ds = Model(model_path)
|
| 104 |
# sphinx-doc: python_ref_model_stop
|
| 105 |
model_load_end = timer() - model_load_start
|
| 106 |
print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)
|
main.py
CHANGED
|
@@ -13,10 +13,11 @@ def index():
|
|
| 13 |
@app.route('/recognize', methods=["POST"])
|
| 14 |
def recognize():
|
| 15 |
file = request.files['file']
|
|
|
|
| 16 |
audio = BytesIO()
|
| 17 |
file.save(audio)
|
| 18 |
audio.seek(0)
|
| 19 |
-
result = client(audio)
|
| 20 |
return result
|
| 21 |
|
| 22 |
|
|
|
|
| 13 |
@app.route('/recognize', methods=["POST"])
|
| 14 |
def recognize():
|
| 15 |
file = request.files['file']
|
| 16 |
+
lang = request.form["lang"]
|
| 17 |
audio = BytesIO()
|
| 18 |
file.save(audio)
|
| 19 |
audio.seek(0)
|
| 20 |
+
result = client(audio, lang)
|
| 21 |
return result
|
| 22 |
|
| 23 |
|
static/main.js
CHANGED
|
@@ -8,6 +8,7 @@ var AudioContext = window.AudioContext || window.webkitAudioContext;
|
|
| 8 |
var audioContext; //audio context to help us record
|
| 9 |
const resultNode = document.getElementById('result');
|
| 10 |
const actionButton = document.getElementById('action');
|
|
|
|
| 11 |
|
| 12 |
function resultProcess(data) {
|
| 13 |
resultNode.textContent = `Довжина тексту: ${data.length} \n
|
|
@@ -21,6 +22,7 @@ function exportWAV(blob) {
|
|
| 21 |
actionButton.textContent = "Обробляється..."
|
| 22 |
var data = new FormData()
|
| 23 |
data.append('file', blob);
|
|
|
|
| 24 |
fetch(`./recognize`, { method: "POST", body: data })
|
| 25 |
.then(response => response.text())
|
| 26 |
.then(resultProcess);
|
|
@@ -29,7 +31,8 @@ function record() {
|
|
| 29 |
|
| 30 |
var constraints = { audio: true, video: false }
|
| 31 |
navigator.mediaDevices.getUserMedia(constraints).then(function (stream) {
|
| 32 |
-
actionButton.textContent = "Запис..."
|
|
|
|
| 33 |
actionButton.disabled = true;
|
| 34 |
/*
|
| 35 |
create an audio context after getUserMedia is called
|
|
|
|
| 8 |
var audioContext; //audio context to help us record
|
| 9 |
const resultNode = document.getElementById('result');
|
| 10 |
const actionButton = document.getElementById('action');
|
| 11 |
+
const langSelector = document.getElementById('lang');
|
| 12 |
|
| 13 |
function resultProcess(data) {
|
| 14 |
resultNode.textContent = `Довжина тексту: ${data.length} \n
|
|
|
|
| 22 |
actionButton.textContent = "Обробляється..."
|
| 23 |
var data = new FormData()
|
| 24 |
data.append('file', blob);
|
| 25 |
+
data.append("lang", langSelector.value);
|
| 26 |
fetch(`./recognize`, { method: "POST", body: data })
|
| 27 |
.then(response => response.text())
|
| 28 |
.then(resultProcess);
|
|
|
|
| 31 |
|
| 32 |
var constraints = { audio: true, video: false }
|
| 33 |
navigator.mediaDevices.getUserMedia(constraints).then(function (stream) {
|
| 34 |
+
actionButton.textContent = "Запис...";
|
| 35 |
+
resultNode.textContent = "";
|
| 36 |
actionButton.disabled = true;
|
| 37 |
/*
|
| 38 |
create an audio context after getUserMedia is called
|
templates/hello.html
CHANGED
|
@@ -11,11 +11,23 @@
|
|
| 11 |
|
| 12 |
<body>
|
| 13 |
<div class="container">
|
| 14 |
-
<div class="text-center">
|
| 15 |
<h1>Демо розпізнавання української мови</h1>
|
| 16 |
<p>Говоріть 3 секунди після натискання на кнопку, тоді отримаєте результат</p>
|
| 17 |
-
|
| 18 |
<div id="result"></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
</div>
|
| 20 |
</div>
|
| 21 |
<script src="https://cdn.rawgit.com/mattdiamond/Recorderjs/08e7abd9/dist/recorder.js"></script>
|
|
|
|
| 11 |
|
| 12 |
<body>
|
| 13 |
<div class="container">
|
| 14 |
+
<div class="col-12 col-md-8 col-sm-12 col-xl-6 mx-auto text-center">
|
| 15 |
<h1>Демо розпізнавання української мови</h1>
|
| 16 |
<p>Говоріть 3 секунди після натискання на кнопку, тоді отримаєте результат</p>
|
| 17 |
+
|
| 18 |
<div id="result"></div>
|
| 19 |
+
<div class="row no-gutters">
|
| 20 |
+
<div class="col-1 col-sm-2"> </div>
|
| 21 |
+
<div class="col-6">
|
| 22 |
+
<button class="btn btn-primary" id="action" onclick="handleAction()">Почати запис (3 сек)</button>
|
| 23 |
+
</div>
|
| 24 |
+
<div class="col-5 col-sm-4">
|
| 25 |
+
<select id="lang" class="browser-default custom-select">
|
| 26 |
+
<option selected value="uk">Українська</option>
|
| 27 |
+
<option value="en">Англійська</option>
|
| 28 |
+
</select>
|
| 29 |
+
</div>
|
| 30 |
+
</div>
|
| 31 |
</div>
|
| 32 |
</div>
|
| 33 |
<script src="https://cdn.rawgit.com/mattdiamond/Recorderjs/08e7abd9/dist/recorder.js"></script>
|