--[[

  ReaSpeechControlsUI.lua - UI elements for configuring ASR services

]]--

-- Control panel for the ASR request settings (model, language, hot words,
-- initial prompt, VAD, logging). Offers a "simple" tab with preset model
-- sizes and an "advanced" tab with the individual inputs.
-- NOTE(review): Polo is defined elsewhere; presumably it builds a class-like
-- table whose constructor calls :init() - confirm.
ReaSpeechControlsUI = Polo {
  -- Copied from whisper.tokenizer.LANGUAGES
  LANGUAGES = {
    en = 'English', zh = 'Chinese', de = 'German', es = 'Spanish',
    ru = 'Russian', ko = 'Korean', fr = 'French', ja = 'Japanese',
    pt = 'Portuguese', tr = 'Turkish', pl = 'Polish', ca = 'Catalan',
    nl = 'Dutch', ar = 'Arabic', sv = 'Swedish', it = 'Italian',
    id = 'Indonesian', hi = 'Hindi', fi = 'Finnish', vi = 'Vietnamese',
    he = 'Hebrew', uk = 'Ukrainian', el = 'Greek', ms = 'Malay',
    cs = 'Czech', ro = 'Romanian', da = 'Danish', hu = 'Hungarian',
    ta = 'Tamil', no = 'Norwegian', th = 'Thai', ur = 'Urdu',
    hr = 'Croatian', bg = 'Bulgarian', lt = 'Lithuanian', la = 'Latin',
    mi = 'Maori', ml = 'Malayalam', cy = 'Welsh', sk = 'Slovak',
    te = 'Telugu', fa = 'Persian', lv = 'Latvian', bn = 'Bengali',
    sr = 'Serbian', az = 'Azerbaijani', sl = 'Slovenian', kn = 'Kannada',
    et = 'Estonian', mk = 'Macedonian', br = 'Breton', eu = 'Basque',
    is = 'Icelandic', hy = 'Armenian', ne = 'Nepali', mn = 'Mongolian',
    bs = 'Bosnian', kk = 'Kazakh', sq = 'Albanian', sw = 'Swahili',
    gl = 'Galician', mr = 'Marathi', pa = 'Punjabi', si = 'Sinhala',
    km = 'Khmer', sn = 'Shona', yo = 'Yoruba', so = 'Somali',
    af = 'Afrikaans', oc = 'Occitan', ka = 'Georgian', be = 'Belarusian',
    tg = 'Tajik', sd = 'Sindhi', gu = 'Gujarati', am = 'Amharic',
    yi = 'Yiddish', lo = 'Lao', uz = 'Uzbek', fo = 'Faroese',
    ht = 'Haitian Creole', ps = 'Pashto', tk = 'Turkmen', nn = 'Nynorsk',
    mt = 'Maltese', sa = 'Sanskrit', lb = 'Luxembourgish', my = 'Myanmar',
    bo = 'Tibetan', tl = 'Tagalog', mg = 'Malagasy', as = 'Assamese',
    tt = 'Tatar', haw = 'Hawaiian', ln = 'Lingala', ha = 'Hausa',
    ba = 'Bashkir', jw = 'Javanese', su = 'Sundanese',
  },

  -- Ordered list of language codes; filled in by _init_languages() below.
  LANGUAGE_CODES = {},

  -- The empty code maps to the 'Detect' entry added by _init_languages().
  DEFAULT_LANGUAGE = '',
  DEFAULT_MODEL_NAME = 'small',

  -- { button label, model name } pairs shown on the "simple" tab.
  SIMPLE_MODEL_SIZES = {
    {'Small', 'small'},
    {'Medium', 'medium'},
    {'Large', 'distil-large-v3'},
  },

  -- Layout metrics handed to ColumnLayout.
  COLUMN_PADDING = 15,
  MARGIN_BOTTOM = 5,
  MARGIN_LEFT = 115,
  MARGIN_RIGHT = 0,

  -- Columns narrower than this get shortened checkbox labels.
  NARROW_COLUMN_WIDTH = 150,
}

-- Builds LANGUAGE_CODES: every code from LANGUAGES sorted by its display
-- name, with the empty code ('Detect') inserted at the front. Intended to
-- run exactly once, at load time (see the call right below).
ReaSpeechControlsUI._init_languages = function ()
  local languages = ReaSpeechControlsUI.LANGUAGES
  local codes = ReaSpeechControlsUI.LANGUAGE_CODES

  for code, _ in pairs(languages) do
    table.insert(codes, code)
  end

  table.sort(codes, function (a, b)
    return languages[a] < languages[b]
  end)

  -- The 'Detect' pseudo-language is added after sorting so it stays first.
  table.insert(codes, 1, '')
  languages[''] = 'Detect'
end

ReaSpeechControlsUI._init_languages()

-- Sets the default state of every control and builds the column layouts.
function ReaSpeechControlsUI:init()
  self.tab = 'simple'

  self.log_enable = false
  self.log_debug = false

  self.language = self.DEFAULT_LANGUAGE
  self.translate = false
  self.hotwords = ''
  self.initial_prompt = ''
  self.model_name = self.DEFAULT_MODEL_NAME
  self.vad_filter = true

  self:init_layouts()
end

-- Returns a new table holding the current values of the request-related
-- controls (logging flags and the active tab are not included).
function ReaSpeechControlsUI:get_request_data()
  return {
    language = self.language,
    translate = self.translate,
    hotwords = self.hotwords,
    initial_prompt = self.initial_prompt,
    model_name = self.model_name,
    vad_filter = self.vad_filter,
  }
end

-- Builds the layouts for both tabs.
function ReaSpeechControlsUI:init_layouts()
  self:init_simple_layouts()
  self:init_advanced_layouts()
end

-- Builds the "simple" tab layout: one button per SIMPLE_MODEL_SIZES entry.
function ReaSpeechControlsUI:init_simple_layouts()
  -- Runs f, tinting buttons drawn by it when `selected` is true.
  -- NOTE(review): app:trap presumably catches errors raised by f so the
  -- matching PopStyleColor still runs - confirm against its definition.
  local with_button_color = function (selected, f)
    if selected then
      ImGui.PushStyleColor(ctx, ImGui.Col_Button(), Theme.colors.dark_gray_translucent)
      app:trap(f)
      ImGui.PopStyleColor(ctx)
    else
      f()
    end
  end

  self.model_sizes_layout = ColumnLayout.new {
    column_padding = self.COLUMN_PADDING,
    margin_bottom = self.MARGIN_BOTTOM,
    margin_left = self.MARGIN_LEFT,
    margin_right = self.MARGIN_RIGHT,
    num_columns = #self.SIMPLE_MODEL_SIZES,

    render_column = function (column)
      -- Only the first column carries the visible label; the others get an
      -- empty label so every button sits at the same height.
      self:render_input_label(column.num == 1 and 'Model Size' or '')

      local label, model_name = table.unpack(self.SIMPLE_MODEL_SIZES[column.num])
      with_button_color(self.model_name == model_name, function ()
        if ImGui.Button(ctx, label, column.width) then
          self.model_name = model_name
        end
      end)
    end
  }
end

-- Builds the "advanced" tab layouts: one ColumnLayout per row of renderers.
function ReaSpeechControlsUI:init_advanced_layouts()
  -- Each inner table is one row; each entry is a method that is later
  -- called as renderer(self, column).
  local renderers = {
    {self.render_model_name, self.render_hotwords, self.render_language},
    {self.render_options, self.render_initial_prompt, self.render_logging},
  }

  self.advanced_layouts = {}

  for row = 1, #renderers do
    self.advanced_layouts[row] = ColumnLayout.new {
      column_padding = self.COLUMN_PADDING,
      margin_bottom = self.MARGIN_BOTTOM,
      margin_left = self.MARGIN_LEFT,
      margin_right = self.MARGIN_RIGHT,
      num_columns = #renderers[row],

      render_column = function (column)
        ImGui.PushItemWidth(ctx, column.width)
        app:trap(function ()
          renderers[row][column.num](self, column)
        end)
        ImGui.PopItemWidth(ctx)
      end
    }
  end
end

-- Draws the heading, the body of the active tab, and a trailing separator.
function ReaSpeechControlsUI:render()
  self:render_heading()

  if self.tab == 'advanced' then
    self:render_advanced()
  else
    self:render_simple()
  end

  ImGui.Separator(ctx)
  ImGui.Dummy(ctx, 0, 5)
end

-- Draws the logo on the left, the tab bar next to it, and a second logo
-- near the right edge of the window, then moves the cursor below them.
function ReaSpeechControlsUI:render_heading()
  local init_x, init_y = ImGui.GetCursorPos(ctx)

  ImGui.SetCursorPosX(ctx, init_x - 20)
  app.png_from_bytes('reaspeech-logo-small')

  ImGui.SetCursorPos(ctx, init_x + self.MARGIN_LEFT + 2, init_y)
  self:render_tabs()

  ImGui.SetCursorPos(ctx, ImGui.GetWindowWidth(ctx) - 55, init_y)
  app.png_from_bytes('heading-logo-tech-audio')

  ImGui.SetCursorPos(ctx, init_x, init_y + 40)
end

-- Draws the Simple/Advanced tab bar and records the open tab in self.tab.
function ReaSpeechControlsUI:render_tabs()
  if ImGui.BeginTabBar(ctx, '##tabs', ImGui.TabBarFlags_None()) then
    app:trap(function ()
      if ImGui.BeginTabItem(ctx, 'Simple') then
        app:trap(function ()
          self.tab = 'simple'
        end)
        ImGui.EndTabItem(ctx)
      end

      if ImGui.BeginTabItem(ctx, 'Advanced') then
        app:trap(function ()
          self.tab = 'advanced'
        end)
        ImGui.EndTabItem(ctx)
      end
    end)
    ImGui.EndTabBar(ctx)
  end
end

-- Body of the "simple" tab.
function ReaSpeechControlsUI:render_simple()
  self:render_model_sizes()
end

-- Body of the "advanced" tab: renders every row layout in order.
function ReaSpeechControlsUI:render_advanced()
  for row = 1, #self.advanced_layouts do
    self.advanced_layouts[row]:render()
  end
end

-- Draws a text label followed by a zero-sized dummy item.
function ReaSpeechControlsUI:render_input_label(text)
  ImGui.Text(ctx, text)
  ImGui.Dummy(ctx, 0, 0)
end

-- Draws the language combo box and the translate checkbox.
-- column: the layout column being rendered; its width selects the long or
-- short checkbox label.
function ReaSpeechControlsUI:render_language(column)
  self:render_input_label('Language')

  if ImGui.BeginCombo(ctx, "##language", self.LANGUAGES[self.language]) then
    app:trap(function()
      -- LANGUAGE_CODES is a sorted sequence, so iterate it with ipairs:
      -- pairs does not guarantee any traversal order.
      for _, code in ipairs(self.LANGUAGE_CODES) do
        local is_selected = (code == self.language)
        if ImGui.Selectable(ctx, self.LANGUAGES[code], is_selected) then
          self.language = code
        end
      end
    end)
    ImGui.EndCombo(ctx)
  end

  local translate_label = "Translate to English"
  if column.width < self.NARROW_COLUMN_WIDTH then
    translate_label = "Translate"
  end

  local rv, value = ImGui.Checkbox(ctx, translate_label, self.translate)
  if rv then
    self.translate = value
  end
end

-- Draws the free-form model name input.
function ReaSpeechControlsUI:render_model_name()
  self:render_input_label('Model Name')

  -- Plain InputText(ctx, label, buf), like the other text fields here. The
  -- previous InputTextWithHint call passed the model name in the hint slot
  -- and supplied no buffer argument at all.
  local rv, value = ImGui.InputText(ctx, '##model_name', self.model_name or "")
  if rv then
    self.model_name = value
  end
end

-- Draws the preset model size buttons of the "simple" tab.
function ReaSpeechControlsUI:render_model_sizes()
  self.model_sizes_layout:render()
end

-- Draws the hot words input.
function ReaSpeechControlsUI:render_hotwords()
  self:render_input_label('Hot Words')

  local rv, value = ImGui.InputText(ctx, '##hotwords', self.hotwords)
  if rv then
    self.hotwords = value
  end
end

-- Draws the voice activity detection checkbox.
-- column: the layout column being rendered; its width selects the long or
-- short checkbox label.
function ReaSpeechControlsUI:render_options(column)
  self:render_input_label('Options')

  local vad_label = "Voice Activity Detection"
  if column.width < self.NARROW_COLUMN_WIDTH then
    vad_label = "VAD"
  end

  local rv, value = ImGui.Checkbox(ctx, vad_label, self.vad_filter)
  if rv then
    self.vad_filter = value
  end
end

-- Draws the logging checkboxes; "Debug" is only offered while logging is
-- enabled.
function ReaSpeechControlsUI:render_logging()
  self:render_input_label('Logging')

  local rv, value = ImGui.Checkbox(ctx, "Enable", self.log_enable)
  if rv then
    self.log_enable = value
  end

  if self.log_enable then
    ImGui.SameLine(ctx)
    rv, value = ImGui.Checkbox(ctx, "Debug", self.log_debug)
    if rv then
      self.log_debug = value
    end
  end
end

-- Draws the initial prompt input.
function ReaSpeechControlsUI:render_initial_prompt()
  self:render_input_label('Initial Prompt')

  -- Declared local so the results do not leak into the global environment.
  local rv, value = ImGui.InputText(ctx, '##initial_prompt', self.initial_prompt)
  if rv then
    self.initial_prompt = value
  end
end