Spaces:

clr
/

pce

Sleeping

pce

File size: 2,510 Bytes

import json, os, requests, warnings, wave
# Install a blanket "ignore" filter: every warning category is silenced
# process-wide from this point on.
# NOTE(review): presumably meant to hide the InsecureRequestWarning triggered
# by the verify=False requests below -- confirm; it hides all other warnings too.
warnings.filterwarnings("ignore")


# synthesise speech
# save 16khz mono wav file
# return path to wave file
#   saving word alignment timestamps is deprecating
def tiro(text, voice, save='./', tiroalign=False, *, timeout=60):
    """Synthesise `text` with a Tiro TTS voice and save it as a WAV file.

    The audio is requested as raw PCM and wrapped in a mono, 16 kHz, 16-bit
    WAV container written to ``save + voice + '.wav'``.

    Args:
        text: Text to synthesise (sent as the ``Text`` field).
        voice: Voice identifier (sent as ``VoiceId``); also used as the base
            name of the output file(s).
        save: Prefix prepended verbatim to the output file names. It is
            concatenated, not path-joined, so a directory must include its
            trailing separator.
        tiroalign: If true, additionally request word speech marks and write
            them to ``save + voice + '.json'``. Deprecated: Tiro no longer
            intends to support this, and only supports it for 2 voices anyway.
        timeout: Value passed to ``requests.post`` as ``timeout`` (seconds),
            so an unresponsive endpoint cannot block the caller forever.

    Returns:
        Absolute path of the WAV file. The alignment file path is not
        returned, even when ``tiroalign`` is true.

    Raises:
        requests.HTTPError: The service answered with an error status.
        requests.RequestException: Any other request failure (connection
            error, timeout, ...).
    """
    # Endpoint worked in 2023; NOT working as of 07.2025.
    url = 'https://tts.tiro.is/v0/speech'
    headers = {'Content-Type': 'application/json'}

    # synthesis
    payload_tts = {
        "Engine": "standard",
        "LanguageCode": "is-IS",
        "OutputFormat": "pcm",
        "SampleRate": "16000",
        "Text": text,
        "VoiceId": voice,
    }

    wname = save + voice + '.wav'
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # as-is to avoid changing connection behaviour -- confirm it is still needed.
    tts_data = requests.post(url, headers=headers, json=payload_tts,
                             verify=False, timeout=timeout)
    # Fail loudly instead of wrapping an HTTP error body in a WAV header.
    tts_data.raise_for_status()

    # The response body is treated as headerless PCM; these parameters must
    # match what was requested above (16 kHz, and 2 bytes per sample, mono).
    with wave.open(wname, 'wb') as f:
        f.setnchannels(1)
        f.setframerate(16000)
        f.setsampwidth(2)
        f.writeframes(tts_data.content)

    # word time alignments (deprecated, off by default)
    if tiroalign:
        payload_aln = {
            "Engine": "standard",
            "LanguageCode": "is-IS",
            "OutputFormat": "json",
            "SpeechMarkTypes": ["word"],
            "Text": text,
            "VoiceId": voice,
        }
        aname = save + voice + '.json'

        aln_data = requests.post(url, headers=headers, json=payload_aln,
                                 verify=False, timeout=timeout)
        aln_data.raise_for_status()

        # The body is rewritten from newline-separated objects into one JSON
        # document: commas are inserted between objects and the whole thing
        # is wrapped in {"alignments": [...]}.
        # Explicit UTF-8 so non-ASCII (Icelandic) words are written the same
        # way on every platform.
        with open(aname, 'w', encoding='utf-8') as f:
            f.write('{"alignments": [')
            f.write(aln_data.content.decode('utf-8').replace('}\n{', '},\n {'))
            f.write(']}')

    return os.path.abspath(wname)


def grammatek(text, voice, save='./', UNUSED=False, *, timeout=60):
    """Synthesise `text` with a Grammatek TTS voice and save it as a WAV file.

    The audio is requested as raw PCM and wrapped in a mono, 16 kHz, 16-bit
    WAV container written to ``save + voice + '.wav'``.

    Args:
        text: Text to synthesise (sent as the ``Text`` field).
        voice: Voice identifier (sent as ``VoiceId``); also used as the base
            name of the output file.
        save: Prefix prepended verbatim to the output file name. It is
            concatenated, not path-joined, so a directory must include its
            trailing separator.
        UNUSED: Ignored. Presumably kept so the positional signature lines up
            with ``tiro`` -- confirm against callers.
        timeout: Value passed to ``requests.post`` as ``timeout`` (seconds),
            so an unresponsive endpoint cannot block the caller forever.

    Returns:
        Absolute path of the WAV file.

    Raises:
        requests.HTTPError: The service answered with an error status.
        requests.RequestException: Any other request failure (connection
            error, timeout, ...).
    """
    # endpoint working 2025
    url = 'https://api.grammatek.com/tts/v0/speech'
    # NOTE(review): Accept lists container formats while the payload asks for
    # raw "pcm"; the code below assumes the body is headerless PCM -- confirm.
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'audio/mpeg,audio/x-wav,audio/ogg',
    }

    # synthesis
    payload_tts = {
        "Engine": "standard",
        "LanguageCode": "is-IS",
        "LexiconNames": [],
        "OutputFormat": "pcm",
        "SampleRate": "16000",
        "SpeechMarkTypes": ["word"],
        "Text": text,
        "TextType": "text",
        "VoiceId": voice,
    }

    wname = save + voice + '.wav'
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # as-is to avoid changing connection behaviour -- confirm it is still needed.
    tts_data = requests.post(url, headers=headers, json=payload_tts,
                             verify=False, timeout=timeout)
    # Fail loudly instead of wrapping an HTTP error body in a WAV header.
    tts_data.raise_for_status()

    # WAV parameters must match the request above: 16 kHz, 2 bytes per
    # sample, single channel.
    with wave.open(wname, 'wb') as f:
        f.setnchannels(1)
        f.setframerate(16000)
        f.setsampwidth(2)
        f.writeframes(tts_data.content)

    return os.path.abspath(wname)