import sqlite3
import os
import json

DATA_DIR = "./data"
DB_PATH = os.path.join(DATA_DIR, "sentences.db")

# Initialize the database table
def init_db():
    os.makedirs(DATA_DIR, exist_ok=True)  # make sure ./data exists before creating the DB file
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    c.execute('''
        CREATE TABLE IF NOT EXISTS sentences (
            word TEXT,
            phonetic TEXT,
            sentence TEXT,
            source TEXT,
            model TEXT,
            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
            PRIMARY KEY (word, source, model)
        )
    ''')
    conn.commit()
    conn.close()

# Look up all example sentences for a given word
def get_sentences_by_word(word):
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    c.execute('SELECT word, phonetic, sentence, source, model FROM sentences WHERE word=?', (word,))
    results = c.fetchall()
    conn.close()
    return results

# Save (upsert) a sentence into SQLite; an existing (word, source, model) row is updated in place
def save_sentence(word, phonetic, sentence, source, model):
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    c.execute('''
        INSERT INTO sentences (word, phonetic, sentence, source, model)
        VALUES (?, ?, ?, ?, ?)
        ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
    ''', (word, phonetic, sentence, source, model))
    conn.commit()
    conn.close()

# Scan the JSON files under /data and return the list of vocabulary set names
def get_sources():
    files = os.listdir(DATA_DIR)
    sources = [f.split(".json")[0] for f in files if f.endswith(".json")]
    return sources

# Given a vocabulary set name, return every word in that set
def get_words_from_source(source):
    data_path = os.path.join(DATA_DIR, f"{source}.json")
    with open(data_path, 'r', encoding='utf-8') as f:
        words = json.load(f)
    return [w['word'] for w in words]
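

# --- Illustrative usage sketch ---
# A minimal demo of the functions above. The word, sentence, source, and model
# values are hypothetical placeholders, not values used by the original app.
if __name__ == "__main__":
    init_db()
    save_sentence(
        word="apple",
        phonetic="/ˈæp.əl/",
        sentence="She ate an apple after lunch.",
        source="demo",          # hypothetical vocabulary set name
        model="example-model",  # hypothetical model identifier
    )
    print(get_sentences_by_word("apple"))

    sources = get_sources()
    print(sources)  # vocabulary set names found under ./data
    if sources:
        print(get_words_from_source(sources[0])[:5])  # first few words of the first set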