Spaces:

dayuian
/

VocabLine

Running

App Files Files Community

dayuian committed on Feb 18

Commit

1e165e2

verified ·

1 Parent(s): 3ed2d28

Create sentences.py

Browse files

Files changed (1) hide show

sentences.py +106 -0

sentences.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import sqlite3
+import os
+import random
+from ai_sentence import generate_sentence
+from vocab import get_words_from_source, get_word_info
+from tqdm import tqdm
+DATA_DIR = "./data"  # directory (relative to the current working directory) that contains the SQLite database
+DB_PATH = os.path.join(DATA_DIR, "sentences.db")  # path of the SQLite file opened by the database helpers in this module
+# 初始化資料庫（建表）
+def init_db():
+    conn = sqlite3.connect(DB_PATH)
+    c = conn.cursor()
+    c.execute('''
+    CREATE TABLE IF NOT EXISTS sentences (
+        word TEXT,
+        phonetic TEXT,
+        sentence TEXT,
+        source TEXT,
+        model TEXT,
+        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+        PRIMARY KEY (word, source, model)
+    )
+    ''')
+    conn.commit()
+    conn.close()
+# 查詢句庫中的某個單字的所有例句
+def get_sentences_by_word(word):
+    conn = sqlite3.connect(DB_PATH)
+    c = conn.cursor()
+    c.execute('SELECT word, phonetic, sentence, source, model FROM sentences WHERE word=?', (word,))
+    results = c.fetchall()
+    conn.close()
+    return results
+# 儲存句子到 SQLite
+def save_sentence(word, phonetic, sentence, source, model):
+    conn = sqlite3.connect(DB_PATH)
+    c = conn.cursor()
+    c.execute('''
+    INSERT INTO sentences (word, phonetic, sentence, source, model)
+    VALUES (?, ?, ?, ?, ?)
+    ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
+    ''', (word, phonetic, sentence, source, model))
+    conn.commit()
+    conn.close()
+# 隨機抽單字 + 查句庫 or GPT 生成例句
+def get_words_with_sentences(source, n):
+    try:
+        words = get_words_from_source(source)
+        selected_words = random.sample(words, n)
+        result_display = ""
+        for word_data in tqdm(selected_words, desc="處理單字"):
+            word = word_data['word']
+            phonetic = word_data['phonetic']
+            # 查詢句庫
+            sentence_records = get_sentences_by_word(word)
+            if sentence_records:
+                # 優先取 Tatoeba
+                sentence = ""
+                for rec in sentence_records:
+                    if rec[3] == "tatoeba":  # source 字段
+                        sentence = rec[2]  # sentence 字段
+                        break
+                if not sentence:
+                    sentence = sentence_records[0][2]
+                source_used = sentence_records[0][3]
+                model_used = sentence_records[0][4]
+            else:
+                # GPT 生成句子
+                sentence = generate_sentence(word, "EleutherAI/pythia-410m")
+                source_used = "ai"
+                model_used = "EleutherAI/pythia-410m"
+                # 查詢音標，避免 GPT 生成時音標缺失
+                if not phonetic:
+                    word_info = get_word_info(source, word)
+                    phonetic = word_info['phonetic'] if word_info else ''
+                # 存回句庫
+                save_sentence(word, phonetic, sentence, source_used, model_used)
+            result_display += f"""
+            <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
+                <strong>單字：</strong> {word} <br>
+                <strong>音標：</strong> {phonetic or '無'} <br>
+                <strong>句子：</strong> {sentence} <br>
+                <strong>來源：</strong> {source_used} {f"({model_used})" if model_used else ""}
+            </div>
+            """
+        return result_display, f"✅ 成功抽取 {n} 個單字 & 句子"
+    except Exception as e:
+        return f"<p style='color:red;'>❌ 發生錯誤：{str(e)}</p>", f"❌ 錯誤：{str(e)}"