dayuian committed on
Commit
e67e94c
·
verified ·
1 Parent(s): 70b7493

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -16
app.py CHANGED
@@ -3,56 +3,77 @@ import json
3
  import random
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
  import os
 
6
 
7
- # 模型初始化(Hugging Face Spaces會跑)
8
  model_name = "EleutherAI/pythia-410m"
9
-
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
  model = AutoModelForCausalLM.from_pretrained(model_name)
12
 
13
- # 資料夾路徑
14
  DATA_DIR = "./data"
15
 
16
def get_words_with_sentences(source="common3000", n=10):
    """Pick random words from a word list and generate an example sentence for each.

    Args:
        source: Name of the word list; ``<DATA_DIR>/<source>.json`` is loaded.
            Each entry is read via its ``word`` key (and ``phonetic`` if present).
        n: How many words to sample. Accepts ints or integral floats (the
            Gradio number field may deliver a float); values larger than the
            word list are capped at the list size.

    Returns:
        A list of ``{"word", "phonetic", "sentence"}`` dicts, or a one-element
        list ``[{"error": message}]`` if anything fails.
    """
    try:
        # random.sample() rejects floats, so normalise the UI value first.
        count = int(n)

        # `source` comes from a free-text field: keep only the final path
        # component so it cannot point outside DATA_DIR.
        safe_source = os.path.basename(str(source))
        data_path = os.path.join(DATA_DIR, f"{safe_source}.json")
        with open(data_path, 'r', encoding='utf-8') as f:
            words = json.load(f)

        # Asking for more words than exist would raise; cap instead.
        count = min(count, len(words))
        selected_words = random.sample(words, count)
        results = []

        for word_data in selected_words:
            word = word_data['word']
            prompt = f"Write a simple English sentence using the word '{word}' suitable for beginners."

            inputs = tokenizer(prompt, return_tensors="pt")
            outputs = model.generate(**inputs, max_new_tokens=30)
            # A causal LM returns prompt + continuation; decode only the newly
            # generated tokens so the prompt is not echoed in the result.
            prompt_len = inputs["input_ids"].shape[1]
            sentence = tokenizer.decode(
                outputs[0][prompt_len:], skip_special_tokens=True
            ).strip()

            results.append({
                "word": word,
                # Tolerate entries without a phonetic transcription.
                "phonetic": word_data.get("phonetic", ""),
                "sentence": sentence
            })

        return results

    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the app.
        return [{"error": f"發生錯誤: {str(e)}"}]
 
47
 
48
- # Gradio 介面設定
49
  demo = gr.Interface(
50
  fn=get_words_with_sentences,
51
  inputs=[
52
- gr.Textbox(value="common3000", label="選擇單字庫"),
53
  gr.Number(value=10, label="抽幾個單字")
54
  ],
55
- outputs="json"
 
 
 
56
  )
57
 
58
  demo.launch()
 
3
  import random
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
  import os
6
+ import re
7
 
8
+ # 模型初始化
9
  model_name = "EleutherAI/pythia-410m"
 
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
  model = AutoModelForCausalLM.from_pretrained(model_name)
12
 
13
+ # 資料夾
14
  DATA_DIR = "./data"
15
 
16
def get_sources(data_dir=None):
    """Return the names of the word lists available in the data folder.

    A word list is any file directly inside the folder whose name ends with
    ``.json``; its name is the file name with that suffix removed.

    Args:
        data_dir: Folder to scan. Defaults to the module-level ``DATA_DIR``.

    Returns:
        Alphabetically sorted list of names; empty if the folder is missing.
    """
    if data_dir is None:
        data_dir = DATA_DIR
    try:
        files = os.listdir(data_dir)
    except FileNotFoundError:
        # This runs while the UI is being built; a missing folder should mean
        # "no word lists" rather than a crash at import time.
        return []
    suffix = ".json"
    # Strip only the trailing suffix: split(".json")[0] would cut a name such
    # as "x.json.v2.json" at the first occurrence. Sort because os.listdir()
    # returns entries in arbitrary order.
    return sorted(f[:-len(suffix)] for f in files if f.endswith(suffix))
21
+
22
# Prompt text the model may echo back before the actual sentence. Covers both
# prompt wordings used by this app: "Write a simple ... beginners." and
# "Use the word ... beginners. Output only the sentence."
_PROMPT_ECHO_RE = re.compile(
    r"(?:Write|Use the word).*?beginners\.(?:\s*Output only the sentence\.)?",
    flags=re.IGNORECASE,
)
# Markdown-style list markers such as "**1.**".
_LIST_MARKER_RE = re.compile(r"\*\*?\d+\.*\*\*")


def clean_sentence(output):
    """Strip prompt echo and list markers from generated text.

    Args:
        output: Raw text decoded from the language model.

    Returns:
        The cleaned text, ending in sentence punctuation. An input that is
        empty after cleaning yields an empty string.
    """
    output = _PROMPT_ECHO_RE.sub("", output).strip()
    output = _LIST_MARKER_RE.sub("", output).strip()
    if not output:
        # Nothing usable was generated; a lone "." would only be noise.
        return ""
    # Only add a period when the text is not already terminated, so that
    # questions and exclamations do not end up as "?." or "!.".
    if not output.endswith((".", "!", "?")):
        output += "."
    return output
29
+
30
def get_words_with_sentences(source, n):
    """Pick random words from a word list and generate an example sentence for each.

    Args:
        source: Name of the word list; ``<DATA_DIR>/<source>.json`` is loaded.
            Each entry is read via its ``word`` key (and ``phonetic`` if present).
        n: How many words to sample. Accepts ints or integral floats (the
            Gradio number field may deliver a float); values larger than the
            word list are capped at the list size.

    Returns:
        A ``(results, status)`` tuple. ``results`` is a list of
        ``{"word", "phonetic", "sentence"}`` dicts (empty on failure) and
        ``status`` is a list of progress / error messages.
    """
    status = []
    try:
        # random.sample() rejects floats, so normalise the UI value first.
        count = int(n)

        # Keep only the final path component so the name cannot point outside
        # DATA_DIR.
        safe_source = os.path.basename(str(source))
        data_path = os.path.join(DATA_DIR, f"{safe_source}.json")
        with open(data_path, 'r', encoding='utf-8') as f:
            words = json.load(f)

        # Asking for more words than exist would raise; cap instead.
        count = min(count, len(words))
        selected_words = random.sample(words, count)
        results = []

        for i, word_data in enumerate(selected_words, start=1):
            word = word_data['word']
            status.append(f"正在生成第 {i}/{count} 個單字 [{word}] 例句...")

            prompt = f"Use the word '{word}' in a simple English sentence suitable for beginners. Output only the sentence."

            inputs = tokenizer(prompt, return_tensors="pt")
            outputs = model.generate(**inputs, max_new_tokens=30)
            # A causal LM returns prompt + continuation; decode only the newly
            # generated tokens so the prompt is not echoed in the result.
            prompt_len = inputs["input_ids"].shape[1]
            sentence = tokenizer.decode(
                outputs[0][prompt_len:], skip_special_tokens=True
            )

            results.append({
                "word": word,
                # Tolerate entries without a phonetic transcription.
                "phonetic": word_data.get("phonetic", ""),
                "sentence": clean_sentence(sentence)
            })

        status.append("✅ 完成!")
        return results, status

    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the app.
        status.append(f" 發生錯誤: {str(e)}")
        return [], status
65
 
66
# Gradio UI: a word-list dropdown and a word count go in; the generated
# results and the progress log come out as two JSON panels.
_sources = get_sources()
# Prefer "common3000" as the default, but only if that list actually exists;
# a default that is not among the choices is invalid.
if "common3000" in _sources:
    _default_source = "common3000"
else:
    _default_source = _sources[0] if _sources else None

demo = gr.Interface(
    fn=get_words_with_sentences,
    inputs=[
        # NOTE: gr.Dropdown has no `show_clear_button` argument, so it is not
        # passed here.
        gr.Dropdown(
            choices=_sources,
            value=_default_source,
            label="選擇單字庫",
            interactive=True,
        ),
        # precision=0 makes Gradio hand the callback an int instead of a float.
        gr.Number(value=10, precision=0, label="抽幾個單字"),
    ],
    outputs=[
        gr.JSON(label="生成結果"),
        gr.JSON(label="生成進度"),
    ],
)

demo.launch()