Younes13 committed on
Commit
7ac6d00
·
verified ·
1 Parent(s): cddf140

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -12
app.py CHANGED
@@ -1,17 +1,22 @@
1
  import torch
2
  import gradio as gr
3
- from transformers import AutoTokenizer, AutoModel
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import numpy as np
6
  import json
7
  import os
8
 
9
- # بارگذاری مدل
10
- model_name = "HooshvareLab/PersianMind"
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = AutoModel.from_pretrained(model_name)
13
 
14
- # مسیر فایل ذخیره‌سازی دائمی
 
 
 
 
 
15
  DATA_FILE = "faq_data.json"
16
  ADMIN_PASSWORD = "admin123" # رمز عبور ادمین
17
 
@@ -37,19 +42,33 @@ def save_faq_data():
37
  # پایگاه دانش و embedding‌ها
38
  faq_dict = load_faq_data()
39
  faq_questions = list(faq_dict.keys())
40
- faq_embeddings = []
41
 
42
- # تولید embedding
43
  def get_embedding(text):
44
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
45
  with torch.no_grad():
46
- outputs = model(**inputs)
47
  return outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
48
 
49
  # ساخت embedding اولیه
50
  faq_embeddings = [get_embedding(q) for q in faq_questions]
51
 
52
- # پاسخ‌دهی
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def student_bot(user_question):
54
  try:
55
  user_emb = get_embedding(user_question)
@@ -60,7 +79,7 @@ def student_bot(user_question):
60
  if best_score > 0.6:
61
  return faq_dict[faq_questions[best_idx]]
62
  else:
63
- return "متأسفم، پاسخ این سؤال در حال حاضر موجود نیست."
64
  except Exception as e:
65
  return f"❗️خطا: {str(e)}"
66
 
@@ -100,3 +119,4 @@ with gr.Blocks() as demo:
100
  add_btn.click(fn=add_faq, inputs=[new_q, new_a, password], outputs=result)
101
 
102
  demo.launch()
 
 
1
  import torch
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import numpy as np
6
  import json
7
  import os
8
 
# Embedding model: encodes questions so they can be compared by similarity.
embedding_model_name = "HooshvareLab/bert-fa-base-uncased"
embedding_tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
embedding_model = AutoModel.from_pretrained(embedding_model_name)

# Generation model: produces a fresh answer when no stored FAQ entry matches.
gen_model_name = "HooshvareLab/PersianMind"
gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_name)
gen_model = AutoModelForCausalLM.from_pretrained(gen_model_name)
18
+
# Path of the JSON file that stores the FAQ database.
DATA_FILE = "faq_data.json"
# Admin password. Read from the ADMIN_PASSWORD environment variable so the
# secret does not have to live in the source; the previous hard-coded value is
# kept only as a fallback so existing deployments keep working unchanged.
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "admin123")
22
 
 
# Knowledge base (question -> answer) and the ordered list of its questions;
# the list order is what FAQ embeddings are indexed by.
faq_dict = load_faq_data()
faq_questions = list(faq_dict)
 
45
 
# Embedding helper
def get_embedding(text):
    """Return a single embedding vector for ``text`` as a NumPy array.

    The text is tokenized (with truncation) and run through the embedding
    model without gradient tracking; the model's ``last_hidden_state`` is
    averaged over dim 1 (presumably the token axis of a
    ``(batch, tokens, hidden)`` output), squeezed, and moved to CPU.
    """
    encoded = embedding_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        model_out = embedding_model(**encoded)
    hidden_states = model_out.last_hidden_state
    pooled = hidden_states.mean(dim=1)
    return pooled.squeeze().cpu().numpy()
52
 
# Initial embeddings, one per stored question, in the same order as faq_questions.
faq_embeddings = list(map(get_embedding, faq_questions))
55
 
# Answer generation with PersianMind
def generate_with_persianmind(prompt, max_new_tokens=200):
    """Generate a free-form answer to ``prompt`` with the generation model.

    Args:
        prompt: The text to answer. It is truncated to at most 512 tokens.
        max_new_tokens: Upper bound on the number of tokens generated *after*
            the prompt (default 200).

    Returns:
        The decoded continuation only, stripped of surrounding whitespace.
        The prompt itself is not repeated in the result.
    """
    # A single prompt needs no padding. Requesting it would raise if the
    # tokenizer defines no pad token (the eos-as-pad fallback passed to
    # generate() below suggests that may be the case here).
    inputs = gen_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    inputs = inputs.to(gen_model.device)
    input_ids = inputs["input_ids"]
    prompt_len = input_ids.shape[1]

    with torch.no_grad():
        output_ids = gen_model.generate(
            input_ids=input_ids,
            # Pass the mask explicitly so generate() does not have to guess it.
            attention_mask=inputs["attention_mask"],
            # max_length would count the prompt tokens too, leaving no room to
            # generate for prompts of 200+ tokens; bound only the new tokens.
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_p=0.9,
            temperature=0.8,
            pad_token_id=gen_tokenizer.eos_token_id,
        )

    # A causal LM returns prompt + continuation; keep only the continuation so
    # the user's question is not echoed back as part of the answer.
    new_tokens = output_ids[0][prompt_len:]
    return gen_tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
70
+
71
+ # پاسخ‌دهی اصلی
72
  def student_bot(user_question):
73
  try:
74
  user_emb = get_embedding(user_question)
 
79
  if best_score > 0.6:
80
  return faq_dict[faq_questions[best_idx]]
81
  else:
82
+ return generate_with_persianmind(user_question)
83
  except Exception as e:
84
  return f"❗️خطا: {str(e)}"
85
 
 
119
  add_btn.click(fn=add_faq, inputs=[new_q, new_a, password], outputs=result)
120
 
121
  demo.launch()
122
+