from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

# Load the fine-tuned model and tokenizer (fp16, auto-placed across available devices).
model_name = "atsnetwork/my-custom-tinyllama-chatbot"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# The tokenizer has no dedicated pad token, so reuse the EOS token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
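
# Note: device_map="auto" above requires the `accelerate` package to be installed.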


def generate_response_with_dev_info(prompt, max_new_tokens=100, temperature=0.6, top_k=30):
    """Generate a reply plus confidence diagnostics for the given prompt."""
    # Wrap the prompt in the Llama-style [INST] template this model expects.
    formatted_prompt = f"<s>[INST] {prompt} [/INST]"
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
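
    # Alternative (assumption: only if this tokenizer ships a chat template): the
    # prompt could be built with tokenizer.apply_chat_template() instead of the
    # manual [INST] wrapper above.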

    # Sample with the logits recorded: return_dict_in_generate plus output_scores
    # exposes the per-step scores needed for the entropy calculation below.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        return_dict_in_generate=True,
        output_scores=True,
    )

    generated_ids = outputs.sequences[0]
    generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Keep only the text after the [/INST] marker, i.e. the model's answer.
    answer = ""
    start_answer = generated_text.find("[/INST]")
    if start_answer != -1:
        answer = generated_text[start_answer + len("[/INST]"):].strip()
        # Defensive: skip_special_tokens=True should already have removed </s>.
        if answer.endswith("</s>"):
            answer = answer[:-len("</s>")].strip()
    else:
        answer = generated_text.strip()

    # Average the Shannon entropy of the sampling distribution at each generation
    # step: H = -sum(p * log p). Lower average entropy means the model was more
    # certain about each next token.
    avg_entropy = 0.0
    total_generated_tokens = 0
    if outputs.scores:
        for score_tensor in outputs.scores:
            probabilities = torch.softmax(score_tensor, dim=-1)
            # A small epsilon avoids log(0) for zero-probability tokens.
            epsilon = 1e-9
            entropy = -torch.sum(probabilities * torch.log(probabilities + epsilon), dim=-1).item()
            avg_entropy += entropy
            total_generated_tokens += 1

    if total_generated_tokens > 0:
        avg_entropy /= total_generated_tokens
    else:
        # Nothing was generated; treat confidence as undefined (worst case).
        avg_entropy = float('inf')
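
    # For scale (assuming outputs.scores holds the processed logits, i.e. after the
    # temperature and top-k warpers are applied): with top_k=30 the per-step entropy
    # is bounded by ln(30) ≈ 3.4 nats, so the 1.0 threshold below sits in between
    # near-deterministic (≈ 0) and maximally uncertain sampling.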

    # Flag low confidence when the average entropy exceeds an empirically chosen
    # threshold (tune this value for your own model and data).
    entropy_threshold = 1.0
    if avg_entropy > entropy_threshold:
        confidence_status = "LOW_CONFIDENCE: the model may not have a clear pattern (high entropy)."
    else:
        confidence_status = "HIGH_CONFIDENCE"

    # Check whether the model explicitly said it doesn't know. The phrases are kept
    # in Indonesian because they must match the model's output language.
    is_explicitly_unknown = False
    explicit_unknown_reason = ""
    unknown_phrases = [
        "maaf, saya tidak memiliki informasi",   # "sorry, I don't have the information"
        "saya tidak familiar dengan",            # "I am not familiar with"
        "di luar cakupan data pelatihan saya",   # "outside the scope of my training data"
        "saya tidak tahu",                       # "I don't know"
        "tidak dapat menemukan informasi",       # "cannot find the information"
    ]
    answer_lower = answer.lower()
    for phrase in unknown_phrases:
        if phrase in answer_lower:
            is_explicitly_unknown = True
            explicit_unknown_reason = f"Model used an 'I don't know' phrase: '{phrase}'"
            break

    # Bundle the diagnostics for the developer-facing output panel.
    developer_info = {
        "confidence_score": f"{avg_entropy:.4f}",  # average entropy; lower = more confident
        "confidence_status": confidence_status,
        "explicit_unknown_phrase_detected": is_explicitly_unknown,
        "explicit_unknown_reason": explicit_unknown_reason if is_explicitly_unknown else "No explicit 'I don't know' phrase detected.",
    }

    return answer, developer_info
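
# Quick smoke test outside the UI (hypothetical prompt; uncomment to run):
# answer, info = generate_response_with_dev_info("Apa itu ATS Network?")
# print(answer)
# print(info)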


def gradio_interface_fn(prompt):
    answer, dev_info = generate_response_with_dev_info(prompt)

    dev_info_str = "--- Developer Info ---\n"
    dev_info_str += f"Confidence Score (Entropy): {dev_info['confidence_score']} ({dev_info['confidence_status']})\n"
    dev_info_str += f"Explicit Unknown Phrase Detected: {dev_info['explicit_unknown_phrase_detected']}\n"
    dev_info_str += f"Reason: {dev_info['explicit_unknown_reason']}\n"

    return answer, dev_info_str


iface = gr.Interface(
    fn=gradio_interface_fn,
    inputs=gr.Textbox(lines=2, label="Your Question"),
    outputs=[
        gr.Textbox(label="Chatbot Response", lines=5),
        gr.Textbox(label="Developer Information", lines=5),
    ],
    title="TinyLlama Custom Chatbot with Developer Insights",
    description="Ask anything and get a response from the chatbot. Additional information for developers will be displayed below.",
)

iface.launch()
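
# When running in a notebook or Colab, a temporary public link can be created
# by launching with sharing enabled instead:
# iface.launch(share=True)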