Spaces:

plipustel-llm-lab
/

idxstockBERT

Running

App Files Files Community

plipustel committed on Jun 15

Commit

1df3627

verified ·

1 Parent(s): c2a82aa

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -10

app.py CHANGED Viewed

@@ -1,21 +1,26 @@
 import torch
 from transformers import AutoTokenizer, AutoModel
 from model import IndoBERTMultitask
 import gradio as gr
-# Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained("plipustel/idxstockBERT")
-# Load model
 base_model = AutoModel.from_pretrained("indobenchmark/indobert-base-p1")
 model = IndoBERTMultitask(base_model=base_model, num_event_labels=43)
-model.load_state_dict(torch.load("pytorch_model.bin", map_location="cpu"))
 model.eval()
 # Label mapping
 sentiment_map = {0: "Negatif", 1: "Positif", 2: "Netral"}
-# Event mapping (ganti sesuai mapping di dataset Tuan)
 event_map = {
     0: "corporate action", 1: "divestment", 2: "ipo", 3: "regulation", 4: "insider",
     5: "market outlook", 6: "forex", 7: "commodity", 8: "finance report", 9: "debt",
@@ -29,33 +34,27 @@ event_map = {
     40: "supply chain", 41: "geopolitical", 42: "misc"
 }
-# IDX sector labels
 idx_labels = [
     "idx_energy", "idx_basic", "idx_indust", "idx_noncyc", "idx_cyclic",
     "idx_health", "idx_finance", "idx_propert", "idx_techno", "idx_infra", "idx_trans"
 ]
-# Inference function
 def predict(text):
     with torch.no_grad():
         encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=64)
         out_sent, out_event, out_idx = model(encoded["input_ids"], encoded["attention_mask"])
-        # Sentiment
         sent_label = torch.argmax(out_sent, dim=1).item()
         sent_result = sentiment_map[sent_label]
-        # Event
         event_label = torch.argmax(out_event, dim=1).item()
         event_result = event_map.get(event_label, "unknown")
-        # IDX (multi-label)
         idx_probs = torch.sigmoid(out_idx).squeeze(0)
         idx_result = [label for i, label in enumerate(idx_labels) if idx_probs[i] > 0.5]
     return sent_result, event_result, ", ".join(idx_result)
-# Gradio UI
 iface = gr.Interface(
     fn=predict,
     inputs=gr.Textbox(lines=2, placeholder="Masukkan headline berita saham..."),

 import torch
 from transformers import AutoTokenizer, AutoModel
+from huggingface_hub import hf_hub_download
 from model import IndoBERTMultitask
 import gradio as gr
+# Load tokenizer dari Hugging Face Hub
 tokenizer = AutoTokenizer.from_pretrained("plipustel/idxstockBERT")
+# Load base IndoBERT
 base_model = AutoModel.from_pretrained("indobenchmark/indobert-base-p1")
+# Load model multitask custom
 model = IndoBERTMultitask(base_model=base_model, num_event_labels=43)
+# Download bobot model dari Hugging Face Hub
+model_path = hf_hub_download(repo_id="plipustel/idxstockBERT", filename="pytorch_model.bin")
+model.load_state_dict(torch.load(model_path, map_location="cpu"))
 model.eval()
 # Label mapping
 sentiment_map = {0: "Negatif", 1: "Positif", 2: "Netral"}
 event_map = {
     0: "corporate action", 1: "divestment", 2: "ipo", 3: "regulation", 4: "insider",
     5: "market outlook", 6: "forex", 7: "commodity", 8: "finance report", 9: "debt",
     40: "supply chain", 41: "geopolitical", 42: "misc"
 }
 idx_labels = [
     "idx_energy", "idx_basic", "idx_indust", "idx_noncyc", "idx_cyclic",
     "idx_health", "idx_finance", "idx_propert", "idx_techno", "idx_infra", "idx_trans"
 ]
 def predict(text):
     """Classify one stock-news headline with the multitask model.

     ``text`` is tokenized (truncated to at most 64 tokens) and passed
     through ``model`` with gradient tracking disabled. The three model
     outputs are then decoded independently.

     Returns:
         A 3-tuple of strings:
         the ``sentiment_map`` entry for the arg-max of the first output,
         the ``event_map`` entry for the arg-max of the second output
         (``"unknown"`` when that index has no entry), and a
         comma-separated list of the ``idx_labels`` whose sigmoid score
         is above 0.5 (an empty string when none are).
     """
     with torch.no_grad():
         batch = tokenizer(
             text,
             return_tensors="pt",
             truncation=True,
             padding=True,
             max_length=64,
         )
         sent_scores, event_scores, idx_raw = model(
             batch["input_ids"], batch["attention_mask"]
         )

         # Single-label heads: take the highest-scoring class index.
         sentiment = sentiment_map[torch.argmax(sent_scores, dim=1).item()]
         event_id = torch.argmax(event_scores, dim=1).item()
         event = event_map.get(event_id, "unknown")

         # Multi-label head: keep every sector whose score passes 0.5.
         # squeeze(0) assumes a batch of one input -- TODO confirm.
         sector_scores = torch.sigmoid(idx_raw).squeeze(0)
         sectors = []
         for pos, name in enumerate(idx_labels):
             if sector_scores[pos] > 0.5:
                 sectors.append(name)
     return sentiment, event, ", ".join(sectors)
 iface = gr.Interface(
     fn=predict,
     inputs=gr.Textbox(lines=2, placeholder="Masukkan headline berita saham..."),