Jiahuita
/

NewsSourceClassification

@@ -56,9 +56,8 @@ You can use this model directly with a FastAPI endpoint:
 ```python
 import requests
-# Make a prediction
 response = requests.post(
-    "https://huggingface.co/Jiahuita/NewsSourceClassification/predict",
     json={"text": "Your news headline here"}
 )
 print(response.json())

 ```python
 import requests
 response = requests.post(
+    "https://huggingface.co/Jiahuita/NewsSourceClassification",
     json={"text": "Your news headline here"}
 )
 print(response.json())

app.py DELETED Viewed

@@ -1,84 +0,0 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from transformers import Pipeline
-import tensorflow as tf
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-import json
-import os
-class TextInput(BaseModel):
-    text: str
-app = FastAPI(
-    title="News Source Classifier",
-    description="A model to classify news headlines as either Fox News or NBC News",
-    version="1.0.0"
-)
-class NewsClassificationPipeline(Pipeline):
-    def __init__(self):
-        super().__init__()
-        model_path = os.path.join(os.path.dirname(__file__), 'news_classifier.h5')
-        self.model = tf.keras.models.load_model(model_path)
-        tokenizer_path = os.path.join(os.path.dirname(__file__), 'tokenizer.json')
-        with open(tokenizer_path, 'r') as f:
-            tokenizer_data = json.load(f)
-            self.tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(tokenizer_data)
-    def __call__(self, text):
-        if isinstance(text, str):
-            text = [text]
-        sequences = self.tokenizer.texts_to_sequences(text)
-        padded = pad_sequences(sequences, maxlen=128)
-        predictions = self.model.predict(padded)
-        results = []
-        for pred in predictions:
-            label = "foxnews" if pred[0] > 0.5 else "nbc"
-            score = float(pred[0] if label == "foxnews" else 1 - pred[0])
-            results.append({"label": label, "score": score})
-        return results[0] if len(results) == 1 else results
-try:
-    classifier = NewsClassificationPipeline()
-except Exception as e:
-    print(f"Error initializing model: {str(e)}")
-    raise
-@app.get("/")
-async def root():
-    return {
-        "message": "News Source Classification API",
-        "usage": "Send POST request to /predict with {'text': 'your news headline'}"
-    }
-@app.post("/predict")
-async def predict(input_data: TextInput):
-    try:
-        result = classifier(input_data.text)
-        return result
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-@app.get("/examples")
-async def examples():
-    return {
-        "examples": [
-            {
-                "title": "Crime News Headline",
-                "text": "Wife of murdered Minnesota pastor hired 3 men to kill husband after affair: police"
-            },
-            {
-                "title": "Science News Headline",
-                "text": "Scientists discover breakthrough in renewable energy research"
-            },
-            {
-                "title": "Political News Headline",
-                "text": "Presidential candidates face off in heated debate over climate policies"
-            }
-        ]
-    }

config.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:459b49b28436622ac5c9f6e28171fb7d0acc9498e293876b5778846501c4ab94
-size 212

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae409354ee5a0f6edfd67b5b838c072be95c352a1e1faca73a2473ee8ac15253
+size 286

pipeline.py CHANGED Viewed

@@ -1,35 +1,53 @@
-from transformers import Pipeline
-import tensorflow as tf
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 import json
-import os
-def load_tokenizer(tokenizer_path):
-    with open(tokenizer_path, 'r') as f:
-        return json.load(f)
-class NewsClassificationPipeline(Pipeline):
-    def __init__(self, model=None, tokenizer=None, **kwargs):
         super().__init__(**kwargs)
-        model_path = os.path.join(os.path.dirname(__file__), './news_classifier.h5')
-        self.model = tf.keras.models.load_model(model_path)
-        tokenizer_path = os.path.join(os.path.dirname(__file__), './tokenizer.json')
-        self.tokenizer_config = load_tokenizer(tokenizer_path)
-    def __call__(self, texts, **kwargs):
-        if isinstance(texts, str):
-            texts = [texts]
-        sequences = self.tokenizer.texts_to_sequences(texts)
-        padded = pad_sequences(sequences, maxlen=128)
         predictions = self.model.predict(padded)
         results = []
         for pred in predictions:
             label = "foxnews" if pred[0] > 0.5 else "nbc"
             score = float(pred[0] if label == "foxnews" else 1 - pred[0])
-            results.append({"label": label, "score": score})
-        return results[0] if isinstance(texts, str) else results

+from transformers import PreTrainedModel, PretrainedConfig
+from tensorflow.keras.models import load_model
+from tensorflow.keras.preprocessing.text import tokenizer_from_json
 from tensorflow.keras.preprocessing.sequence import pad_sequences
+import numpy as np
 import json
+class NewsClassifierConfig(PretrainedConfig):
+    """Configuration for the news-source headline classifier.
+
+    Attributes:
+        max_length: Length that tokenized headlines are padded to.
+        vocab_size: Stored on the config; not read by the code in this file.
+        hidden_size: Stored on the config; not read by the code in this file.
+        num_labels: Number of output labels, managed by ``PretrainedConfig``.
+    """
+    model_type = "news_classifier"
+    def __init__(
+        self,
+        max_length=128,
+        vocab_size=10000,
+        hidden_size=64,
+        num_labels=2,
+        **kwargs
+    ):
+        """Store the classifier settings and initialize the base config.
+
+        Args:
+            max_length: Padding length for tokenized input.
+            vocab_size: Vocabulary size recorded in the config.
+            hidden_size: Hidden size recorded in the config.
+            num_labels: Number of labels; forwarded to the base class.
+            **kwargs: Any further ``PretrainedConfig`` options.
+        """
+        self.max_length = max_length
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        # num_labels is forwarded rather than assigned here: the base
+        # initializer sets it from its own keyword (default 2), so a value
+        # assigned beforehand and not forwarded would be overwritten.
+        super().__init__(num_labels=num_labels, **kwargs)
+class NewsClassifier(PreTrainedModel):
+    """Wrap a saved Keras headline classifier together with its tokenizer.
+
+    ``forward`` takes raw text, tokenizes and pads it, runs the Keras model,
+    and reports each headline as ``"foxnews"`` or ``"nbc"`` with a score.
+    """
+    config_class = NewsClassifierConfig
+    base_model_prefix = "news_classifier"
+    def __init__(self, config):
+        """Load ``news_classifier.h5`` and ``tokenizer.json``.
+
+        Each file is looked up next to this module first; when it is not
+        there, the bare filename is used, which resolves against the current
+        working directory.
+
+        Args:
+            config: The model configuration; ``max_length`` is read from it
+                by ``forward``.
+        """
+        # Function-scope import: os is not imported at module level.
+        import os
+        super().__init__(config)
+        module_dir = os.path.dirname(os.path.abspath(__file__))
+        def _asset_path(filename):
+            # Prefer the copy shipped beside this module; otherwise keep the
+            # working-directory-relative lookup.
+            beside_module = os.path.join(module_dir, filename)
+            return beside_module if os.path.exists(beside_module) else filename
+        self.model = load_model(_asset_path('news_classifier.h5'))
+        with open(_asset_path('tokenizer.json'), 'r', encoding='utf-8') as f:
+            tokenizer_data = json.load(f)
+        # NOTE(review): the json.load result is handed to tokenizer_from_json
+        # unchanged; presumably tokenizer.json holds a JSON-encoded string of
+        # the tokenizer config -- confirm against how the file was written.
+        self.tokenizer = tokenizer_from_json(tokenizer_data)
+    def forward(self, text_input):
+        """Classify one headline or a list of headlines.
+
+        Args:
+            text_input: A single string or a list of strings.
+
+        Returns:
+            A dict with ``"label"`` and ``"score"`` when exactly one text was
+            given (a bare string or a one-element list), otherwise a list of
+            such dicts. An empty input yields an empty list.
+        """
+        if isinstance(text_input, str):
+            text_input = [text_input]
+        # Nothing to classify: return early instead of calling the model
+        # with an empty batch.
+        if len(text_input) == 0:
+            return []
+        sequences = self.tokenizer.texts_to_sequences(text_input)
+        padded = pad_sequences(sequences, maxlen=self.config.max_length)
         predictions = self.model.predict(padded)
         results = []
+        # pred[0] above 0.5 is reported as "foxnews" with score pred[0];
+        # otherwise the label is "nbc" with score 1 - pred[0].
         for pred in predictions:
             label = "foxnews" if pred[0] > 0.5 else "nbc"
             score = float(pred[0] if label == "foxnews" else 1 - pred[0])
+            results.append({
+                "label": label,
+                "score": score
+            })
+        # A one-element input is unwrapped to a single dict, as before.
+        return results[0] if len(text_input) == 1 else results