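# NOTE: the following appears to be web-page residue from the file viewer this
# source was copied from, kept here as a comment so the module stays importable:
# Tymec's picture | Model | 391bd16 | raw | history blame | 1.02 kB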
from __future__ import annotations
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from .base import Model
class TfidfLR(Model):
    """Sentiment analysis model using TF-IDF and Logistic Regression.

    The model is a three-stage scikit-learn ``Pipeline``:

    1. ``vectorize`` -- ``CountVectorizer`` over unigrams and bigrams, with
       English stop words removed and at most 10,000 features.
    2. ``tfidf`` -- ``TfidfTransformer`` applied to the raw counts.
    3. ``clf`` -- ``LogisticRegression`` with ``max_iter=1000``.

    NOTE(review): ``self.rng`` and ``self.cache`` are read while building the
    pipeline but are not defined in this class; presumably they are provided
    by ``Model`` -- confirm against the base class.
    """

    def __init__(self):
        """Build the vectorize -> tfidf -> clf pipeline and store it."""
        steps = [
            (
                "vectorize",
                CountVectorizer(
                    stop_words="english",
                    ngram_range=(1, 2),
                    max_features=10000,
                ),
            ),
            ("tfidf", TfidfTransformer()),
            # random_state is taken from self.rng (not defined in this class).
            ("clf", LogisticRegression(max_iter=1000, random_state=self.rng)),
        ]
        # ``memory`` lets the pipeline cache fitted transformers; the value
        # comes from self.cache (not defined in this class).
        self._pipeline = Pipeline(steps, memory=self.cache)

    @property
    def pipeline(self) -> Pipeline:
        """The scikit-learn pipeline built in ``__init__``."""
        return self._pipeline

    @property
    def description(self) -> str:
        """Short human-readable name of the model."""
        return "TF-IDF with Logistic Regression"

    def _predict(self, text: str) -> int:
        """Predict the label of a single text.

        Args:
            text: The raw input text to classify.

        Returns:
            The predicted label as a built-in ``int``.
        """
        # predict() works on batches, so wrap the text in a one-element list
        # and take the first (only) prediction.
        label = self.pipeline.predict([text])[0]
        # The array element is a NumPy scalar; convert it so the declared
        # ``int`` return type actually holds (e.g. for JSON serialisation).
        return int(label)