diff --git "a/README.md" "b/README.md" --- "a/README.md" +++ "b/README.md" @@ -26,89 +26,15 @@ widget: - en - en lemmatized: - - - clear - - compelling - - evidence - - many - - major - - conclusion - - ipcc - - new - - religion - - constantly-changing - - holy - - book - - based - - evidence - - fabricated - - hockey - - stick - - graph - - purported - - abolish - - mediaeval - - warm - - period - - one - - example - - - holocene - - last - - 10k - - year - - sea - - level - - rising - - rate - - around - - 2mm - - per - - year - - climate - - change - - little - - '''s' - - simply - - long - - term - - inevitability - - end - - current - - interglacial - - return - - glacial - - period - - regret - - - china - - host - - u.n. - - climate - - talk - - next - - week - - first - - time - - promoting - - call - - ambitious - - plan - - boost - - energy - - efficiency - - curb - - emission - - supercharged - - growth - - mean - - even - - rapid - - efficiency - - gain - - cancel - - global - - effort - - combat - - climate - - change + - clear compelling evidence many major conclusion ipcc new religion constantly-changing + holy book based evidence fabricated hockey stick graph purported abolish mediaeval + warm period one example + - holocene last 10k year sea level rising rate around 2mm per year climate change + little 's simply long term inevitability end current interglacial return glacial + period regret + - china host u.n. climate talk next week first time promoting call ambitious plan + boost energy efficiency curb emission supercharged growth mean even rapid efficiency + gain cancel global effort combat climate change quote: - There is clear, compelling evidence that many of the major conclusions of the IPCC, your new religions constantly-changing Holy Book, are based on evidence @@ -240,16 +166,16 @@ This model is not ready to be used in production. 
| Hyperparameter | Value |
|-------------------------------|-------|
| memory | |
-| steps | [('lemmatizer', FunctionTransformer(func=<function lemmatize_X at 0x7f7a77c96550>)), ('tfidf', TfidfVectorizer(max_df=0.95, min_df=2,
stop_words=['i', ',', 'you', '(', 'it', '\\', '*', 'that', 'do',
'needn', 'doesn', 'won', 'which', 'been', 'aren',
'more', 'while', 'until', 'hadn', '^', 'was', '=',
'their', 'o', 'are', 'by', "don't", 'ours',
'yourself', 'ma', ...],
tokenizer=<function tokenize_quote at 0x7f7b459159d0>)), ('rf', RandomForestClassifier(bootstrap=False, max_features=0.35, min_samples_leaf=4,
min_samples_split=7, n_estimators=155))] |
+| steps | [('lemmatizer', FunctionTransformer(func=<function lemmatize_X at 0x7fb4a5fb33a0>)), ('tfidf', TfidfVectorizer(max_df=0.95, min_df=2,
stop_words=['just', 'himself', 'most', '"', '|', 'where',
'didn', 'with', '(', 'at', 'once', 'she', 'do',
'below', 'y', 'having', 'about', '^', ')', '@',
'theirs', "wasn't", 'the', 'wouldn', 'for', 'after',
'to', "you'll", ';', 'over', ...],
tokenizer=<function tokenize_quote at 0x7fb4a5fb31f0>)), ('rf', RandomForestClassifier())] |
| transform_input | |
| verbose | False |
-| lemmatizer | FunctionTransformer(func=<function lemmatize_X at 0x7f7a77c96550>) |
-| tfidf | TfidfVectorizer(max_df=0.95, min_df=2,
stop_words=['i', ',', 'you', '(', 'it', '\\', '*', 'that', 'do',
'needn', 'doesn', 'won', 'which', 'been', 'aren',
'more', 'while', 'until', 'hadn', '^', 'was', '=',
'their', 'o', 'are', 'by', "don't", 'ours',
'yourself', 'ma', ...],
tokenizer=<function tokenize_quote at 0x7f7b459159d0>) |
-| rf | RandomForestClassifier(bootstrap=False, max_features=0.35, min_samples_leaf=4,
min_samples_split=7, n_estimators=155) |
+| lemmatizer | FunctionTransformer(func=<function lemmatize_X at 0x7fb4a5fb33a0>) |
+| tfidf | TfidfVectorizer(max_df=0.95, min_df=2,
stop_words=['just', 'himself', 'most', '"', '|', 'where',
'didn', 'with', '(', 'at', 'once', 'she', 'do',
'below', 'y', 'having', 'about', '^', ')', '@',
'theirs', "wasn't", 'the', 'wouldn', 'for', 'after',
'to', "you'll", ';', 'over', ...],
tokenizer=) | +| rf | RandomForestClassifier() | | lemmatizer__accept_sparse | False | | lemmatizer__check_inverse | True | | lemmatizer__feature_names_out | | -| lemmatizer__func | | +| lemmatizer__func | | | lemmatizer__inv_kw_args | | | lemmatizer__inverse_func | | | lemmatizer__kw_args | | @@ -268,27 +194,27 @@ This model is not ready to be used in production. | tfidf__norm | l2 | | tfidf__preprocessor | | | tfidf__smooth_idf | True | -| tfidf__stop_words | ['i', ',', 'you', '(', 'it', '\\', '*', 'that', 'do', 'needn', 'doesn', 'won', 'which', 'been', 'aren', 'more', 'while', 'until', 'hadn', '^', 'was', '=', 'their', 'o', 'are', 'by', "don't", 'ours', 'yourself', 'ma', 'against', '~', 'just', 'her', 'hers', '`', 'through', 'if', "hadn't", "haven't", 'theirs', 'those', 'don', 'nor', 'its', "you'll", 'each', 'and', 'should', 'me', "shan't", 'she', 'were', 'to', 'hasn', "'", 'his', 'we', ';', 'over', 'couldn', 'into', 'y', 'our', 'down', "that'll", 'up', 'how', 'now', '"', 'myself', 'itself', 'them', '}', 'my', '.', 'because', 'did', "weren't", "should've", 'haven', 'once', '#', ']', '_', "you're", 'than', '/', "hasn't", 'll', 'the', '$', '{', 'so', 'mustn', 'only', 'they', 'off', "mightn't", 's', 'wasn', 'him', "wouldn't", 'not', 'mightn', '[', 'with', '-', 'some', 'didn', "isn't", "doesn't", "mustn't", 'most', 'isn', 'who', 'out', 'weren', 'being', '>', 'few', "you'd", 'wouldn', 'yourselves', 'has', '|', 'when', 'why', 'before', "aren't", '+', 'further', 'a', 'same', 'ain', 're', "she's", "it's", 'this', 'is', 'own', "shouldn't", 'ourselves', 'below', "needn't", 'very', 'your', ':', '@', 'having', 'doing', "wasn't", 'he', 'have', 'whom', 'as', 'no', 'all', 'where', "didn't", 'yours', 'other', 'or', '?', 'had', 'from', 'there', 'd', 'both', "won't", 'am', 'during', 'under', 't', 'again', 'here', 'too', '&', 'any', 'for', 'shan', 'of', 've', 'an', 'these', 'such', 'herself', 'at', 'can', 'be', 'does', 'about', "couldn't", 'themselves', 'above', 'will', '<', 'between', 'on', "you've", 'what', 'then', '!', 'in', 'himself', '%', ')', 'after', 'm', 'but', 'shouldn'] | +| tfidf__stop_words | ['just', 'himself', 'most', '"', '|', 'where', 'didn', 'with', '(', 'at', 'once', 'she', 'do', 'below', 'y', 'having', 'about', '^', ')', '@', 'theirs', "wasn't", 'the', 'wouldn', 'for', 'after', 'to', "you'll", ';', 'over', 'why', 'those', 'hers', 'should', 'their', 'is', 'weren', 'doesn', "you've", 'we', 'been', 'did', 'further', "shouldn't", 'during', '%', 'a', 'into', 'mustn', '=', 'under', 'are', 'very', 'don', 'm', 'before', 'i', 'too', 'you', 'which', 'such', 'o', "hadn't", 'what', 'of', 't', 've', 'them', '[', 'some', "don't", "isn't", 'his', 'd', 'mightn', '{', '/', 're', 'whom', 'now', 'same', 'above', 'own', 'was', 'in', 'up', 'shouldn', 'more', 'while', 'from', 'here', 'because', '+', 'were', 'each', "mightn't", "aren't", "it's", "you're", 'how', 'they', 'itself', 'wasn', 'aren', '<', 'as', 'ourselves', "doesn't", 'hadn', "won't", 'other', ':', "weren't", '*', 'he', 'hasn', "hasn't", "you'd", 'through', "wouldn't", '!', 'by', 'couldn', ',', 'these', '$', '~', 'any', 'when', "that'll", 'out', 'there', 'has', 'off', 'not', 'until', 'than', "didn't", 'will', "mustn't", 'am', 'that', 'll', 'yourselves', 'my', '`', '_', 'this', 'ma', 'her', 'an', 'its', 'herself', 'but', 'between', 'ain', 'nor', "shan't", '}', '?', 'had', 'themselves', 'our', "she's", 'against', 'or', 'few', "should've", '&', 'have', "haven't", "'", 'then', "couldn't", 'and', 'being', 'doing', ']', '.', '-', "needn't", 'yours', 
'only', 'myself', 'all', 'does', 'haven', 's', 'again', 'no', 'won', '>', 'if', 'isn', 'me', 'so', 'yourself', 'be', 'can', '#', 'ours', 'both', 'down', 'shan', 'it', 'him', 'who', 'on', 'your', 'needn', '\\'] | | tfidf__strip_accents | | | tfidf__sublinear_tf | False | | tfidf__token_pattern | (?u)\b\w\w+\b | -| tfidf__tokenizer | | +| tfidf__tokenizer | | | tfidf__use_idf | True | | tfidf__vocabulary | | -| rf__bootstrap | False | +| rf__bootstrap | True | | rf__ccp_alpha | 0.0 | | rf__class_weight | | | rf__criterion | gini | | rf__max_depth | | -| rf__max_features | 0.35 | +| rf__max_features | sqrt | | rf__max_leaf_nodes | | | rf__max_samples | | | rf__min_impurity_decrease | 0.0 | -| rf__min_samples_leaf | 4 | -| rf__min_samples_split | 7 | +| rf__min_samples_leaf | 1 | +| rf__min_samples_split | 2 | | rf__min_weight_fraction_leaf | 0.0 | | rf__monotonic_cst | | -| rf__n_estimators | 155 | +| rf__n_estimators | 100 | | rf__n_jobs | | | rf__oob_score | False | | rf__random_state | | @@ -299,58 +225,58 @@ This model is not ready to be used in production. ### Model Plot -
Pipeline(steps=[('lemmatizer',FunctionTransformer(func=<function lemmatize_X at 0x7f7a77c96550>)),('tfidf',TfidfVectorizer(max_df=0.95, min_df=2,stop_words=['i', ',', 'you', '(', 'it', '\\','*', 'that', 'do', 'needn','doesn', 'won', 'which', 'been','aren', 'more', 'while', 'until','hadn', '^', 'was', '=', 'their','o', 'are', 'by', "don't", 'ours','yourself', 'ma', ...],tokenizer=<function tokenize_quote at 0x7f7b459159d0>)),('rf',RandomForestClassifier(bootstrap=False, max_features=0.35,min_samples_leaf=4, min_samples_split=7,n_estimators=155))])
+
Pipeline(steps=[('lemmatizer',FunctionTransformer(func=<function lemmatize_X at 0x7fb4a5fb33a0>)),('tfidf',TfidfVectorizer(max_df=0.95, min_df=2,stop_words=['just', 'himself', 'most', '"','|', 'where', 'didn', 'with', '(','at', 'once', 'she', 'do', 'below','y', 'having', 'about', '^', ')','@', 'theirs', "wasn't", 'the','wouldn', 'for', 'after', 'to',"you'll", ';', 'over', ...],tokenizer=<function tokenize_quote at 0x7fb4a5fb31f0>)),('rf', RandomForestClassifier())])
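For reference, the retrained pipeline shown in the plot above (and in the `+` rows of the hyperparameter table) could be rebuilt roughly as follows. This is a minimal sketch only: the real `lemmatize_X` and `tokenize_quote` functions are not included in the diff, so the NLTK-based stand-ins below, the WordNet lemmatizer, and the stop-word construction are assumptions for illustration, not the author's code.

```python
# Sketch of the updated pipeline: default RandomForestClassifier, TfidfVectorizer
# with max_df=0.95 / min_df=2, a custom tokenizer, and a stop-word list.
# lemmatize_X and tokenize_quote are ASSUMED implementations; the diff does not
# contain their real bodies.
import string

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

nltk.download("punkt")      # tokenizer models
nltk.download("stopwords")  # English stop words
nltk.download("wordnet")    # lemmatizer data

_lemmatizer = WordNetLemmatizer()


def tokenize_quote(text):
    """Assumed tokenizer: lower-case the quote and split it into word tokens."""
    return word_tokenize(text.lower())


def lemmatize_X(X):
    """Assumed lemmatizer step: lemmatize each quote and re-join the tokens,
    giving whitespace-joined strings like the `lemmatized` widget examples
    at the top of the card."""
    return [
        " ".join(_lemmatizer.lemmatize(tok) for tok in tokenize_quote(quote))
        for quote in X
    ]


# The stop-word list in the card appears to mix NLTK English stop words with
# punctuation characters; this reconstruction is an assumption.
stop_words = sorted(set(stopwords.words("english")) | set(string.punctuation))

pipeline = Pipeline(
    steps=[
        ("lemmatizer", FunctionTransformer(func=lemmatize_X)),
        ("tfidf", TfidfVectorizer(max_df=0.95, min_df=2,
                                  stop_words=stop_words,
                                  tokenizer=tokenize_quote)),
        # Defaults per the "+" rows: n_estimators=100, bootstrap=True, max_features="sqrt".
        ("rf", RandomForestClassifier()),
    ]
)

# Usage sketch: fit on labelled quotes, then classify a new one.
# pipeline.fit(train_quotes, train_labels)
# pipeline.predict(["sea level rising around 2mm per year for the whole holocene"])
```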
## Evaluation Results -| Metric | Value | -|----------|---------| -| accuracy | 0.8105 | -| f1 score | 0.8105 | +| Metric | Value | +|----------|----------| +| accuracy | 0.867925 | +| f1 score | 0.867925 | # How to Get Started with the Model