resolverkatla committed on
Commit
fe81869
·
verified ·
1 Parent(s): 50944f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -20,8 +20,10 @@ def clean_text(text):
20
 
21
  texts_cleaned = [clean_text(t) for t in texts]
22
 
23
- # 3. Train/test split
24
- X_train, X_test, y_train, y_test = train_test_split(texts_cleaned, labels, test_size=0.2, random_state=42)
 
 
25
 
26
  # 4. Build model: TF-IDF + Logistic Regression
27
  model = make_pipeline(
 
20
 
21
  texts_cleaned = [clean_text(t) for t in texts]
22
 
23
+ # 3. Train/test split (use stratified sampling!)
24
+ X_train, X_test, y_train, y_test = train_test_split(
25
+ texts_cleaned, labels, test_size=0.2, random_state=42, stratify=labels
26
+ )
27
 
28
  # 4. Build model: TF-IDF + Logistic Regression
29
  model = make_pipeline(