huimanho committed
Commit cb771e6 · verified · 1 Parent(s): 0cc0e2f

Update app.py

Files changed (1)
  1. app.py +17 -6
app.py CHANGED
@@ -1,13 +1,22 @@
 from flask import Flask, request, render_template
 import pandas as pd
-import spacy-transformers
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
 from transformers import pipeline
 
 # Initialize Flask app
 app = Flask(__name__)
 
-# Load spaCy model for preprocessing
-nlp = spacy-transformers.load("en_core_web_sm")
+# Download NLTK resources
+nltk.download('punkt')
+nltk.download('stopwords')
+nltk.download('wordnet')
+
+# Initialize NLTK components
+lemmatizer = WordNetLemmatizer()
+stop_words = set(stopwords.words('english'))
 
 # Load Hugging Face pipelines
 sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
@@ -15,9 +24,11 @@ ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-e
 
 # Function to preprocess text
 def preprocess_text(text):
-    doc = nlp(text)
-    tokens = [token.lemma_.lower() for token in doc if not token.is_stop and not token.is_punct]
-    return ' '.join(tokens)
+    # Tokenize
+    tokens = word_tokenize(text)
+    # Remove stop words and lemmatize
+    cleaned_tokens = [lemmatizer.lemmatize(token.lower()) for token in tokens if token.isalpha() and token.lower() not in stop_words]
+    return ' '.join(cleaned_tokens)
 
 @app.route('/')
 def home():
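
For reference, here is a minimal, self-contained sketch (not part of the commit) of how the updated NLTK-based preprocess_text behaves end to end. The sample sentence and printed output are illustrative assumptions; note that WordNetLemmatizer lemmatizes as nouns by default, so verbs such as "sitting" pass through unchanged.

# Sketch mirroring the new preprocessing code from app.py, with a sample call
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download the resources the commit relies on
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    # Tokenize, keep alphabetic non-stop-word tokens, and lemmatize them
    tokens = word_tokenize(text)
    cleaned_tokens = [
        lemmatizer.lemmatize(token.lower())
        for token in tokens
        if token.isalpha() and token.lower() not in stop_words
    ]
    return ' '.join(cleaned_tokens)

# Illustrative input/output (assumed, not from the repo):
print(preprocess_text("The cats were sitting on the mats!"))
# prints: cat sitting mat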