kantundpeterpan committed on
Commit 0b492aa · 1 Parent(s): 0cfe465

ready for text endpoint

Files changed (1):
  1. tasks/text.py (+20 -17)
tasks/text.py CHANGED
@@ -10,27 +10,27 @@ from huggingface_hub import hf_hub_download
 import joblib
 
 REPO_ID = "kantundpeterpan/frugal-ai-toy"
-FILENAME = "tfidf.skops"
+FILENAME = "tfidf_rf.skops"
 
-import nltk
-from nltk.tokenize import WordPunctTokenizer
-from nltk.stem import WordNetLemmatizer
-from nltk.corpus import stopwords
-import string
-nltk.download('stopwords')
+# import nltk
+# from nltk.tokenize import WordPunctTokenizer
+# from nltk.stem import WordNetLemmatizer
+# from nltk.corpus import stopwords
+# import string
+# nltk.download('stopwords')
 
-stop = set(stopwords.words('english') + list(string.punctuation))
+# stop = set(stopwords.words('english') + list(string.punctuation))
 
-def tokenize_quote(r):
-    tokens = nltk.word_tokenize(r.lower())
-    cleaned = [word for word in tokens if word not in stop]
-    return cleaned
+# def tokenize_quote(r):
+#     tokens = nltk.word_tokenize(r.lower())
+#     cleaned = [word for word in tokens if word not in stop]
+#     return cleaned
 
-def lemmatize_tokens(tokens: list):
-    return [lemmatizer.lemmatize(t) for t in tokens]
+# def lemmatize_tokens(tokens: list):
+#     return [lemmatizer.lemmatize(t) for t in tokens]
 
-def lemmatize_X(X):
-    return X.quote.apply(tokenize_quote).apply(lemmatize_tokens).apply(lambda x: " ".join(x))
+# def lemmatize_X(X):
+#     return X.quote.apply(tokenize_quote).apply(lemmatize_tokens).apply(lambda x: " ".join(x))
 
 import random
 
@@ -86,10 +86,13 @@ async def evaluate_text(request: TextEvaluationRequest):
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
 
+    #get unknown types
+    unknown = skops.io.get_untrusted_types(file = hf_hub_download(repo_id=REPO_ID, filename=FILENAME))
+
     #load model
     model = sio.load(
         hf_hub_download(repo_id=REPO_ID, filename=FILENAME),
-        trusted=['__main__.lemmatize_X', '__main__.tokenize_quote']
+        trusted=unknown
     )
 
     # Make predictions
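As committed, the now-commented preprocessing block in the first hunk was not self-contained: `lemmatize_tokens` uses a `lemmatizer` that is never instantiated, `WordPunctTokenizer` is imported but unused, and `nltk.word_tokenize` and `WordNetLemmatizer` need the `punkt` and `wordnet` data on top of `stopwords`. A minimal runnable sketch with those gaps filled, in case the pipeline is reinstated (the lemmatizer instantiation and the extra downloads are additions, not part of the commit):

import string

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

nltk.download('stopwords')
nltk.download('punkt')    # needed by nltk.word_tokenize
nltk.download('wordnet')  # needed by WordNetLemmatizer

stop = set(stopwords.words('english') + list(string.punctuation))
lemmatizer = WordNetLemmatizer()  # missing in the original snippet

def tokenize_quote(r):
    # lowercase, tokenize, drop stopwords and punctuation
    tokens = nltk.word_tokenize(r.lower())
    return [word for word in tokens if word not in stop]

def lemmatize_tokens(tokens: list):
    return [lemmatizer.lemmatize(t) for t in tokens]

def lemmatize_X(X):
    # X is assumed to be a pandas DataFrame with a 'quote' column
    return X.quote.apply(tokenize_quote).apply(lemmatize_tokens).apply(" ".join)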
 
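The second hunk replaces the hard-coded `trusted` allow-list, which had to name the `__main__` preprocessing functions explicitly, with whatever `skops.io.get_untrusted_types` reports for the artifact. A minimal sketch of that loading pattern, assuming `skops` and `huggingface_hub` are installed (`local_path` is an illustrative name; the committed code also needs `import skops.io` in scope for the `skops.io.…` call, alongside the existing `sio` alias):

import skops.io as sio
from huggingface_hub import hf_hub_download

REPO_ID = "kantundpeterpan/frugal-ai-toy"
FILENAME = "tfidf_rf.skops"

# Download (or reuse the cached copy of) the serialized pipeline.
local_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

# skops refuses to load types it cannot verify as safe;
# get_untrusted_types lists everything that would be rejected.
unknown = sio.get_untrusted_types(file=local_path)

# Passing that list back as `trusted` accepts them all and loads the model.
model = sio.load(local_path, trusted=unknown)

Trusting every reported type wholesale is reasonable for an artifact you built yourself, as here, but it bypasses the review step the mechanism exists for; for third-party files the `unknown` list is worth inspecting before loading.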