Spaces:
Runtime error
Runtime error
Merge branch 'staging'
Browse files- mathtext_fastapi/conversation_manager.py +4 -0
- mathtext_fastapi/nlu.py +70 -18
- scripts/bump_version.py +36 -0
- scripts/cleanpyc.sh +2 -0
- scripts/make_request.py +15 -12
- scripts/pin_requirements.py +62 -0
mathtext_fastapi/conversation_manager.py
CHANGED
@@ -39,6 +39,7 @@ def create_text_message(message_text, whatsapp_id):
|
|
39 |
"preview_url": False,
|
40 |
"recipient_type": "individual",
|
41 |
"to": whatsapp_id,
|
|
|
42 |
"type": "text",
|
43 |
"text": {
|
44 |
"body": message_text
|
@@ -136,6 +137,9 @@ def manage_math_quiz_fsm(user_message, contact_uuid, type):
|
|
136 |
|
137 |
# Make a completely new entry
|
138 |
if fsm_check.data == []:
|
|
|
|
|
|
|
139 |
if type == 'addition':
|
140 |
math_quiz_state_machine = MathQuizFSM()
|
141 |
else:
|
|
|
39 |
"preview_url": False,
|
40 |
"recipient_type": "individual",
|
41 |
"to": whatsapp_id,
|
42 |
+
# FIXME: Better to use "message_type" (but be careful with refactor)
|
43 |
"type": "text",
|
44 |
"text": {
|
45 |
"body": message_text
|
|
|
137 |
|
138 |
# Make a completely new entry
|
139 |
if fsm_check.data == []:
|
140 |
+
# FIXME: Try not to shadow the Python builtin `type` with a variable or parameter name
|
141 |
+
# It's better to use `kind` or `convo_type` or `convo_name`
|
142 |
+
# NOTE: `type` here is the `type` parameter of manage_math_quiz_fsm (it shadows the builtin), which is why this comparison works.
|
143 |
if type == 'addition':
|
144 |
math_quiz_state_machine = MathQuizFSM()
|
145 |
else:
|
mathtext_fastapi/nlu.py
CHANGED
@@ -2,23 +2,32 @@ from collections.abc import Mapping
|
|
2 |
from logging import getLogger
|
3 |
import datetime as dt
|
4 |
from dateutil.parser import isoparse
|
5 |
-
import re
|
6 |
|
7 |
from fuzzywuzzy import fuzz
|
8 |
from mathtext_fastapi.logging import prepare_message_data_for_logging
|
9 |
from mathtext.sentiment import sentiment
|
10 |
-
from mathtext.text2int import text2int
|
11 |
-
from mathtext_fastapi.intent_classification import
|
12 |
|
13 |
log = getLogger(__name__)
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
|
18 |
-
def build_nlu_response_object(
|
19 |
""" Turns nlu results into an object to send back to Turn.io
|
20 |
Inputs
|
21 |
-
-
|
22 |
- data: str/int - the student message
|
23 |
- confidence: - the nlu confidence score (sentiment) or '' (integer)
|
24 |
|
@@ -28,7 +37,11 @@ def build_nlu_response_object(type, data, confidence):
|
|
28 |
>>> build_nlu_response_object('sentiment', 'POSITIVE', 0.99)
|
29 |
{'type': 'sentiment', 'data': 'POSITIVE', 'confidence': 0.99}
|
30 |
"""
|
31 |
-
return {
|
|
|
|
|
|
|
|
|
32 |
|
33 |
|
34 |
# def test_for_float_or_int(message_data, message_text):
|
@@ -144,7 +157,22 @@ def payload_is_valid(payload_object):
|
|
144 |
"""
|
145 |
>>> payload_is_valid({'author_id': '+5555555', 'author_type': 'OWNER', 'contact_uuid': '3246-43ad-faf7qw-zsdhg-dgGdg', 'message_body': 'thirty one', 'message_direction': 'inbound', 'message_id': 'SDFGGwafada-DFASHA4aDGA', 'message_inserted_at': '2022-07-05T04:00:34.03352Z', 'message_updated_at': '2023-04-06T10:08:23.745072Z'})
|
146 |
True
|
|
|
|
|
|
|
147 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
return (
|
149 |
isinstance(payload_object, Mapping) and
|
150 |
isinstance(payload_object.get('author_id'), str) and
|
@@ -155,17 +183,40 @@ def payload_is_valid(payload_object):
|
|
155 |
isinstance(payload_object.get('message_id'), str) and
|
156 |
isinstance(payload_object.get('message_inserted_at'), str) and
|
157 |
isinstance(payload_object.get('message_updated_at'), str) and
|
158 |
-
isinstance(payload_object.get('message_inserted_at'), str)
|
159 |
-
|
160 |
-
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
dt.datetime
|
163 |
-
)
|
|
|
|
|
|
|
|
|
164 |
isinstance(
|
165 |
-
|
166 |
dt.datetime
|
167 |
-
)
|
168 |
-
|
|
|
|
|
|
|
169 |
|
170 |
|
171 |
def evaluate_message_with_nlu(message_data):
|
@@ -182,14 +233,15 @@ def evaluate_message_with_nlu(message_data):
|
|
182 |
log.info(f'Starting evaluate message: {message_data}')
|
183 |
|
184 |
if not payload_is_valid(message_data):
|
185 |
-
|
|
|
186 |
|
187 |
try:
|
188 |
message_text = str(message_data.get('message_body', ''))
|
189 |
except:
|
190 |
log.error(f'Invalid request payload: {message_data}')
|
191 |
# use python logging system to do this//
|
192 |
-
return {'type': 'error', 'data':
|
193 |
|
194 |
# Run intent classification only for keywords
|
195 |
intent_api_response = run_intent_classification(message_text)
|
@@ -199,7 +251,7 @@ def evaluate_message_with_nlu(message_data):
|
|
199 |
|
200 |
number_api_resp = text2int(message_text.lower())
|
201 |
|
202 |
-
if number_api_resp ==
|
203 |
# Run intent classification with logistic regression model
|
204 |
predicted_label = predict_message_intent(message_text)
|
205 |
if predicted_label['confidence'] > 0.01:
|
|
|
2 |
from logging import getLogger
|
3 |
import datetime as dt
|
4 |
from dateutil.parser import isoparse
|
|
|
5 |
|
6 |
from fuzzywuzzy import fuzz
|
7 |
from mathtext_fastapi.logging import prepare_message_data_for_logging
|
8 |
from mathtext.sentiment import sentiment
|
9 |
+
from mathtext.text2int import text2int, TOKENS2INT_ERROR_INT
|
10 |
+
from mathtext_fastapi.intent_classification import predict_message_intent
|
11 |
|
12 |
log = getLogger(__name__)
|
13 |
|
14 |
+
# Expected Python type of each field in an incoming message payload.
# Each key is listed exactly once (a repeated key in a dict literal is
# silently collapsed, which hides copy-paste mistakes).
PAYLOAD_VALUE_TYPES = {
    'author_id': str,
    'author_type': str,
    'contact_uuid': str,
    'message_body': str,
    'message_direction': str,
    'message_id': str,
    'message_inserted_at': str,
    'message_updated_at': str,
}
|
25 |
|
26 |
|
27 |
+
def build_nlu_response_object(nlu_type, data, confidence):
|
28 |
""" Turns nlu results into an object to send back to Turn.io
|
29 |
Inputs
|
30 |
+
- nlu_type: str - the type of nlu run (integer or sentiment-analysis)
|
31 |
- data: str/int - the student message
|
32 |
- confidence: - the nlu confidence score (sentiment) or '' (integer)
|
33 |
|
|
|
37 |
>>> build_nlu_response_object('sentiment', 'POSITIVE', 0.99)
|
38 |
{'type': 'sentiment', 'data': 'POSITIVE', 'confidence': 0.99}
|
39 |
"""
|
40 |
+
return {
|
41 |
+
'type': nlu_type,
|
42 |
+
'data': data,
|
43 |
+
'confidence': confidence
|
44 |
+
}
|
45 |
|
46 |
|
47 |
# def test_for_float_or_int(message_data, message_text):
|
|
|
157 |
"""
|
158 |
>>> payload_is_valid({'author_id': '+5555555', 'author_type': 'OWNER', 'contact_uuid': '3246-43ad-faf7qw-zsdhg-dgGdg', 'message_body': 'thirty one', 'message_direction': 'inbound', 'message_id': 'SDFGGwafada-DFASHA4aDGA', 'message_inserted_at': '2022-07-05T04:00:34.03352Z', 'message_updated_at': '2023-04-06T10:08:23.745072Z'})
|
159 |
True
|
160 |
+
|
161 |
+
>>> message: {'author_id': '@event.message._vnd.v1.chat.owner', 'author_type': '@event.message._vnd.v1.author.type', 'contact_uuid': '@event.message._vnd.v1.chat.contact_uuid', 'message_body': '@event.message.text.body', 'message_direction': '@event.message._vnd.v1.direction', 'message_id': '@event.message.id', 'message_inserted_at': '@event.message._vnd.v1.chat.inserted_at', 'message_updated_at': '@event.message._vnd.v1.chat.updated_at'}
|
162 |
+
False
|
163 |
"""
|
164 |
+
try:
|
165 |
+
isinstance(
|
166 |
+
isoparse(payload_object.get('message_inserted_at')),
|
167 |
+
dt.datetime
|
168 |
+
)
|
169 |
+
isinstance(
|
170 |
+
isoparse(payload_object.get('message_updated_at')),
|
171 |
+
dt.datetime
|
172 |
+
)
|
173 |
+
except ValueError:
|
174 |
+
return False
|
175 |
+
|
176 |
return (
|
177 |
isinstance(payload_object, Mapping) and
|
178 |
isinstance(payload_object.get('author_id'), str) and
|
|
|
183 |
isinstance(payload_object.get('message_id'), str) and
|
184 |
isinstance(payload_object.get('message_inserted_at'), str) and
|
185 |
isinstance(payload_object.get('message_updated_at'), str) and
|
186 |
+
isinstance(payload_object.get('message_inserted_at'), str)
|
187 |
+
)
|
188 |
+
|
189 |
+
|
190 |
+
def log_payload_errors(payload_object):
    """ Log every problem found in a request payload and return them

    Inputs
    - payload_object: the decoded HTTP request payload (expected: a Mapping)

    Outputs
    - list of exception objects, one per problem found (empty if none)

    Checks performed
    - the payload is a Mapping
    - each key in PAYLOAD_VALUE_TYPES holds a value of the expected type
    - both timestamp fields parse with isoparse, the same parser
      payload_is_valid uses, so the two functions agree on what is valid
    """
    errors = []

    def record(error):
        # Explicit exceptions (not `assert`) so checks survive `python -O`
        # and the log line carries a useful message.
        log.error(f'Invalid HTTP request payload object: {error}')
        errors.append(error)

    if not isinstance(payload_object, Mapping):
        record(TypeError(
            f'payload must be a Mapping, got {type(payload_object).__name__}'
        ))
        # No per-key checks are possible without a Mapping
        return errors

    for key, expected_type in PAYLOAD_VALUE_TYPES.items():
        value = payload_object.get(key)
        if not isinstance(value, expected_type):
            record(TypeError(
                f'{key!r} must be {expected_type.__name__}, '
                f'got {type(value).__name__}'
            ))

    for key in ('message_inserted_at', 'message_updated_at'):
        value = payload_object.get(key)
        if not isinstance(value, str):
            # Already reported by the type check above
            continue
        try:
            isoparse(value)
        except (TypeError, ValueError) as e:
            record(ValueError(f'{key!r} is not an ISO-8601 timestamp: {e}'))

    return errors
|
220 |
|
221 |
|
222 |
def evaluate_message_with_nlu(message_data):
|
|
|
233 |
log.info(f'Starting evaluate message: {message_data}')
|
234 |
|
235 |
if not payload_is_valid(message_data):
|
236 |
+
log_payload_errors(message_data)
|
237 |
+
return {'type': 'error', 'data': TOKENS2INT_ERROR_INT, 'confidence': 0}
|
238 |
|
239 |
try:
|
240 |
message_text = str(message_data.get('message_body', ''))
|
241 |
except:
|
242 |
log.error(f'Invalid request payload: {message_data}')
|
243 |
# use python logging system to do this//
|
244 |
+
return {'type': 'error', 'data': TOKENS2INT_ERROR_INT, 'confidence': 0}
|
245 |
|
246 |
# Run intent classification only for keywords
|
247 |
intent_api_response = run_intent_classification(message_text)
|
|
|
251 |
|
252 |
number_api_resp = text2int(message_text.lower())
|
253 |
|
254 |
+
if number_api_resp == TOKENS2INT_ERROR_INT:
|
255 |
# Run intent classification with logistic regression model
|
256 |
predicted_label = predict_message_intent(message_text)
|
257 |
if predicted_label['confidence'] > 0.01:
|
scripts/bump_version.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
from pathlib import Path
|
3 |
+
import re
|
4 |
+
import shutil
|
5 |
+
|
6 |
+
BASE_DIR = Path(__file__).parent.parent
PYPROJECT_PATH = BASE_DIR / 'pyproject.toml'
# `\d+` for the first component so versions like 10.2.3 are matched too
PATTERN = re.compile(r'(version\s*=\s*)[\'"]?(\d+(\.\d+)+)[\'"]?\s*')


def bump_version_line(line):
    """ Return a new `version = "..."` line with the last component + 1

    Returns None when `line` does not start with a version assignment.

    >>> bump_version_line('version = "0.0.12"')
    'version = "0.0.13"\\n'
    """
    match = PATTERN.match(line)
    if not match:
        return None
    ver = [int(x) for x in match.group(2).split('.')]
    ver[-1] += 1
    ver = '.'.join(str(x) for x in ver)
    return f'version = "{ver}"\n'


if __name__ == '__main__':
    with PYPROJECT_PATH.open(encoding='utf-8') as fin:
        lines = list(fin)

    bumped = False
    for i, line in enumerate(lines):
        verline = bump_version_line(line)
        if verline is None:
            continue
        print(f'  Old ver line: {line}')
        print(f'  New ver line: {verline}')
        lines[i] = verline
        bumped = True
        # Only the first version line is bumped
        break

    if bumped:
        # Keep a backup next to the original before overwriting it
        shutil.copy(PYPROJECT_PATH, PYPROJECT_PATH.with_suffix('.toml.bak'))
        with PYPROJECT_PATH.open('w', encoding='utf-8') as fout:
            fout.writelines(lines)
|
scripts/cleanpyc.sh
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
# Remove Python bytecode caches (__pycache__ directories, *.pyc and *.pyo
# files) below the current directory.
# -prune keeps find from descending into a directory that is being deleted;
# -exec ... {} + hands paths straight to rm, so names with spaces are safe.
find . \( -name '__pycache__' -o -name '*.pyc' -o -name '*.pyo' \) -prune -exec rm -rf {} +
|
scripts/make_request.py
CHANGED
@@ -48,18 +48,21 @@ def run_simulated_request(endpoint, sample_answer, context=None):
|
|
48 |
print(f"Case: {sample_answer}")
|
49 |
b_string = add_message_text_to_sample_object(sample_answer)
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
63 |
|
64 |
|
65 |
# run_simulated_request('intent-classification', 'exit')
|
|
|
48 |
print(f"Case: {sample_answer}")
|
49 |
b_string = add_message_text_to_sample_object(sample_answer)
|
50 |
|
51 |
+
print("BSTRING")
|
52 |
+
print(b_string)
|
53 |
+
|
54 |
+
# if endpoint == 'sentiment-analysis' or endpoint == 'text2int' or endpoint =='intent-classification':
|
55 |
+
# request = requests.post(
|
56 |
+
# url=f'http://localhost:7860/{endpoint}',
|
57 |
+
# json={'content': sample_answer}
|
58 |
+
# ).json()
|
59 |
+
# else:
|
60 |
+
# request = requests.post(
|
61 |
+
# url=f'http://localhost:7860/{endpoint}',
|
62 |
+
# data=b_string
|
63 |
+
# ).json()
|
64 |
+
|
65 |
+
# print(request)
|
66 |
|
67 |
|
68 |
# run_simulated_request('intent-classification', 'exit')
|
scripts/pin_requirements.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
""" Parse requirements.txt and pyproject.toml and move versions to pyproject.toml """
|
2 |
+
from pathlib import Path
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
import toml
|
6 |
+
|
7 |
+
def get_requirement_versions(path='requirements.txt'):
    """ Read requirements.txt file and return dict of package versions

    Inputs
    - path: a requirements file, or a directory that is searched
      recursively for the first `requirements.txt` (None or '' means the
      current directory)

    Outputs
    - dict mapping package name to its version specifier string
      (e.g. {'numpy': '==1.24.0'}); lines without a version specifier,
      blank lines and comments are skipped

    Raises
    - FileNotFoundError if a directory contains no requirements.txt
    """
    path = Path(path or '')
    if path.is_dir():
        found = next(path.glob('**/requirements.txt'), None)
        if found is None:
            raise FileNotFoundError(f'No requirements.txt found under {path}')
        path = found

    # The specifier must START with a comparison operator; otherwise the
    # tail of an unpinned name (e.g. the "b" of "matplotlib") would be
    # captured as its version.
    pattern = re.compile(
        r'([-_a-zA-Z0-9]+)\s*([>=<~^!][ >=<~^!,.*rabc0-9]*)'
    )
    reqdict = {}
    with path.open() as stream:
        for line in stream:
            match = pattern.match(line)
            if match:
                name, ver = match.groups()
                # Drop trailing whitespace captured before a comment or EOL
                reqdict[name] = ver.strip()
    return reqdict
|
21 |
+
|
22 |
+
|
23 |
+
def normalize_name(name):
    """ Lower-case a package name and turn underscores/spaces into hyphens """
    trimmed = str(name).strip()
    # One translate pass maps both '_' and ' ' to '-'
    hyphenated = trimmed.translate(str.maketrans('_ ', '--'))
    return hyphenated.lower()
|
25 |
+
|
26 |
+
|
27 |
+
def pin_versions(pyproject='pyproject.toml', reqdict=None, overwrite=False):
    """ Copy version specifiers from requirements into pyproject.toml

    Inputs
    - pyproject: path of the pyproject.toml file to load (and maybe rewrite)
    - reqdict: dict of {package name: version spec}, or a path (str/Path)
      or None, in which case get_requirement_versions() is used
    - overwrite: if True, replace existing string version specs and write
      the file without asking; if False, only '*' specs are replaced and
      the user is prompted before the file is written

    Outputs
    - the loaded pyproject data with the updated
      [tool.poetry.dependencies] table (names normalized)
    """
    if not reqdict or isinstance(reqdict, (str, Path)):
        reqdict = get_requirement_versions(path=reqdict)
    reqdict = {normalize_name(k): v for (k, v) in reqdict.items()}

    pyproj = toml.load(pyproject)
    # setdefault so a file without [tool.poetry] is handled on write-back
    # the same way it is tolerated on read, instead of raising KeyError
    poetry = pyproj.setdefault('tool', {}).setdefault('poetry', {})
    depdict = {
        normalize_name(k): v for (k, v) in
        poetry.get('dependencies', {}).items()
    }

    for name, spec in reqdict.items():
        ver = depdict.get(name)
        # Only plain string specs are touched; table-style deps are left alone
        if isinstance(ver, str) and (overwrite or ver == '*'):
            depdict[name] = spec

    poetry['dependencies'] = depdict

    if not overwrite:
        # An empty answer (just pressing Enter) means "no", not an IndexError
        answer = input(f'Overwrite {pyproject}?').strip().lower()
        overwrite = answer.startswith('y')
    if overwrite:
        with open(pyproject, 'w') as stream:
            toml.dump(pyproj, stream)
    return pyproj
|
54 |
+
|
55 |
+
|
56 |
+
if __name__ == '__main__':
    # The first command-line argument, when given, replaces the default path
    cli_args = sys.argv[1:]
    path = cli_args[0] if cli_args else 'requirements.txt'
    pyproj = pin_versions(reqdict=path)
    print(toml.dumps(pyproj))
|
62 |
+
|