Merge branch 'staging'
Files changed:
- mathtext_fastapi/conversation_manager.py +4 -0
- mathtext_fastapi/nlu.py +70 -18
- scripts/bump_version.py +36 -0
- scripts/cleanpyc.sh +2 -0
- scripts/make_request.py +15 -12
- scripts/pin_requirements.py +62 -0
mathtext_fastapi/conversation_manager.py
CHANGED

@@ -39,6 +39,7 @@ def create_text_message(message_text, whatsapp_id):
         "preview_url": False,
         "recipient_type": "individual",
         "to": whatsapp_id,
+        # FIXME: Better to use "message_type" (but be careful with refactor)
         "type": "text",
         "text": {
             "body": message_text

@@ -136,6 +137,9 @@ def manage_math_quiz_fsm(user_message, contact_uuid, type):

     # Make a completely new entry
     if fsm_check.data == []:
+        # FIXME: Try not to use the Python reserved keyword `type` as a variable name
+        # It's better to use `kind` or `convo_type` or `convo_name`
+        # And the variable `type` is not defined here so I don't understand how this is working at all.
         if type == 'addition':
             math_quiz_state_machine = MathQuizFSM()
         else:
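As an aside on the FIXME comments above, a short self-contained illustration (hypothetical, not taken from this repo) of why shadowing the builtin `type` with a parameter name is worth avoiding:

    # Hypothetical example: once the parameter rebinds `type`, the builtin is
    # no longer reachable inside the function body.
    def manage_quiz(user_message, type):
        # Calling type(user_message) here would invoke the *string parameter*,
        # not the builtin, and raise TypeError.
        return type == 'addition'

    def manage_quiz_renamed(user_message, convo_type):
        # With a distinct name, the flag and the builtin both stay usable.
        return convo_type == 'addition' and type(user_message) is str

    print(manage_quiz('2 + 2', 'addition'))          # True
    print(manage_quiz_renamed('2 + 2', 'addition'))  # True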
mathtext_fastapi/nlu.py
CHANGED

@@ -2,23 +2,32 @@ from collections.abc import Mapping
 from logging import getLogger
 import datetime as dt
 from dateutil.parser import isoparse
-import re

 from fuzzywuzzy import fuzz
 from mathtext_fastapi.logging import prepare_message_data_for_logging
 from mathtext.sentiment import sentiment
-from mathtext.text2int import text2int
-from mathtext_fastapi.intent_classification import
+from mathtext.text2int import text2int, TOKENS2INT_ERROR_INT
+from mathtext_fastapi.intent_classification import predict_message_intent

 log = getLogger(__name__)

-
+PAYLOAD_VALUE_TYPES = {
+    'author_id': str,
+    'author_type': str,
+    'contact_uuid': str,
+    'message_body': str,
+    'message_direction': str,
+    'message_id': str,
+    'message_inserted_at': str,
+    'message_updated_at': str,
+    'message_inserted_at': str,
+}


-def build_nlu_response_object(type, data, confidence):
+def build_nlu_response_object(nlu_type, data, confidence):
     """ Turns nlu results into an object to send back to Turn.io
     Inputs
-    -
+    - nlu_type: str - the type of nlu run (integer or sentiment-analysis)
     - data: str/int - the student message
     - confidence: - the nlu confidence score (sentiment) or '' (integer)

@@ -28,7 +37,11 @@ def build_nlu_response_object(type, data, confidence):
     >>> build_nlu_response_object('sentiment', 'POSITIVE', 0.99)
     {'type': 'sentiment', 'data': 'POSITIVE', 'confidence': 0.99}
     """
-    return {
+    return {
+        'type': nlu_type,
+        'data': data,
+        'confidence': confidence
+    }


 # def test_for_float_or_int(message_data, message_text):

@@ -144,7 +157,22 @@ def payload_is_valid(payload_object):
     """
     >>> payload_is_valid({'author_id': '+5555555', 'author_type': 'OWNER', 'contact_uuid': '3246-43ad-faf7qw-zsdhg-dgGdg', 'message_body': 'thirty one', 'message_direction': 'inbound', 'message_id': 'SDFGGwafada-DFASHA4aDGA', 'message_inserted_at': '2022-07-05T04:00:34.03352Z', 'message_updated_at': '2023-04-06T10:08:23.745072Z'})
     True
+
+    >>> message: {'author_id': '@event.message._vnd.v1.chat.owner', 'author_type': '@event.message._vnd.v1.author.type', 'contact_uuid': '@event.message._vnd.v1.chat.contact_uuid', 'message_body': '@event.message.text.body', 'message_direction': '@event.message._vnd.v1.direction', 'message_id': '@event.message.id', 'message_inserted_at': '@event.message._vnd.v1.chat.inserted_at', 'message_updated_at': '@event.message._vnd.v1.chat.updated_at'}
+    False
     """
+    try:
+        isinstance(
+            isoparse(payload_object.get('message_inserted_at')),
+            dt.datetime
+        )
+        isinstance(
+            isoparse(payload_object.get('message_updated_at')),
+            dt.datetime
+        )
+    except ValueError:
+        return False
+
     return (
         isinstance(payload_object, Mapping) and
         isinstance(payload_object.get('author_id'), str) and

@@ -155,17 +183,40 @@ def payload_is_valid(payload_object):
         isinstance(payload_object.get('message_id'), str) and
         isinstance(payload_object.get('message_inserted_at'), str) and
         isinstance(payload_object.get('message_updated_at'), str) and
-        isinstance(payload_object.get('message_inserted_at'), str)
-
-
-
+        isinstance(payload_object.get('message_inserted_at'), str)
+    )
+
+
+def log_payload_errors(payload_object):
+    errors = []
+    try:
+        assert isinstance(payload_object, Mapping)
+    except Exception as e:
+        log.error(f'Invalid HTTP request payload object: {e}')
+        errors.append(e)
+    for k, typ in PAYLOAD_VALUE_TYPES.items():
+        try:
+            assert isinstance(payload_object.get(k), typ)
+        except Exception as e:
+            log.error(f'Invalid HTTP request payload object: {e}')
+            errors.append(e)
+    try:
+        assert isinstance(
+            dt.datetime.fromisoformat(payload_object.get('message_inserted_at')),
             dt.datetime
-        )
+        )
+    except Exception as e:
+        log.error(f'Invalid HTTP request payload object: {e}')
+        errors.append(e)
+    try:
         isinstance(
-
+            dt.datetime.fromisoformat(payload_object.get('message_updated_at')),
             dt.datetime
-        )
-
+        )
+    except Exception as e:
+        log.error(f'Invalid HTTP request payload object: {e}')
+        errors.append(e)
+    return errors


 def evaluate_message_with_nlu(message_data):

@@ -182,14 +233,15 @@ def evaluate_message_with_nlu(message_data):
     log.info(f'Starting evaluate message: {message_data}')

     if not payload_is_valid(message_data):
-
+        log_payload_errors(message_data)
+        return {'type': 'error', 'data': TOKENS2INT_ERROR_INT, 'confidence': 0}

     try:
         message_text = str(message_data.get('message_body', ''))
     except:
         log.error(f'Invalid request payload: {message_data}')
         # use python logging system to do this//
-        return {'type': 'error', 'data':
+        return {'type': 'error', 'data': TOKENS2INT_ERROR_INT, 'confidence': 0}

     # Run intent classification only for keywords
     intent_api_response = run_intent_classification(message_text)

@@ -199,7 +251,7 @@ def evaluate_message_with_nlu(message_data):

     number_api_resp = text2int(message_text.lower())

-    if number_api_resp ==
+    if number_api_resp == TOKENS2INT_ERROR_INT:
         # Run intent classification with logistic regression model
         predicted_label = predict_message_intent(message_text)
         if predicted_label['confidence'] > 0.01:
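A rough usage sketch of the validation path added above, using the well-formed payload from the doctest; the import path is assumed from the file layout in this commit:

    # Illustrative only: exercise payload_is_valid() / log_payload_errors()
    # the way evaluate_message_with_nlu() now does.
    from mathtext_fastapi.nlu import payload_is_valid, log_payload_errors

    payload = {
        'author_id': '+5555555',
        'author_type': 'OWNER',
        'contact_uuid': '3246-43ad-faf7qw-zsdhg-dgGdg',
        'message_body': 'thirty one',
        'message_direction': 'inbound',
        'message_id': 'SDFGGwafada-DFASHA4aDGA',
        'message_inserted_at': '2022-07-05T04:00:34.03352Z',
        'message_updated_at': '2023-04-06T10:08:23.745072Z',
    }

    if payload_is_valid(payload):
        print('valid payload')                  # expected for this example
    else:
        errors = log_payload_errors(payload)    # logs and returns the failures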
scripts/bump_version.py
ADDED

@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+from pathlib import Path
+import re
+import shutil
+
+BASE_DIR = Path(__file__).parent.parent
+PYPROJECT_PATH = BASE_DIR / 'pyproject.toml'
+PATTERN = re.compile(r'(version\s*=\s*)[\'"]?(\d(\.\d+)+)[\'"]?\s*')
+
+if __name__ == '__main__':
+    verline = None
+    with PYPROJECT_PATH.open() as fin:
+        lines = []
+        verline = None
+        for line in fin:
+            lines.append(line)
+            if verline:
+                continue
+            match = PATTERN.match(line)
+            if match:
+                print(f'Found match.groups(): {dict(list(enumerate(match.groups())))}')
+                ver = [int(x) for x in match.groups()[1].split('.')]
+                print(f' Old ver: {ver}')
+                ver[-1] += 1
+                print(f' New ver: {ver}')
+                ver = '.'.join([str(x) for x in ver])
+                print(f' New ver str: {ver}')
+                verline = f'version = "{ver}"\n'
+                print(f' New ver line: {verline}')
+                lines[-1] = verline
+                print(f' New ver line: {lines[-1]}')
+
+    if verline:
+        shutil.copy(PYPROJECT_PATH, PYPROJECT_PATH.with_suffix('.toml.bak'))
+        with PYPROJECT_PATH.open('w') as fout:
+            fout.writelines(lines)
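Presumably run as `python scripts/bump_version.py` from the repository root; a quick, illustrative check of the PATTERN regex the script relies on (the version string below is made up):

    import re

    PATTERN = re.compile(r'(version\s*=\s*)[\'"]?(\d(\.\d+)+)[\'"]?\s*')

    match = PATTERN.match('version = "0.3.7"\n')            # a typical pyproject.toml line
    ver = [int(x) for x in match.groups()[1].split('.')]    # -> [0, 3, 7]
    ver[-1] += 1                                            # bump the patch component
    assert '.'.join(str(x) for x in ver) == '0.3.8'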
scripts/cleanpyc.sh
ADDED

@@ -0,0 +1,2 @@
+#!usr/bin/env bash
+find . | grep -E "(/__pycache__$|\.pyc$|\.pyo$)" | xargs rm -rf
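Note that the committed shebang reads `#!usr/bin/env bash` (no leading slash), so the script would normally be invoked as `bash scripts/cleanpyc.sh`. For reference, a hypothetical pathlib-only equivalent of the same cleanup (not part of the commit):

    from pathlib import Path
    import shutil

    # Remove bytecode caches and stray compiled files under the current directory.
    for path in list(Path('.').rglob('*')):
        if path.is_dir() and path.name == '__pycache__':
            shutil.rmtree(path, ignore_errors=True)
        elif path.suffix in ('.pyc', '.pyo'):
            path.unlink(missing_ok=True)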
scripts/make_request.py
CHANGED

@@ -48,18 +48,21 @@ def run_simulated_request(endpoint, sample_answer, context=None):
     print(f"Case: {sample_answer}")
     b_string = add_message_text_to_sample_object(sample_answer)

-
-
-
-
-
-
-
-
-
-
-
-
+    print("BSTRING")
+    print(b_string)
+
+    # if endpoint == 'sentiment-analysis' or endpoint == 'text2int' or endpoint =='intent-classification':
+    #     request = requests.post(
+    #         url=f'http://localhost:7860/{endpoint}',
+    #         json={'content': sample_answer}
+    #     ).json()
+    # else:
+    #     request = requests.post(
+    #         url=f'http://localhost:7860/{endpoint}',
+    #         data=b_string
+    #     ).json()
+
+    # print(request)


 # run_simulated_request('intent-classification', 'exit')
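The request block above is commented out in this commit; a sketch of the call it issues when re-enabled, assuming a local FastAPI server on port 7860 as in the commented-out code (the endpoint and answer below are illustrative):

    import requests

    endpoint = 'text2int'            # or 'sentiment-analysis' / 'intent-classification'
    sample_answer = 'thirty one'
    response = requests.post(
        url=f'http://localhost:7860/{endpoint}',
        json={'content': sample_answer}
    ).json()
    print(response)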
scripts/pin_requirements.py
ADDED

@@ -0,0 +1,62 @@
+""" Parse requirements.txt and pyproject.toml and move versions to pyproject.toml """
+from pathlib import Path
+import re
+import sys
+import toml
+
+def get_requirement_versions(path='requirements.txt'):
+    """ Read requirements.txt file and return dict of package versions """
+    path = Path(path or '')
+    if path.is_dir():
+        path = next(iter(path.glob('**/requirements.txt')))
+    reqdict = {}
+    text = Path(path).open().read()
+    for line in text.splitlines():
+        if line.strip():
+            match = re.match(r'([-_a-zA-Z0-9]+)\s*([ >=<~^,.rabc0-9]+)\s*', line)
+            if match:
+                name, ver = match.groups()
+                reqdict[name] = ver
+    return reqdict
+
+
+def normalize_name(name):
+    return str(name).strip().replace('_', '-').replace(' ', '-').lower()
+
+
+def pin_versions(pyproject='pyproject.toml', reqdict=None, overwrite=False):
+    if not reqdict or isinstance(reqdict, (str, Path)):
+        reqdict = get_requirement_versions(path=reqdict)
+    reqdict = {
+        normalize_name(k): v for (k, v) in
+        reqdict.items()
+    }
+
+    pyproj = toml.load(pyproject)
+    depdict = pyproj.get('tool', {}).get('poetry', {}).get('dependencies', {})
+    depdict = {
+        normalize_name(k): v for (k, v) in
+        depdict.items()
+    }
+
+    for name, spec in reqdict.items():
+        if name in depdict:
+            ver = depdict[name]
+            if isinstance(ver, str) and (overwrite or ver == '*'):
+                depdict[name] = spec
+
+    pyproj['tool']['poetry']['dependencies'] = depdict
+    overwrite = overwrite or (input(f'Overwrite {pyproject}?')[0].lower() == 'y')
+    if overwrite:
+        with open(pyproject, 'w') as stream:
+            toml.dump(pyproj, stream)
+    return pyproj
+
+
+if __name__ == '__main__':
+    path = 'requirements.txt'
+    if sys.argv[1:]:
+        path = sys.argv[1]
+    pyproj = pin_versions(reqdict=path)
+    print(toml.dumps(pyproj))
+
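A rough usage sketch for the new script; it can be run as `python scripts/pin_requirements.py [path/to/requirements.txt]`, or imported as below (assuming `scripts/` is importable and `toml` is installed; the package names and versions are illustrative):

    # Illustrative only: read pinned versions and apply them to '*' entries in
    # pyproject.toml's [tool.poetry.dependencies]; answering 'n' at the prompt
    # leaves the file untouched.
    from scripts.pin_requirements import get_requirement_versions, pin_versions

    reqs = get_requirement_versions('requirements.txt')   # e.g. {'fastapi': '==0.74.1', ...}
    pyproj = pin_versions('pyproject.toml', reqdict=reqs)
    print(pyproj['tool']['poetry']['dependencies'])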
|