Spaces:

cetinca
/

fastapi-ep

Runtime error

App Files Files Community

cetinca committed on Jan 24, 2023

Commit

6784f95

verified ·

1 Parent(s): 10d0128

Update package import

Browse files

Files changed (7) hide show

.gitlab-ci.yml +4 -4
app.py +4 -3
modules/nlu.py +11 -14
modules/sentiment.py +0 -8
modules/text2int.py +0 -192
pyproject.toml +40 -0
requirements.txt +4 -12

.gitlab-ci.yml CHANGED Viewed

@@ -1,14 +1,14 @@
 # Official Python language image.
-test_py39:
-  image: python:3.9
   before_script:
     - python -v
     - pip install -r requirements.txt
   script:
     - pytest --verbose
-test_py38:
-  image: python:3.8
   before_script:
     - python -v
     - pip install -r requirements.txt

 # Official Python language image.
+test_py38:
+  image: python:3.8
   before_script:
     - python -v
     - pip install -r requirements.txt
   script:
     - pytest --verbose
+test_py39:
+  image: python:3.9
   before_script:
     - python -v
     - pip install -r requirements.txt

app.py CHANGED Viewed

@@ -8,9 +8,10 @@ from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from pydantic import BaseModel
-from modules.nlu import prepare_message_data_for_logging
 from mathtext.sentiment import sentiment
 from mathtext.text2int import text2int
 app = FastAPI()
@@ -47,7 +48,6 @@ def text2int_ep(content: Text = None):
     content = {"message": ml_response}
     return JSONResponse(content=content)
 @app.post("/nlu")
 async def evaluate_user_message_with_nlu_api(request: Request):
     """ Calls NLU APIs on the most recent user message from Turn.io message data and logs the message data
@@ -67,7 +67,7 @@ async def evaluate_user_message_with_nlu_api(request: Request):
     int_api_resp = text2int(message_text)
-    if int_api_resp == '32202':
         sentiment_api_resp = sentiment(message_text)
         # [{'label': 'POSITIVE', 'score': 0.991188645362854}]
         sent_data_dict = {'type': 'sentiment', 'data': sentiment_api_resp[0]['label']}
@@ -76,4 +76,5 @@ async def evaluate_user_message_with_nlu_api(request: Request):
     prepare_message_data_for_logging(message_data)
     int_data_dict = {'type': 'integer', 'data': int_api_resp}
     return JSONResponse(content=int_data_dict)

 from fastapi.templating import Jinja2Templates
 from pydantic import BaseModel
 from mathtext.sentiment import sentiment
 from mathtext.text2int import text2int
+from modules.nlu import prepare_message_data_for_logging
 app = FastAPI()
     content = {"message": ml_response}
     return JSONResponse(content=content)
 @app.post("/nlu")
 async def evaluate_user_message_with_nlu_api(request: Request):
     """ Calls NLU APIs on the most recent user message from Turn.io message data and logs the message data
     int_api_resp = text2int(message_text)
+    if int_api_resp == 32202:
         sentiment_api_resp = sentiment(message_text)
         # [{'label': 'POSITIVE', 'score': 0.991188645362854}]
         sent_data_dict = {'type': 'sentiment', 'data': sentiment_api_resp[0]['label']}
     prepare_message_data_for_logging(message_data)
     int_data_dict = {'type': 'integer', 'data': int_api_resp}
     return JSONResponse(content=int_data_dict)

modules/nlu.py CHANGED Viewed

@@ -1,18 +1,15 @@
-import environ
-import json
 import os
-import requests
 from datetime import datetime
-from supabase import create_client
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-env = environ.Env()
-env_path = os.path.join(BASE_DIR, '.env')
-environ.Env.read_env('.env')
-SUPA = create_client(env('SUPABASE_URL'), env('SUPABASE_KEY'))
 def log_message_data_through_supabase_api(table_name, log_data):
     return SUPA.table(table_name).insert(log_data).execute()
@@ -28,19 +25,19 @@ def prepare_message_data_for_logging(message_data):
         # Autogenerated fields: id, created_at, modified_at
     }
     project_data_log = log_message_data_through_supabase_api('project', project_data)
     contact_data = {
-        'project': project_data_log.data[0]['id'], # FK
         'original_contact_id': message_data['message']['_vnd']['v1']['chat']['contact_uuid'],
         'urn': "",
         'language_code': "en",
         'contact_inserted_at': format_datetime_in_isoformat(datetime.now())
-        # Autogenerated fields: id, created_at, modified_at
     }
     contact_data_log = log_message_data_through_supabase_api('contact', contact_data)
     message_data = {
-        'contact': contact_data_log.data[0]['id'], # FK
         'original_message_id': message_data['message']['id'],
         'text': message_data['message']['text']['body'],
         'direction': message_data['message']['_vnd']['v1']['direction'],
@@ -49,6 +46,6 @@ def prepare_message_data_for_logging(message_data):
         'message_inserted_at': message_data['message']['_vnd']['v1']['chat']['inserted_at'],
         'message_modified_at': message_data['message']['_vnd']['v1']['chat']['updated_at'],
         'message_sent_at': format_datetime_in_isoformat(datetime.now())
-        # Autogenerated fields: created_at, modified_at
     }
     message_data_log = log_message_data_through_supabase_api('message', message_data)

 import os
 from datetime import datetime
+from dotenv import load_dotenv
+from supabase import create_client
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+load_dotenv()
+SUPA = create_client(os.environ.get('SUPABASE_URL'), os.environ.get('SUPABASE_KEY'))
 def log_message_data_through_supabase_api(table_name, log_data):
     return SUPA.table(table_name).insert(log_data).execute()
         # Autogenerated fields: id, created_at, modified_at
     }
     project_data_log = log_message_data_through_supabase_api('project', project_data)
     contact_data = {
+        'project': project_data_log.data[0]['id'],  # FK
         'original_contact_id': message_data['message']['_vnd']['v1']['chat']['contact_uuid'],
         'urn': "",
         'language_code': "en",
         'contact_inserted_at': format_datetime_in_isoformat(datetime.now())
+        # Autogenerated fields: id, created_at, modified_at
     }
     contact_data_log = log_message_data_through_supabase_api('contact', contact_data)
     message_data = {
+        'contact': contact_data_log.data[0]['id'],  # FK
         'original_message_id': message_data['message']['id'],
         'text': message_data['message']['text']['body'],
         'direction': message_data['message']['_vnd']['v1']['direction'],
         'message_inserted_at': message_data['message']['_vnd']['v1']['chat']['inserted_at'],
         'message_modified_at': message_data['message']['_vnd']['v1']['chat']['updated_at'],
         'message_sent_at': format_datetime_in_isoformat(datetime.now())
+        # Autogenerated fields: created_at, modified_at
     }
     message_data_log = log_message_data_through_supabase_api('message', message_data)

modules/sentiment.py DELETED Viewed

@@ -1,8 +0,0 @@
-from transformers import pipeline
-sentiment_obj = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
-def sentiment(text):
-    # Returns sentiment value
-    return sentiment_obj(text)

modules/text2int.py DELETED Viewed

@@ -1,192 +0,0 @@
-import spacy  # noqa
-# import os
-# os.environ['KMP_DUPLICATE_LIB_OK']='True'
-# import spacy
-# Change this according to what words should be corrected to
-SPELL_CORRECT_MIN_CHAR_DIFF = 2
-TOKENS2INT_ERROR_INT = 32202
-ONES = [
-    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
-    "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
-    "sixteen", "seventeen", "eighteen", "nineteen",
-]
-CHAR_MAPPING = {
-    "-": " ",
-    "_": " ",
-    "and": " ",
-}
-# CHAR_MAPPING.update((str(i), word) for i, word in enumerate([" " + s + " " for s in ONES]))
-TOKEN_MAPPING = {
-    "and": " ",
-    "oh": "0",
-}
-def find_char_diff(a, b):
-    # Finds the character difference between two str objects by counting the occurences of every character. Not edit distance.
-    char_counts_a = {}
-    char_counts_b = {}
-    for char in a:
-        if char in char_counts_a.keys():
-            char_counts_a[char] += 1
-        else:
-            char_counts_a[char] = 1
-    for char in b:
-        if char in char_counts_b.keys():
-            char_counts_b[char] += 1
-        else:
-            char_counts_b[char] = 1
-    char_diff = 0
-    for i in char_counts_a:
-        if i in char_counts_b.keys():
-            char_diff += abs(char_counts_a[i] - char_counts_b[i])
-        else:
-            char_diff += char_counts_a[i]
-    return char_diff
-def tokenize(text):
-    text = text.lower()
-    # print(text)
-    text = replace_tokens(''.join(i for i in replace_chars(text)).split())
-    # print(text)
-    text = [i for i in text if i != ' ']
-    # print(text)
-    output = []
-    for word in text:
-        # print(word)
-        output.append(convert_word_to_int(word))
-    output = [i for i in output if i != ' ']
-    # print(output)
-    return output
-def detokenize(tokens):
-    return ' '.join(tokens)
-def replace_tokens(tokens, token_mapping=TOKEN_MAPPING):
-    return [token_mapping.get(tok, tok) for tok in tokens]
-def replace_chars(text, char_mapping=CHAR_MAPPING):
-    return [char_mapping.get(c, c) for c in text]
-def convert_word_to_int(in_word, numwords={}):
-    # Converts a single word/str into a single int
-    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
-    scales = ["hundred", "thousand", "million", "billion", "trillion"]
-    if not numwords:
-        for idx, word in enumerate(ONES):
-            numwords[word] = idx
-        for idx, word in enumerate(tens):
-            numwords[word] = idx * 10
-        for idx, word in enumerate(scales):
-            numwords[word] = 10 ** (idx * 3 or 2)
-    if in_word in numwords:
-        # print(in_word)
-        # print(numwords[in_word])
-        return numwords[in_word]
-    try:
-        int(in_word)
-        return int(in_word)
-    except ValueError:
-        pass
-    # Spell correction using find_char_diff
-    char_diffs = [find_char_diff(in_word, i) for i in ONES + tens + scales]
-    min_char_diff = min(char_diffs)
-    if min_char_diff <= SPELL_CORRECT_MIN_CHAR_DIFF:
-        return char_diffs.index(min_char_diff)
-def tokens2int(tokens):
-    # Takes a list of tokens and returns a int representation of them
-    types = []
-    for i in tokens:
-        if i <= 9:
-            types.append(1)
-        elif i <= 90:
-            types.append(2)
-        else:
-            types.append(3)
-    # print(tokens)
-    if len(tokens) <= 3:
-        current = 0
-        for i, number in enumerate(tokens):
-            if i != 0 and types[i] < types[i - 1] and current != tokens[i - 1] and types[i - 1] != 3:
-                current += tokens[i] + tokens[i - 1]
-            elif current <= tokens[i] and current != 0:
-                current *= tokens[i]
-            elif 3 not in types and 1 not in types:
-                current = int(''.join(str(i) for i in tokens))
-                break
-            elif '111' in ''.join(str(i) for i in types) and 2 not in types and 3 not in types:
-                current = int(''.join(str(i) for i in tokens))
-                break
-            else:
-                current += number
-    elif 3 not in types and 2 not in types:
-        current = int(''.join(str(i) for i in tokens))
-    else:
-        """
-        double_list = []
-        current_double = []
-        double_type_list = []
-        for i in tokens:
-            if len(current_double) < 2:
-                current_double.append(i)
-            else:
-                double_list.append(current_double)
-                current_double = []
-        current_double = []
-        for i in types:
-            if len(current_double) < 2:
-                current_double.append(i)
-            else:
-                double_type_list.append(current_double)
-                current_double = []
-        print(double_type_list)
-        print(double_list)
-        current = 0
-        for i, type_double in enumerate(double_type_list):
-            if len(type_double) == 1:
-                current += double_list[i][0]
-            elif type_double[0] == type_double[1]:
-                current += int(str(double_list[i][0]) + str(double_list[i][1]))
-            elif type_double[0] > type_double[1]:
-                current += sum(double_list[i])
-            elif type_double[0] < type_double[1]:
-                current += double_list[i][0] * double_list[i][1]
-        #print(current)
-        """
-        count = 0
-        current = 0
-        for i, token in enumerate(tokens):
-            count += 1
-            if count == 2:
-                if types[i - 1] == types[i]:
-                    current += int(str(token) + str(tokens[i - 1]))
-                elif types[i - 1] > types[i]:
-                    current += tokens[i - 1] + token
-                else:
-                    current += tokens[i - 1] * token
-                count = 0
-            elif i == len(tokens) - 1:
-                current += token
-    return current
-def text2int(text):
-    # Wraps all of the functions up into one
-    return tokens2int(tokenize(text))

pyproject.toml ADDED Viewed

	@@ -0,0 +1,40 @@

+[tool.poetry]
+name = "MathText"
+version = "0.0.3"
+authors = [
+  "Sebastian Larsen <[email protected]>",
+  "Çetin ÇAKIR <[email protected]>",
+  "Hobson Lane <[email protected]>",
+  ]
+description = "Natural Language Understanding (text processing) for math symbols, digits, and words with a Gradio user interface and REST API."
+readme = "README.md"
+# requires-python = ">=3.7"
+license = "AGPL-3.0-or-later"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
+    "Operating System :: OS Independent",
+]
+[tool.poetry.dependencies]
+fastapi = "0.74.*"
+requests = "2.27.*"
+sentencepiece = "0.1.*"
+uvicorn = "0.17.*"
+pydantic = "*"
+supabase = "*"
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.2"
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+# [build-system]
+# requires = ["hatchling"]
+# build-backend = "hatchling.build"
+# repository = "https://gitlab.com/tangibleai/community/mathtext-fastapi"

requirements.txt CHANGED Viewed

@@ -1,16 +1,8 @@
 fastapi==0.74.*
 requests==2.27.*
 sentencepiece==0.1.*
-torch==1.12.*
-transformers==4.24.*
-uvicorn[standard]==0.17.*
-pydantic
-mathtext @ git+https://gitlab.com/tangibleai/community/mathtext@main
-spacy==3.4.*
-pandas==1.5.*
-matplotlib==3.6.*
-pytest==7.2.*
-httpx==0.23.*
-django-environ
 supabase

+mathtext @ git+https://gitlab.com/tangibleai/community/mathtext@main
 fastapi==0.74.*
 requests==2.27.*
 sentencepiece==0.1.*
+uvicorn==0.17.*
+pydantic==1.10.*
+python-dotenv==0.21.*
 supabase