|
import json |
|
from typing import Any, overload |
|
|
|
|
|
|
|
def _extract_string(text: str, start_string: str | None = None, end_string: str | None = None) -> str: |
|
""" |
|
テキストから必要な文字列を抽出する |
|
|
|
Args: |
|
text (str): 抽出するテキスト |
|
|
|
Returns: |
|
str: 抽出された必要な文字列 |
|
""" |
|
|
|
if start_string is not None and start_string in text: |
|
idx_head = text.index(start_string) |
|
text = text[idx_head:] |
|
|
|
if end_string is not None and end_string in text: |
|
idx_tail = len(text) - text[::-1].index(end_string[::-1]) |
|
text = text[:idx_tail] |
|
return text |
|
|
|
|
|
def _delete_first_chapter_tag(text: str, first_character_tag: str | list[str]) -> str: |
|
"""_summary_ |
|
|
|
Args: |
|
text (str): テキスト |
|
first_character_tag (str | list[str]): 最初にある余分な文字列 |
|
|
|
Returns: |
|
str: 除去済みのテキスト |
|
""" |
|
|
|
if isinstance(first_character_tag, str): |
|
first_character_tag = [first_character_tag] |
|
|
|
for first_character_i in first_character_tag: |
|
if text.startswith(first_character_i): |
|
text = text[len(first_character_i) :] |
|
break |
|
return text.strip() |
|
|
|
|
|
def strip_string(
    text: str,
    first_character: str | list[str] = ["<output>", "<outputs>"],
    start_string: str | None = None,
    end_string: str | None = None,
    strip_quotes: str | list[str] = ["'", '"'],
) -> str:
    """Remove surrounding clutter (leading tag, quotes, whitespace) from a string.

    Processing order:
        1. ``_delete_first_chapter_tag`` drops a leading tag.
        2. Each entry of ``strip_quotes`` is stripped from both ends in turn,
           followed by a whitespace strip.
        3. ``_extract_string`` trims the text to the ``start_string`` /
           ``end_string`` span.
        4. A final whitespace strip is applied.

    Args:
        text: Raw output string (e.g. from ChatGPT).
        first_character: Tag, or list of candidate tags, expected at the very
            start of the output. Defaults to ["<output>", "<outputs>"].
        start_string: Marker the result should start with. Defaults to None.
        end_string: Marker the result should end with. Defaults to None.
        strip_quotes: Quote characters to strip from both ends. Each entry is
            passed to ``str.strip``, which treats it as a set of characters.
            Defaults to ["'", '"'].

    Returns:
        The cleaned string.

    Examples:
        >>> strip_string("<output>'''ChatGPT is smart!'''", "<output>")
        'ChatGPT is smart!'
        >>> strip_string('{"a": 1}', start_string="{", end_string="}")
        '{"a": 1}'
        >>> strip_string("<outputs> `neoAI`", first_character="<outputs>", strip_quotes="`")
        'neoAI'
    """
    # The list defaults are shared between calls; they are never mutated in
    # this function (a str argument is wrapped in a new list instead).
    quote_sets = [strip_quotes] if isinstance(strip_quotes, str) else strip_quotes

    cleaned = _delete_first_chapter_tag(text, first_character)
    for quote_chars in quote_sets:
        cleaned = cleaned.strip(quote_chars)
        cleaned = cleaned.strip()

    return _extract_string(cleaned, start_string, end_string).strip()
|
|
|
|
|
|
|
|
|
|
|
@overload
def json2dict(json_string: str, error_key: None) -> dict[Any, Any] | str: ...


# The default is declared here so that calls omitting ``error_key`` (which the
# implementation treats as "error") resolve to the dict-only overload.
@overload
def json2dict(json_string: str, error_key: str = ...) -> dict[Any, Any]: ...


def json2dict(json_string: str, error_key: str | None = "error") -> dict[Any, Any] | str:
    """Convert a JSON object embedded in a string into a Python dict.

    The text is first trimmed by ``_extract_string`` to the span from the
    first "{" to the last "}", then decoded with ``json.loads(strict=False)``.

    Args:
        json_string: Text containing the JSON object.
        error_key: Key under which a fallback value is stored when no dict
            could be produced. ``None`` disables wrapping: the original
            ``json_string`` is returned instead. Defaults to "error".

    Returns:
        The decoded dict on success. Otherwise:
            * decoding failed -> ``{error_key: json_string}``
            * decoded value is not a dict -> ``{error_key: decoded_value}``
            * either of the above with ``error_key=None`` -> ``json_string``
    """
    try:
        parsed = json.loads(_extract_string(json_string, start_string="{", end_string="}"), strict=False)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return json_string if error_key is None else {error_key: json_string}

    if isinstance(parsed, dict):
        return parsed
    if error_key is None:
        # Valid JSON but not an object (e.g. "[1, 2]" or "3"). Treat it like a
        # failure in raw mode instead of producing a dict keyed by None.
        return json_string
    return {error_key: parsed}
|
|
|
|
|
def json2list(json_string: str, error_key: str | None = "error") -> list[Any] | Any:
    """Convert a JSON array embedded in a string into a Python list.

    The text is first trimmed by ``_extract_string`` to the span from the
    first "[" to the last "]", then decoded with ``json.loads(strict=False)``.

    Args:
        json_string: Text containing the JSON array.
        error_key: Key under which the original text is stored when decoding
            fails. ``None`` makes the function return ``json_string`` itself
            on failure. Defaults to "error".

    Returns:
        The decoded value on success. It is a list when the extracted span is
        a JSON array; any other decoded JSON value is returned as-is, without
        validation. When decoding fails: ``{error_key: json_string}``, or
        ``json_string`` if ``error_key`` is ``None``.
    """
    try:
        parsed = json.loads(_extract_string(json_string, start_string="[", end_string="]"), strict=False)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        if error_key is None:
            return json_string
        return {error_key: json_string}
    return parsed
|
|
|
|
|
|
|
|
|
|