Upload type_utils.py with huggingface_hub
Browse files- type_utils.py +137 -0
type_utils.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import collections.abc
|
2 |
import io
|
3 |
import itertools
|
@@ -7,6 +8,135 @@ import typing
|
|
7 |
from .utils import safe_eval
|
8 |
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def parse_type_string(type_string: str) -> typing.Any:
|
11 |
"""Parses a string representing a Python type hint and evaluates it to return the corresponding type object.
|
12 |
|
@@ -24,6 +154,11 @@ def parse_type_string(type_string: str) -> typing.Any:
|
|
24 |
ValueError: If the type string contains elements not allowed in the safe context
|
25 |
or tokens list.
|
26 |
|
|
|
|
|
|
|
|
|
|
|
27 |
The function uses a predefined safe context with common types from the `typing` module
|
28 |
and basic Python data types. It also defines a list of safe tokens that are allowed
|
29 |
in the type string.
|
@@ -41,6 +176,8 @@ def parse_type_string(type_string: str) -> typing.Any:
|
|
41 |
"Optional": typing.Optional,
|
42 |
}
|
43 |
|
|
|
|
|
44 |
safe_tokens = ["[", "]", ",", " "]
|
45 |
return safe_eval(type_string, safe_context, safe_tokens)
|
46 |
|
|
|
1 |
+
import ast
|
2 |
import collections.abc
|
3 |
import io
|
4 |
import itertools
|
|
|
8 |
from .utils import safe_eval
|
9 |
|
10 |
|
11 |
+
def convert_union_type(type_string: str) -> str:
    """Converts Python 3.10 union type hints into a form compatible with Python 3.9.

    Args:
        type_string (str): A string representation of a Python type hint, for example
            'List[int|float]' or 'str|float|bool'.

    Formally, the function depends on the input string adhering to the following rules.
    Assuming that the input is a valid type hint, the function does not check that 'word' is
    'str', 'bool', 'List' etc. It just depends on the following general structure (spaces ignored):
        type -> word OR type( | type)* OR word[type( , type)*]
        word is a sequence of (0 or more) chars, each being any char but: [ ] , |

    The subscript of 'Literal' is the one exception: its constants may contain any of the
    four meta chars above. The whole 'Literal[...]' subscript is therefore consumed verbatim
    and copied to the output unchanged, so a '|' inside a constant is never mistaken for a
    union operator.

    Returns:
        str: A type string with converted union types, which is compatible with typing module.

    Raises:
        ValueError: If a 'Literal[' subscript cannot be parsed as a list of Python constants,
            or if the input ends while a '[' still awaits its first inner type.

    Examples:
        convert_union_type('List[int|float]') -> 'List[Union[int,float]]'
        convert_union_type('Optional[int|float|bool]') -> 'Optional[Union[int,float,bool]]'
        convert_union_type("Literal['a|b']|int") -> "Union[Literal['a|b'],int]"
    """

    def consume_literal(string: str) -> str:
        # `string` starts with the "[" that follows 'Literal'. Return its shortest prefix
        # ending in "]" that parses as a Python list literal. Trying successive "]" candidates
        # skips over any "]" that sits inside a string constant.
        candidate_end = string.find("]")
        while candidate_end != -1:
            try:
                ast.literal_eval(string[: candidate_end + 1])
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                # not a complete list literal yet; extend to the next "]"
                candidate_end = string.find("]", candidate_end + 1)
            else:
                break

        if candidate_end == -1:
            raise ValueError("invalid Literal in input type_string")
        return string[: candidate_end + 1]

    def join_members(members) -> str:
        # A single member is a plain type; several members are the operands of '|'.
        if len(members) == 1:
            return members[0]
        return "Union[" + ",".join(members) + "]"

    # One frame per open bracket level. A frame lists the '|'-separated members of the
    # type currently being read at that level; its last item is the member in progress.
    # Keeping members apart (instead of joining them with "|" and replacing later)
    # guarantees that a '|' inside a Literal constant is never rewritten.
    stack = [[""]]
    remaining = type_string.strip()
    next_word = re.compile(r"([^\[\],|]*)([\[\],|]|$)")
    while remaining:
        word = next_word.match(remaining)
        text, delimiter = word.group(1), word.group(2)
        remaining = remaining[len(word.group(0)) :].strip()
        stack[-1][-1] += text

        if delimiter in ("]", ",", ""):  # "" for end of input
            # the top frame is now a whole type: fold it into its parent
            whole_type = join_members(stack.pop()) + delimiter
            if stack:
                stack[-1][-1] += whole_type
            else:
                stack = [[whole_type]]
            if delimiter == ",":
                stack.append([""])  # to start the expected next type

        elif delimiter == "|":
            # the current member is complete and another one is expected
            stack[-1].append("")

        elif text == "Literal":  # delimiter is "["
            literal_ops = consume_literal("[" + remaining)
            stack[-1][-1] += literal_ops
            # literal_ops includes the "[" that was already removed from `remaining`
            remaining = remaining[len(literal_ops) - 1 :]

        else:  # delimiter is "["
            stack[-1][-1] += "["
            stack.append([""])  # start type (,type)* inside the []

    if len(stack) != 1:
        raise ValueError(
            "invalid type_string: input ended right after an opening '[': "
            + repr(type_string)
        )
    return join_members(stack[0])
|
101 |
+
|
102 |
+
|
103 |
+
def format_type_string(type_string: str) -> str:
    """Formats a string representing a valid Python type hint so that it is compatible with Python 3.9 notation.

    Args:
        type_string (str): A string representation of a Python type hint. This can be any
            valid type, which does not contain strings (e.g. 'Literal').
            Examples include 'List[int]', 'Dict[str, Any]', 'Optional[List[str]]', etc.

    Returns:
        str: A formatted type string.

    Examples:
        format_type_string('list[int | float]') -> 'List[Union[int,float]]'
        format_type_string('dict[str, Optional[str]]') -> 'Dict[str,Optional[str]]'

    The function formats a valid type string (written either before or after Python 3.10)
    into a form compatible with 3.9. This is done by capitalizing the first letter of the
    lower-cased type names 'list', 'tuple' and 'dict', and by transferring the 'bitwise or
    operator' into 'Union' notation. The function also removes spaces and the redundant
    module prefix of type names imported from the 'typing' module, e.g. 'typing.Tuple' -> 'Tuple'.

    Only whole names are rewritten: an identifier that merely contains one of these names
    (for example 'Wordlist') is left untouched. The function expects the input to not
    contain types which contain strings, for example 'Literal', because spaces and type
    names inside string constants would be rewritten as well.
    """
    # \b restricts the match to complete identifiers, so longer names that happen to
    # embed "list", "tuple" or "dict" are not corrupted.
    type_string = re.sub(
        r"\b(list|tuple|dict)\b",
        lambda match: match.group(1).capitalize(),
        type_string,
    )
    # drop the module qualifier of names imported from 'typing'
    type_string = re.sub(r"\btyping\.", "", type_string)
    type_string = type_string.replace(" ", "")
    return convert_union_type(type_string)
|
138 |
+
|
139 |
+
|
140 |
def parse_type_string(type_string: str) -> typing.Any:
|
141 |
"""Parses a string representing a Python type hint and evaluates it to return the corresponding type object.
|
142 |
|
|
|
154 |
ValueError: If the type string contains elements not allowed in the safe context
|
155 |
or tokens list.
|
156 |
|
157 |
+
The function formats the string first if it represents a new Python type hint
|
158 |
+
(i.e. valid since Python 3.10), which uses lowercased names for some types and
|
159 |
+
'bitwise or operator' instead of 'Union', for example: 'list[int|float]' instead
|
160 |
+
of 'List[Union[int,float]]' etc.
|
161 |
+
|
162 |
The function uses a predefined safe context with common types from the `typing` module
|
163 |
and basic Python data types. It also defines a list of safe tokens that are allowed
|
164 |
in the type string.
|
|
|
176 |
"Optional": typing.Optional,
|
177 |
}
|
178 |
|
179 |
+
type_string = format_type_string(type_string)
|
180 |
+
|
181 |
safe_tokens = ["[", "]", ",", " "]
|
182 |
return safe_eval(type_string, safe_context, safe_tokens)
|
183 |
|