Elron commited on
Commit
a39fb5f
·
verified ·
1 Parent(s): a4305d3

Upload type_utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. type_utils.py +174 -0
type_utils.py CHANGED
@@ -1,6 +1,7 @@
1
  import collections.abc
2
  import io
3
  import itertools
 
4
  import typing
5
 
6
  from .utils import safe_eval
@@ -44,6 +45,179 @@ def parse_type_string(type_string: str) -> typing.Any:
44
  return safe_eval(type_string, safe_context, safe_tokens)
45
 
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def isoftype(object, type):
48
  """Checks if an object is of a certain typing type, including nested types.
49
 
 
1
  import collections.abc
2
  import io
3
  import itertools
4
+ import re
5
  import typing
6
 
7
  from .utils import safe_eval
 
45
  return safe_eval(type_string, safe_context, safe_tokens)
46
 
47
 
48
def infer_type(obj) -> typing.Any:
    """Infer the type of ``obj`` and return it as a parsed type object.

    The object's type is first encoded as a string by ``infer_type_string``
    and that string is then handed to ``parse_type_string``.

    Args:
        obj: the object whose type is to be inferred.

    Returns:
        Whatever ``parse_type_string`` produces for the inferred type string.
    """
    type_string = infer_type_string(obj)
    return parse_type_string(type_string)
50
+
51
+
52
def infer_type_string(obj: typing.Any) -> str:
    """Encode the type of a given object into a string.

    Args:
        obj: any object.

    Returns:
        A string representation of the type of the object,
        e.g. 'str', 'List[int]', 'Dict[str,int]'.

        Formal definition of the returned string:
            Type  -> basic | List[Type] | Dict[Type,Type] | Union[Type(,Type)*] | Tuple[Type(,Type)*]
            basic -> bool | str | int | float | Any
        The string contains no spaces at all.

    Notes:
        * An object that is not a bool, int, str, float, list, dict or tuple
          is encoded as 'Any'.
        * An empty list is 'List[Any]', an empty dict is 'Dict[Any,Any]' and an
          empty tuple is 'Tuple[Any]'.
        * When the elements of a container have different types, the narrowest
          set of types that covers all of them is kept ('bool' is covered by
          'int', 'Any' is covered by every type); if more than one type
          remains they are sorted and wrapped in 'Union[...]'.
        * NOTE(review): because 'Any' is treated as covered by every type, an
          unrecognised element (e.g. None) is absorbed by its siblings:
          [None, 1] yields 'List[int]'. Confirm this is intended.

    Examples:
        infer_type_string({"how_much": 7}) returns "Dict[str,int]"
        infer_type_string([1, 2]) returns "List[int]"
        infer_type_string([]) returns "List[Any]": no contents to indicate any type
        infer_type_string([[], [7]]) returns "List[List[int]]": the type of the parent
            list is given by the non-empty child list; the empty child list is
            covered by that type.
        infer_type_string([[], 7, True]) returns "List[Union[List[Any],int]]"
            because bool is also an int
    """
    # bool must be tested before int, because bool is a subtype of int.
    for basic_type, name_of_basic_type in _BASIC_TYPES:
        if isinstance(obj, basic_type):
            return name_of_basic_type

    if isinstance(obj, list):
        if not obj:
            return "List[Any]"
        element_types = {infer_type_string(list_el) for list_el in obj}
        return "List[" + _encode_type_names(element_types) + "]"

    if isinstance(obj, dict):
        if not obj:
            return "Dict[Any,Any]"
        key_types = {infer_type_string(key) for key in obj}
        val_types = {infer_type_string(val) for val in obj.values()}
        return (
            "Dict["
            + _encode_type_names(key_types)
            + ","
            + _encode_type_names(val_types)
            + "]"
        )

    if isinstance(obj, tuple):
        if not obj:
            return "Tuple[Any]"
        return "Tuple[" + ",".join(infer_type_string(tup_el) for tup_el in obj) + "]"

    return "Any"


# Basic types in the order they must be tested (bool before int).
_BASIC_TYPES = ((bool, "bool"), (int, "int"), (str, "str"), (float, "float"))
_CONTAINER_PREFIX = re.compile(r"List\[|Dict\[|Union\[|Tuple\[")
_BASIC_NAME = re.compile(r"str|bool|int|float|Any")


def _consume_arg(args_list: str) -> typing.Tuple[str, str]:
    """Split off the first complete type name from a comma separated list.

    Returns the first type name (with all of its nested arguments) and the
    remainder of the string that follows it.
    """
    container = _CONTAINER_PREFIX.match(args_list)
    if container is None:
        basic = _BASIC_NAME.match(args_list)
        assert basic, "parsing error"
        return basic.group(), args_list[basic.end() :]

    # A container: consume its comma separated arguments up to the closing "]".
    remainder = args_list[container.end() :]
    arg, remainder = _consume_arg(remainder)
    inner_args = [arg]
    while remainder.startswith(","):
        arg, remainder = _consume_arg(remainder[1:])
        inner_args.append(arg)
    assert remainder.startswith("]"), "parsing error"
    return container.group() + ",".join(inner_args) + "]", remainder[1:]


def _split_args(args: str) -> typing.List[str]:
    """Split a comma separated list of type names at its top-level commas."""
    found = []
    while args:
        arg, args = _consume_arg(args)
        found.append(arg)
        if args.startswith(","):
            args = args[1:]
    return found


def _is_covered_by(left: str, right: str) -> bool:
    """Tell whether every object of type ``left`` is also of type ``right``."""
    if left == right:
        return True
    if left.startswith("Union["):
        return all(_is_covered_by(left_el, right) for left_el in _split_args(left[6:-1]))
    if right.startswith("Union["):
        return any(
            _is_covered_by(left, right_el) for right_el in _split_args(right[6:-1])
        )
    if left.startswith("List[") and right.startswith("List["):
        # un-wrap the leading List[ and the trailing ]
        return _is_covered_by(left[5:-1], right[5:-1])
    if left.startswith("Dict[") and right.startswith("Dict["):
        # Split at the top-level comma only: the key or value type may itself
        # contain commas (e.g. Dict[Tuple[int,int],str]).
        left_key, left_val = _split_args(left[5:-1])
        right_key, right_val = _split_args(right[5:-1])
        return _is_covered_by(left_key, right_key) and _is_covered_by(
            left_val, right_val
        )
    if left.startswith("Tuple[") and right.startswith("Tuple["):
        # Compare position by position, again splitting at top-level commas only.
        left_els = _split_args(left[6:-1])
        right_els = _split_args(right[6:-1])
        if len(left_els) != len(right_els):
            return False
        return all(
            _is_covered_by(left_el, right_el)
            for left_el, right_el in zip(left_els, right_els)
        )
    if left == "bool" and right == "int":
        return True
    # 'Any' only arises from empty containers or unrecognised objects, and is
    # treated as covered by every other type.
    return left == "Any"


def _merge_into(left: str, right: typing.List[str]) -> None:
    """Merge type name ``left`` into ``right`` in place, keeping ``right`` minimal.

    Every member of ``right`` that ``left`` covers is dropped and replaced by
    ``left``; if ``left`` is itself covered by a member of ``right`` nothing
    changes; otherwise ``left`` is added.
    """
    survivors = [right_el for right_el in right if not _is_covered_by(right_el, left)]
    if len(survivors) != len(right):
        # Drop ALL covered members, not just the first one, so that the result
        # does not depend on the order in which the type names are merged.
        right[:] = survivors
        right.append(left)
        return
    if not any(_is_covered_by(left, right_el) for right_el in right):
        right.append(left)


def _encode_type_names(type_names: typing.Iterable[str]) -> str:
    """Return the name of the narrowest type that covers all ``type_names``.

    The input holds the type names of all the elements of one list, or of all
    the keys (or all the values) of one dict. If, for example, it contains both
    'bool' and 'int', then 'int' suffices to cover both. Likewise 'List[Any]'
    (an empty list) is discarded in favour of a more specific 'List[str]'.
    """
    merged: typing.List[str] = []
    for type_name in type_names:
        _merge_into(type_name, merged)

    if len(merged) == 1:
        return merged[0]
    merged.sort()
    return "Union[" + ",".join(merged) + "]"
219
+
220
+
221
  def isoftype(object, type):
222
  """Checks if an object is of a certain typing type, including nested types.
223