|
|
|
import re |
|
import datetime |
|
|
|
import numpy as np |
|
|
|
import csv |
|
import ctypes |
|
|
|
"""A module to read arff files.""" |
|
|
|
__all__ = ['MetaData', 'loadarff', 'ArffError', 'ParseArffError'] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Any header/meta line: optional whitespace then '@'.
r_meta = re.compile(r'^\s*@')

# A '%' at the very start of a line introduces a comment.
r_comment = re.compile(r'^%')

# A line containing only whitespace (note: does NOT match the empty string).
r_empty = re.compile(r'^\s+$')

# Generic header line: '@' followed by a keyword.
r_headerline = re.compile(r'^\s*@\S*')
# '@data' marker (case insensitive): start of the data section.
r_datameta = re.compile(r'^@[Dd][Aa][Tt][Aa]')
# '@relation <name>' (case insensitive); group 1 captures the dataset name.
r_relation = re.compile(r'^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)')
# '@attribute <rest>' (case insensitive); group 1 captures "name type...".
r_attribute = re.compile(r'^\s*@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)')

# Nominal value set framed between braces; group 1 is the comma-separated list.
r_nominal = re.compile(r'{(.+)}')
# 'date <format>' with the format optionally quoted; group 1 is the format.
r_date = re.compile(r"[Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$")

# "'quoted name' type" attribute declaration (name may contain spaces).
r_comattrval = re.compile(r"'(..+)'\s+(..+$)")

# "name type" attribute declaration with an unquoted, whitespace-free name.
r_wcomattrval = re.compile(r"(\S+)\s+(..+$)")
|
|
|
|
|
|
|
|
|
|
|
|
|
class ArffError(OSError):
    """Base error raised for problems encountered while reading an ARFF file."""
    pass
|
|
|
|
|
class ParseArffError(ArffError):
    """Raised when the content of an ARFF file cannot be parsed."""
    pass
|
|
|
|
|
|
|
|
|
|
|
class Attribute:
    """Abstract base class for a single ARFF attribute (one dataset column).

    Concrete subclasses override ``type_name``, ``parse_attribute`` and
    ``parse_data``; the base implementations act as "not recognized"
    placeholders returning None.
    """

    # Human-readable type label; concrete subclasses override this.
    type_name = None

    def __init__(self, name):
        # Column name as declared on the @attribute header line.
        self.name = name
        # Range/values metadata; filled in by subclasses when applicable.
        self.range = None
        # NumPy dtype used for this column in the output record array.
        self.dtype = np.object_

    @classmethod
    def parse_attribute(cls, name, attr_string):
        """
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.
        """
        return None

    def parse_data(self, data_str):
        """
        Parse a value of this type.
        """
        return None

    def __str__(self):
        """
        Return a short "name,type" description of the attribute.

        NOTE(review): the original docstring here ("Parse a value of this
        type.") was a copy-paste from parse_data; this method only formats.
        Raises TypeError on the base class, where type_name is None.
        """
        return self.name + ',' + self.type_name
|
|
|
|
|
class NominalAttribute(Attribute):
    """Attribute taking one value out of a fixed set of string values."""

    type_name = 'nominal'

    def __init__(self, name, values):
        super().__init__(name)
        # Tuple of admissible values, also exposed as the attribute's range.
        self.values = values
        self.range = values
        # Fixed-width byte strings, sized to the longest admissible value.
        width = max(len(v) for v in values)
        self.dtype = (np.bytes_, width)

    @staticmethod
    def _get_nom_val(atrv):
        """Given a string containing a nominal type, returns a tuple of the
        possible values.

        A nominal type is defined as something framed between braces ({}).

        Parameters
        ----------
        atrv : str
            Nominal type definition

        Returns
        -------
        poss_vals : tuple
            possible values

        Examples
        --------
        >>> from scipy.io.arff._arffread import NominalAttribute
        >>> NominalAttribute._get_nom_val("{floup, bouga, fl, ratata}")
        ('floup', 'bouga', 'fl', 'ratata')
        """
        match = r_nominal.match(atrv)
        if not match:
            raise ValueError("This does not look like a nominal string")
        # Reuse the data-line splitter so quoting rules match the data section.
        parsed_values, _ = split_data_line(match.group(1))
        return tuple(parsed_values)

    @classmethod
    def parse_attribute(cls, name, attr_string):
        """
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For nominal attributes, the attribute string would be like '{<attr_1>,
        <attr2>, <attr_3>}'.
        """
        if attr_string[0] != '{':
            return None
        return cls(name, cls._get_nom_val(attr_string))

    def parse_data(self, data_str):
        """
        Parse a value of this type.

        Raises ValueError if the value is neither '?' (missing) nor one of
        the declared nominal values.
        """
        if data_str == '?' or data_str in self.values:
            return data_str
        raise ValueError(f"{str(data_str)} value not in {str(self.values)}")

    def __str__(self):
        # "name,{v1,v2,...}" - same shape as the ARFF declaration.
        return self.name + ",{" + ",".join(self.values) + "}"
|
|
|
|
|
class NumericAttribute(Attribute):
    """Attribute holding floating point values ('numeric', 'int', 'real')."""

    def __init__(self, name):
        super().__init__(name)
        self.type_name = 'numeric'
        self.dtype = np.float64

    @classmethod
    def parse_attribute(cls, name, attr_string):
        """
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For numeric attributes, the attribute string would be like
        'numeric' or 'int' or 'real'.
        """
        normalized = attr_string.lower().strip()
        # Prefix match, so e.g. 'integer' is also accepted.
        if normalized.startswith(('numeric', 'int', 'real')):
            return cls(name)
        return None

    def parse_data(self, data_str):
        """
        Parse a value of this type.

        Parameters
        ----------
        data_str : str
            string to convert

        Returns
        -------
        f : float
            where float can be nan

        Examples
        --------
        >>> from scipy.io.arff._arffread import NumericAttribute
        >>> atr = NumericAttribute('atr')
        >>> atr.parse_data('1')
        1.0
        >>> atr.parse_data('1\\n')
        1.0
        >>> atr.parse_data('?\\n')
        nan
        """
        # A '?' anywhere in the field marks a missing value.
        return np.nan if '?' in data_str else float(data_str)

    def _basic_stats(self, data):
        # min, max, mean and std; n/(n-1) unbiases the np.std estimate.
        unbias = data.size * 1. / (data.size - 1)
        return (np.nanmin(data), np.nanmax(data),
                np.mean(data), np.std(data) * unbias)
|
|
|
|
|
class StringAttribute(Attribute):
    """Attribute holding arbitrary strings (rejected later by loadarff)."""

    def __init__(self, name):
        super().__init__(name)
        self.type_name = 'string'

    @classmethod
    def parse_attribute(cls, name, attr_string):
        """
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For string attributes, the attribute string would be like
        'string'.
        """
        if attr_string.lower().strip().startswith('string'):
            return cls(name)
        return None
|
|
|
|
|
class DateAttribute(Attribute):
    """Attribute holding dates declared with a Java-style format pattern."""

    def __init__(self, name, date_format, datetime_unit):
        super().__init__(name)
        # strptime-compatible format derived from the ARFF (Java) pattern.
        self.date_format = date_format
        # numpy datetime64 unit code: 'Y', 'M', 'D', 'h', 'm' or 's'.
        self.datetime_unit = datetime_unit
        self.type_name = 'date'
        self.range = date_format
        self.dtype = np.datetime64(0, self.datetime_unit)

    @staticmethod
    def _get_date_format(atrv):
        """Translate the Java-style date pattern of a 'date <format>'
        declaration into a ``strptime`` format string and the finest
        datetime64 unit the pattern uses.

        Parameters
        ----------
        atrv : str
            The raw attribute type string, e.g. ``"date yyyy-MM-dd"``.

        Returns
        -------
        pattern : str
            strptime-compatible format string.
        datetime_unit : str
            numpy datetime64 unit code of the finest component present.

        Raises
        ------
        ValueError
            If the string is not a date declaration, contains a time zone
            token, or contains no recognized date/time token at all.
        """
        m = r_date.match(atrv)
        if not m:
            raise ValueError("Invalid or no date format")

        pattern = m.group(1).strip()

        # Translate Java tokens to strptime directives; later (finer)
        # components overwrite datetime_unit so it ends at the finest one.
        datetime_unit = None
        if "yyyy" in pattern:
            pattern = pattern.replace("yyyy", "%Y")
            datetime_unit = "Y"
        elif "yy" in pattern:
            # BUG FIX: this branch previously read ``elif "yy":`` - a truthy
            # string literal - so *every* pattern was assigned a year unit
            # and the "unsupported format" error below was unreachable.
            pattern = pattern.replace("yy", "%y")
            datetime_unit = "Y"
        if "MM" in pattern:
            pattern = pattern.replace("MM", "%m")
            datetime_unit = "M"
        if "dd" in pattern:
            pattern = pattern.replace("dd", "%d")
            datetime_unit = "D"
        if "HH" in pattern:
            pattern = pattern.replace("HH", "%H")
            datetime_unit = "h"
        if "mm" in pattern:
            pattern = pattern.replace("mm", "%M")
            datetime_unit = "m"
        if "ss" in pattern:
            pattern = pattern.replace("ss", "%S")
            datetime_unit = "s"
        if "z" in pattern or "Z" in pattern:
            raise ValueError("Date type attributes with time zone not "
                             "supported, yet")

        if datetime_unit is None:
            raise ValueError("Invalid or unsupported date format")

        return pattern, datetime_unit

    @classmethod
    def parse_attribute(cls, name, attr_string):
        """
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For date attributes, the attribute string would be like
        'date <format>'.
        """
        attr_string_lower = attr_string.lower().strip()

        if attr_string_lower[:len('date')] == 'date':
            # Pass the original-case string: the format is case sensitive
            # (e.g. MM is month, mm is minute).
            date_format, datetime_unit = cls._get_date_format(attr_string)
            return cls(name, date_format, datetime_unit)
        else:
            return None

    def parse_data(self, data_str):
        """
        Parse a value of this type.

        '?' (optionally quoted) yields NaT; anything else is parsed with
        the declared format.
        """
        date_str = data_str.strip().strip("'").strip('"')
        if date_str == '?':
            return np.datetime64('NaT', self.datetime_unit)
        else:
            dt = datetime.datetime.strptime(date_str, self.date_format)
            return np.datetime64(dt).astype(
                f"datetime64[{self.datetime_unit}]")

    def __str__(self):
        return super().__str__() + ',' + self.date_format
|
|
|
|
|
class RelationalAttribute(Attribute):
    """Attribute whose values are themselves nested ARFF records."""

    def __init__(self, name):
        super().__init__(name)
        self.type_name = 'relational'
        self.dtype = np.object_
        # Nested attribute definitions, filled by read_relational_attribute.
        self.attributes = []
        # CSV dialect sniffed from the first nested row, then reused.
        self.dialect = None

    @classmethod
    def parse_attribute(cls, name, attr_string):
        """
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For relational attributes, the attribute string would be
        'relational'.
        """
        if attr_string.lower().strip()[:len('relational')] == 'relational':
            return cls(name)
        return None

    def parse_data(self, data_str):
        """Parse one relational value into a structured numpy array."""
        # Nested rows arrive as a single escaped string; undo the escaping
        # and split it back into individual data lines.
        unescaped = data_str.encode().decode("unicode-escape")

        indices = list(range(len(self.attributes)))
        parsed_rows = []
        for raw in unescaped.split("\n"):
            row, self.dialect = split_data_line(raw, self.dialect)
            parsed_rows.append(tuple(
                self.attributes[k].parse_data(row[k]) for k in indices))

        return np.array(parsed_rows,
                        [(a.name, a.dtype) for a in self.attributes])

    def __str__(self):
        # Attribute summary plus one indented line per nested attribute.
        return (super().__str__() + '\n\t' +
                '\n\t'.join(str(a) for a in self.attributes))
|
|
|
|
|
|
|
|
|
|
|
def to_attribute(name, attr_string):
    """Return the first Attribute subclass that recognizes ``attr_string``.

    Raises ParseArffError when no known attribute type matches.
    """
    # Order matters: nominal ('{...}') and numeric prefixes are checked
    # before the more permissive types.
    candidates = (NominalAttribute, NumericAttribute, DateAttribute,
                  StringAttribute, RelationalAttribute)
    for klass in candidates:
        parsed = klass.parse_attribute(name, attr_string)
        if parsed is not None:
            return parsed

    raise ParseArffError(f"unknown attribute {attr_string}")
|
|
|
|
|
def csv_sniffer_has_bug_last_field():
    """
    Checks if the bug https://bugs.python.org/issue30157 is unpatched.

    The probe is run once and the boolean result is memoized on the
    function object itself.
    """
    cached = getattr(csv_sniffer_has_bug_last_field, "has_bug", None)
    if cached is None:
        # On a buggy Sniffer, a quoted last field makes quotechar detection
        # fail (it will not be the single quote used here).
        dialect = csv.Sniffer().sniff("3, 'a'")
        cached = dialect.quotechar != "'"
        csv_sniffer_has_bug_last_field.has_bug = cached
    return cached
|
|
|
|
|
def workaround_csv_sniffer_bug_last_field(sniff_line, dialect, delimiters):
    """
    Workaround for the bug https://bugs.python.org/issue30157 if is unpatched.

    Mutates ``dialect`` in place (quotechar, delimiter, doublequote,
    skipinitialspace) when the buggy case - a quoted final field - is
    detected; otherwise leaves it untouched.
    """
    if csv_sniffer_has_bug_last_field():
        # Pattern for a quoted field at the very end of the line - the case
        # the unpatched Sniffer mishandles.
        right_regex = r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'

        # Same candidate patterns, in the same order, as the stdlib
        # csv.Sniffer's _guess_quote_and_delimiter; first match wins.
        for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)',
                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',
                      right_regex,
                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):
            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
            matches = regexp.findall(sniff_line)
            if matches:
                break

        # Only the "quoted last field" pattern needs the workaround; any
        # other match means the stock Sniffer already handled it correctly.
        if restr != right_regex:
            return

        groupindex = regexp.groupindex

        # A single-line sniff string can only produce one such match.
        assert len(matches) == 1
        m = matches[0]

        # findall returned a tuple of group values; map named groups to
        # their tuple positions (groupindex is 1-based).
        n = groupindex['quote'] - 1
        quote = m[n]

        n = groupindex['delim'] - 1
        delim = m[n]

        n = groupindex['space'] - 1
        space = bool(m[n])

        # Detect doubled (escaped) quote characters adjacent to the
        # delimiter, mirroring the Sniffer's doublequote detection.
        dq_regexp = re.compile(
            rf"(({re.escape(delim)})|^)\W*{quote}[^{re.escape(delim)}\n]*{quote}[^{re.escape(delim)}\n]*{quote}\W*(({re.escape(delim)})|$)", re.MULTILINE
        )

        doublequote = bool(dq_regexp.search(sniff_line))

        dialect.quotechar = quote
        # Only trust the detected delimiter if it is one we accept.
        if delim in delimiters:
            dialect.delimiter = delim
        dialect.doublequote = doublequote
        dialect.skipinitialspace = space
|
|
|
|
|
def split_data_line(line, dialect=None):
    """Split one ARFF data line into fields using the csv module.

    Parameters
    ----------
    line : str
        Raw data line (possibly ending with a newline, possibly empty).
    dialect : csv.Dialect, optional
        Previously sniffed dialect to reuse. When None, a dialect is
        sniffed from this line and returned for reuse on later lines.

    Returns
    -------
    row : list of str
        The parsed fields.
    dialect : csv.Dialect
        The dialect actually used.
    """
    delimiters = ",\t"

    # Fields in ARFF data can be very long; raise the csv module's field
    # size limit to half the platform's unsigned long range.
    csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))

    # Strip a trailing newline if any. BUG FIX: guard against the empty
    # string, on which ``line[-1]`` raised IndexError (reachable e.g. from
    # RelationalAttribute.parse_data splitting its payload on '\n').
    if line and line[-1] == '\n':
        line = line[:-1]

    line = line.strip()

    sniff_line = line

    # The Sniffer needs at least one delimiter to work with; append a
    # trailing comma for single-field lines.
    if not any(d in line for d in delimiters):
        sniff_line += ","

    if dialect is None:
        dialect = csv.Sniffer().sniff(sniff_line, delimiters=delimiters)
        workaround_csv_sniffer_bug_last_field(sniff_line=sniff_line,
                                              dialect=dialect,
                                              delimiters=delimiters)

    row = next(csv.reader([line], dialect))

    return row, dialect
|
|
|
|
|
|
|
|
|
|
|
def tokenize_attribute(iterable, attribute):
    """Parse a raw string in header (e.g., starts by @attribute).

    Given a raw string attribute, try to get the name and type of the
    attribute. Constraints:

    * The first line must start with @attribute (case insensitive, and
      space like characters before @attribute are allowed)
    * Works also if the attribute is spread on multilines.
    * Works if empty lines or comments are in between

    Parameters
    ----------
    attribute : str
        the attribute string.

    Returns
    -------
    name : str
        name of the attribute
    value : str
        value of the attribute
    next : str
        next line to be parsed

    Examples
    --------
    If attribute is a string defined in python as r"floupi real", will
    return floupi as name, and real as value.

    >>> from scipy.io.arff._arffread import tokenize_attribute
    >>> iterable = iter([0] * 10) # dummy iterator
    >>> tokenize_attribute(iterable, r"@attribute floupi real")
    ('floupi', 'real', 0)

    If attribute is r"'floupi 2' real", will return 'floupi 2' as name,
    and real as value.

    >>> tokenize_attribute(iterable, r" @attribute 'floupi 2' real   ")
    ('floupi 2', 'real', 0)

    """
    sattr = attribute.strip()
    mattr = r_attribute.match(sattr)
    if not mattr:
        raise ValueError(f"First line unparsable: {sattr}")

    # Everything after the '@attribute' keyword: name + type declaration.
    rest = mattr.group(1)
    if r_comattrval.match(rest):
        # Quoted name (may contain spaces).
        name, attr_type = tokenize_single_comma(rest)
        next_item = next(iterable)
    elif r_wcomattrval.match(rest):
        # Unquoted, whitespace-free name.
        name, attr_type = tokenize_single_wcomma(rest)
        next_item = next(iterable)
    else:
        # Declarations spread over several lines are not handled.
        raise ValueError("multi line not supported yet")

    parsed = to_attribute(name, attr_type)

    # Relational attributes carry nested @attribute declarations that run
    # until the matching '@end <name>' line.
    if attr_type.lower() == 'relational':
        next_item = read_relational_attribute(iterable, parsed, next_item)

    return parsed, next_item
|
|
|
|
|
def tokenize_single_comma(val):
    """Split a "'name' type" declaration (quoted name) into (name, type)."""
    m = r_comattrval.match(val)
    if not m:
        raise ValueError(f"Error while tokenizing single {val}")
    try:
        return m.group(1).strip(), m.group(2).strip()
    except IndexError as e:
        raise ValueError("Error while tokenizing attribute") from e
|
|
|
|
|
def tokenize_single_wcomma(val):
    """Split a "name type" declaration (unquoted name) into (name, type)."""
    m = r_wcomattrval.match(val)
    if not m:
        raise ValueError(f"Error while tokenizing single {val}")
    try:
        return m.group(1).strip(), m.group(2).strip()
    except IndexError as e:
        raise ValueError("Error while tokenizing attribute") from e
|
|
|
|
|
def read_relational_attribute(ofile, relational_attribute, i):
    """Read the nested attributes of a relational attribute"""
    # Nested declarations end at a matching '@end <name>' line.
    r_end_relational = re.compile(r'^@[Ee][Nn][Dd]\s*' +
                                  relational_attribute.name + r'\s*$')

    while not r_end_relational.match(i):
        if r_headerline.match(i):
            if r_attribute.match(i):
                attr, i = tokenize_attribute(ofile, i)
                relational_attribute.attributes.append(attr)
            else:
                # A header line that is not an attribute is illegal here.
                raise ValueError(f"Error parsing line {i}")
        else:
            # Skip comments / blank lines between declarations.
            i = next(ofile)

    # Skip the @end line itself and hand back the line that follows.
    i = next(ofile)
    return i
|
|
|
|
|
def read_header(ofile):
    """Read the header of the iterable ofile."""
    i = next(ofile)

    # Skip any leading comment lines.
    while r_comment.match(i):
        i = next(ofile)

    # The header is everything up to the @data marker: one @relation line
    # plus any number of @attribute lines.
    relation = None
    attributes = []
    while not r_datameta.match(i):
        if not r_headerline.match(i):
            # Comment/blank line inside the header: skip it.
            i = next(ofile)
            continue

        if r_attribute.match(i):
            attr, i = tokenize_attribute(ofile, i)
            attributes.append(attr)
        else:
            rel_match = r_relation.match(i)
            if not rel_match:
                raise ValueError(f"Error parsing line {i}")
            relation = rel_match.group(1)
            i = next(ofile)

    return relation, attributes
|
|
|
|
|
class MetaData:
    """Small container to keep useful information on a ARFF dataset.

    Knows about attributes names and types.

    Examples
    --------
    ::

        data, meta = loadarff('iris.arff')
        # This will print the attributes names of the iris.arff dataset
        for i in meta:
            print(i)
        # This works too
        meta.names()
        # Getting attribute type
        types = meta.types()

    Methods
    -------
    names
    types

    Notes
    -----
    Also maintains the list of attributes in order, i.e., doing for i in
    meta, where meta is an instance of MetaData, will return the
    different attribute names in the order they were defined.
    """
    def __init__(self, rel, attr):
        # Dataset (relation) name.
        self.name = rel
        # Insertion-ordered mapping attribute name -> Attribute instance.
        self._attributes = {}
        for a in attr:
            self._attributes[a.name] = a

    def __repr__(self):
        pieces = [f"Dataset: {self.name}\n"]
        for attr_name, a in self._attributes.items():
            line = f"\t{attr_name}'s type is {a.type_name}"
            if a.range:
                line += f", range is {str(a.range)}"
            pieces.append(line + '\n')
        return "".join(pieces)

    def __iter__(self):
        # Iterating a MetaData yields attribute names in definition order.
        return iter(self._attributes)

    def __getitem__(self, key):
        entry = self._attributes[key]
        return (entry.type_name, entry.range)

    def names(self):
        """Return the list of attribute names.

        Returns
        -------
        attrnames : list of str
            The attribute names.
        """
        return list(self._attributes)

    def types(self):
        """Return the list of attribute types.

        Returns
        -------
        attr_types : list of str
            The attribute types.
        """
        return [a.type_name for a in self._attributes.values()]
|
|
|
|
|
def loadarff(f):
    """
    Read an arff file.

    The data is returned as a record array, which can be accessed much like
    a dictionary of NumPy arrays. For example, if one of the attributes is
    called 'pressure', then its first 10 data points can be accessed from the
    ``data`` record array like so: ``data['pressure'][0:10]``


    Parameters
    ----------
    f : file-like or str
       File-like object to read from, or filename to open.

    Returns
    -------
    data : record array
       The data of the arff file, accessible by attribute names.
    meta : `MetaData`
       Contains information about the arff file such as name and
       type of attributes, the relation (name of the dataset), etc.

    Raises
    ------
    ParseArffError
        This is raised if the given file is not ARFF-formatted.
    NotImplementedError
        The ARFF file has an attribute which is not supported yet.

    Notes
    -----

    This function should be able to read most arff files. Not
    implemented functionality include:

    * date type attributes
    * string type attributes

    It can read files with numeric and nominal attributes. It cannot read
    files with sparse data ({} in the file). However, this function can
    read files with missing data (? in the file), representing the data
    points as NaNs.

    Examples
    --------
    >>> from scipy.io import arff
    >>> from io import StringIO
    >>> content = \"\"\"
    ... @relation foo
    ... @attribute width numeric
    ... @attribute height numeric
    ... @attribute color {red,green,blue,yellow,black}
    ... @data
    ... 5.0,3.25,blue
    ... 4.5,3.75,green
    ... 3.0,4.00,red
    ... \"\"\"
    >>> f = StringIO(content)
    >>> data, meta = arff.loadarff(f)
    >>> data
    array([(5.0, 3.25, 'blue'), (4.5, 3.75, 'green'), (3.0, 4.0, 'red')],
          dtype=[('width', '<f8'), ('height', '<f8'), ('color', '|S6')])
    >>> meta
    Dataset: foo
    \twidth's type is numeric
    \theight's type is numeric
    \tcolor's type is nominal, range is ('red', 'green', 'blue', 'yellow', 'black')

    """
    if hasattr(f, 'read'):
        # Caller supplied an open file-like object; closing is their job.
        return _loadarff(f)
    # Got a filename: open it ourselves and guarantee it is closed.
    with open(f) as ofile:
        return _loadarff(ofile)
|
|
|
|
|
def _loadarff(ofile):
    """Parse an open ARFF stream into (record array, MetaData)."""
    # Parse the header, up to and including the @data marker.
    try:
        rel, attr = read_header(ofile)
    except ValueError as e:
        msg = "Error while parsing header, error was: " + str(e)
        raise ParseArffError(msg) from e

    # String attributes cannot be represented in the output record array.
    hasstr = any(isinstance(a, StringAttribute) for a in attr)

    meta = MetaData(rel, attr)

    if hasstr:
        raise NotImplementedError("String attributes not supported yet, sorry")

    ni = len(attr)

    def generator(row_iter, delim=','):
        """Yield one parsed tuple per non-empty, non-comment data line."""
        elems = list(range(ni))

        # The dialect is sniffed from the first data row, then reused for
        # all subsequent rows.
        dialect = None
        for raw in row_iter:
            # Skip comments and blank lines inside the data section.
            if r_comment.match(raw) or r_empty.match(raw):
                continue

            row, dialect = split_data_line(raw, dialect)

            yield tuple(attr[j].parse_data(row[j]) for j in elems)

    rows = list(generator(ofile))

    data = np.array(rows, [(a.name, a.dtype) for a in attr])
    return data, meta
|
|
|
|