python - Possible bug with `xarray.Dataset.groupby()`? -
I'm using xarray version 0.8.0, Python 3.5.1, on Mac OS X El Capitan 10.11.6.
The following code works as expected.
id_data_array = xarray.dataarray([280, 306, 280], coords={"index": range(3)}) random = numpy.random.rand(3) score_data_array = xarray.dataarray(random, coords={"index": range(3)}) score_dataset = xarray.dataset({"id": id_data_array, "score": score_data_array}) print(score_dataset) print("======") print(score_dataset.groupby("id").count())
output:
<xarray.dataset> dimensions: (index: 3) coordinates: * index (index) int64 0 1 2 data variables: id (index) int64 280 306 280 score (index) float64 0.8358 0.7536 0.9495 ====== <xarray.dataset> dimensions: (id: 2) coordinates: * id (id) int64 280 306 data variables: score (id) int64 2 1 in [ ]:
However, if I change one little thing, to make the elements of id_data_array
all distinct, then there is an error.
code:
id_data_array = xarray.dataarray([280, 306, 120], coords={"index": range(3)}) random = numpy.random.rand(3) score_data_array = xarray.dataarray(random, coords={"index": range(3)}) score_dataset = xarray.dataset({"id": id_data_array, "score": score_data_array}) print(score_dataset) print("======") print(score_dataset.groupby("id").count())
output:
<xarray.dataset> dimensions: (index: 3) coordinates: * index (index) int64 0 1 2 data variables: id (index) int64 280 306 120 score (index) float64 0.1353 0.0437 0.1687 ====== --------------------------------------------------------------------------- invalidindexerror traceback (most recent call last) <ipython-input-92-cc412270ba2e> in <module>() 5 print(score_dataset) 6 print("======") ----> 7 print(score_dataset.groupby("id").count()) /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/common.py in wrapped_func(self, dim, keep_attrs, **kwargs) 44 return self.reduce(func, dim, keep_attrs, 45 numeric_only=numeric_only, allow_lazy=true, ---> 46 **kwargs) 47 return wrapped_func 48 /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/groupby.py in reduce(self, func, dim, keep_attrs, **kwargs) 605 def reduce_dataset(ds): 606 return ds.reduce(func, dim, keep_attrs, **kwargs) --> 607 return self.apply(reduce_dataset) 608 609 def assign(self, **kwargs): /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/groupby.py in apply(self, func, **kwargs) 562 kwargs.pop('shortcut', none) # ignore shortcut if set (for now) 563 applied = (func(ds, **kwargs) ds in self._iter_grouped()) --> 564 combined = self._concat(applied) 565 result = self._maybe_restore_empty_groups(combined) 566 return result /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/groupby.py in _concat(self, applied) 570 concat_dim, positions = self._infer_concat_args(applied_example) 571 --> 572 combined = concat(applied, concat_dim) 573 reordered = _maybe_reorder(combined, concat_dim, positions) 574 return reordered /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/combine.py in concat(objs, dim, data_vars, coords, compat, positions, indexers, mode, concat_over) 114 raise typeerror('can concatenate xarray dataset , dataarray 
' 115 'objects, got %s' % type(first_obj)) --> 116 return f(objs, dim, data_vars, coords, compat, positions) 117 118 /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/combine.py in _dataset_concat(datasets, dim, data_vars, coords, compat, positions) 276 if coord not none: 277 # add concat dimension last ensure in final dataset --> 278 result[coord.name] = coord 279 280 return result /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/dataset.py in __setitem__(self, key, value) 536 raise notimplementederror('cannot yet use dictionary key ' 537 'to set dataset values') --> 538 self.update({key: value}) 539 540 def __delitem__(self, key): /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/dataset.py in update(self, other, inplace) 1434 dataset. 1435 """ -> 1436 variables, coord_names, dims = dataset_update_method(self, other) 1437 1438 return self._replace_vars_and_dims(variables, coord_names, dims, /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/merge.py in dataset_update_method(dataset, other) 490 priority_arg = 1 491 indexes = dataset.indexes --> 492 return merge_core(objs, priority_arg=priority_arg, indexes=indexes) /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/merge.py in merge_core(objs, compat, join, priority_arg, explicit_coords, indexes) 371 372 coerced = coerce_pandas_values(objs) --> 373 aligned = deep_align(coerced, join=join, copy=false, indexes=indexes) 374 expanded = expand_variable_dicts(aligned) 375 /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/alignment.py in deep_align(list_of_variable_maps, join, copy, indexes) 146 out.append(variables) 147 --> 148 aligned = partial_align(*targets, join=join, copy=copy, indexes=indexes) 149 150 key, aligned_obj in zip(keys, aligned): 
/library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/alignment.py in partial_align(*objects, **kwargs) 109 valid_indexers = dict((k, v) k, v in joined_indexes.items() 110 if k in obj.dims) --> 111 result.append(obj.reindex(copy=copy, **valid_indexers)) 112 return tuple(result) 113 /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/dataset.py in reindex(self, indexers, method, tolerance, copy, **kw_indexers) 1216 1217 variables = alignment.reindex_variables( -> 1218 self.variables, self.indexes, indexers, method, tolerance, copy=copy) 1219 return self._replace_vars_and_dims(variables) 1220 /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/xarray/core/alignment.py in reindex_variables(variables, indexes, indexers, method, tolerance, copy) 218 target = utils.safe_cast_to_index(indexers[name]) 219 indexer = index.get_indexer(target, method=method, --> 220 **get_indexer_kwargs) 221 222 to_shape[name] = len(target) /library/frameworks/python.framework/versions/3.5/lib/python3.5/site-packages/pandas/indexes/base.py in get_indexer(self, target, method, limit, tolerance) 2080 2081 if not self.is_unique: -> 2082 raise invalidindexerror('reindexing valid uniquely' 2083 ' valued index objects') 2084 invalidindexerror: reindexing valid uniquely valued index objects
To me this seems buggy, because if it is the desired behaviour then it is strange. Surely, it should include the case when the elements of the DataArray
we're grouping by are all distinct?
update
I've uninstalled and reinstalled xarray. The new xarray version is 0.8.1, and it seems to work fine. So it may indeed be a bug in xarray 0.8.0.
Comments
Post a Comment