Skip to content
forked from pydata/xarray

Commit f826b65

Browse files
committed
better backcompat
1 parent b295193 commit f826b65

File tree

4 files changed

+92
-12
lines changed

4 files changed

+92
-12
lines changed

xarray/core/dataarray.py

+10
Original file line numberDiff line numberDiff line change
@@ -6749,6 +6749,11 @@ def groupby(
67496749
restore_coord_dims : bool, default: False
67506750
If True, also restore the dimension order of multi-dimensional
67516751
coordinates.
6752+
eagerly_compute_group : bool
6753+
Whether to eagerly compute ``group`` when it is a chunked array.
6754+
This option is to maintain backwards compatibility. Set to False
6755+
to opt-in to future behaviour, where ``group`` is not automatically loaded
6756+
into memory.
67526757
**groupers : Mapping of str to Grouper or Resampler
67536758
Mapping of variable name to group by to :py:class:`Grouper` or :py:class:`Resampler` object.
67546759
One of ``group`` or ``groupers`` must be provided.
@@ -6917,6 +6922,11 @@ def groupby_bins(
69176922
coordinates.
69186923
duplicates : {"raise", "drop"}, default: "raise"
69196924
If bin edges are not unique, raise ValueError or drop non-uniques.
6925+
eagerly_compute_group : bool
6926+
Whether to eagerly compute ``group`` when it is a chunked array.
6927+
This option is to maintain backwards compatibility. Set to False
6928+
to opt-in to future behaviour, where ``group`` is not automatically loaded
6929+
into memory.
69206930
69216931
Returns
69226932
-------

xarray/core/dataset.py

+10
Original file line numberDiff line numberDiff line change
@@ -10382,6 +10382,11 @@ def groupby(
1038210382
restore_coord_dims : bool, default: False
1038310383
If True, also restore the dimension order of multi-dimensional
1038410384
coordinates.
10385+
eagerly_compute_group : bool
10386+
Whether to eagerly compute ``group`` when it is a chunked array.
10387+
This option is to maintain backwards compatibility. Set to False
10388+
to opt-in to future behaviour, where ``group`` is not automatically loaded
10389+
into memory.
1038510390
**groupers : Mapping of str to Grouper or Resampler
1038610391
Mapping of variable name to group by to :py:class:`Grouper` or :py:class:`Resampler` object.
1038710392
One of ``group`` or ``groupers`` must be provided.
@@ -10519,6 +10524,11 @@ def groupby_bins(
1051910524
coordinates.
1052010525
duplicates : {"raise", "drop"}, default: "raise"
1052110526
If bin edges are not unique, raise ValueError or drop non-uniques.
10527+
eagerly_compute_group : bool
10528+
Whether to eagerly compute ``group`` when it is a chunked array.
10529+
This option is to maintain backwards compatibility. Set to False
10530+
to opt-in to future behaviour, where ``group`` is not automatically loaded
10531+
into memory.
1052210532
1052310533
Returns
1052410534
-------

xarray/core/groupby.py

+37-12
Original file line numberDiff line numberDiff line change
@@ -308,21 +308,45 @@ def __post_init__(self) -> None:
308308
# of pd.cut
309309
# We do not want to modify the original object, since the same grouper
310310
# might be used multiple times.
311+
from xarray.groupers import BinGrouper, UniqueGrouper
312+
311313
self.grouper = copy.deepcopy(self.grouper)
312314

313315
self.group = _resolve_group(self.obj, self.group)
314316

315-
if (
316-
self.eagerly_compute_group
317-
and not isinstance(self.group, _DummyGroup)
318-
and is_chunked_array(self.group.variable._data)
317+
if not isinstance(self.group, _DummyGroup) and is_chunked_array(
318+
self.group.variable._data
319319
):
320-
emit_user_level_warning(
321-
f"Eagerly computing the DataArray you're grouping by ({self.group.name!r}) "
322-
"is deprecated and will be removed in v2025.05.0. "
323-
"Please load this array's data manually using `.compute` or `.load`.",
324-
DeprecationWarning,
325-
)
320+
if self.eagerly_compute_group is False:
321+
# This requires a pass to discover the groups present
322+
if (
323+
isinstance(self.grouper, UniqueGrouper)
324+
and self.grouper.labels is None
325+
):
326+
raise ValueError(
327+
"Please pass `labels` to UniqueGrouper when grouping by a chunked array."
328+
)
329+
# this requires a pass to compute the bin edges
330+
if isinstance(self.grouper, BinGrouper) and isinstance(
331+
self.grouper.bins, int
332+
):
333+
raise ValueError(
334+
"Please pass explicit bin edges to BinGrouper using the ``bins`` kwarg "
335+
"when grouping by a chunked array."
336+
)
337+
338+
if self.eagerly_compute_group:
339+
emit_user_level_warning(
340+
f"""Eagerly computing the DataArray you're grouping by ({self.group.name!r})
341+
is deprecated and will raise an error in v2025.05.0.
342+
Please load this array's data manually using `.compute` or `.load`.
343+
To intentionally avoid eager loading, either (1) specify
344+
`.groupby({self.group.name}=UniqueGrouper(labels=...), eagerly_compute_group=False)`
345+
or (2) pass explicit bin edges using `.groupby({self.group.name}=BinGrouper(bins=...),
346+
eagerly_compute_group=False)`; as appropriate.""",
347+
DeprecationWarning,
348+
)
349+
self.group = self.group.compute()
326350

327351
self.encoded = self.grouper.factorize(self.group)
328352

@@ -678,8 +702,9 @@ def _raise_if_by_is_chunked(self):
678702
if self._by_chunked:
679703
raise ValueError(
680704
"This method is not supported when lazily grouping by a chunked array. "
681-
"Either load the array in to memory prior to grouping, or explore another "
682-
"way of applying your function, potentially using the `flox` package."
705+
"Either load the array into memory prior to grouping using .load or .compute, "
706+
"or explore another way of applying your function, "
707+
"potentially using the `flox` package."
683708
)
684709

685710
def _raise_if_not_single_group(self):

xarray/tests/test_groupby.py

+35
Original file line numberDiff line numberDiff line change
@@ -3091,6 +3091,41 @@ def test_groupby_multiple_bin_grouper_missing_groups():
30913091
assert_identical(actual, expected)
30923092

30933093

3094+
@requires_dask
3095+
def test_groupby_dask_eager_load_warnings():
3096+
ds = xr.Dataset(
3097+
{"foo": (("z"), np.arange(12))},
3098+
coords={"x": ("z", np.arange(12)), "y": ("z", np.arange(12))},
3099+
).chunk(z=6)
3100+
3101+
with pytest.warns(DeprecationWarning):
3102+
ds.groupby(x=UniqueGrouper())
3103+
3104+
with pytest.warns(DeprecationWarning):
3105+
ds.groupby("x")
3106+
3107+
with pytest.warns(DeprecationWarning):
3108+
ds.groupby(ds.x)
3109+
3110+
with pytest.raises(ValueError, match="Please pass"):
3111+
ds.groupby("x", eagerly_compute_group=False)
3112+
3113+
# This is technically fine but anyone iterating over the groupby object
3114+
# will see an error, so let's warn and have them opt-in.
3115+
with pytest.warns(DeprecationWarning):
3116+
ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]))
3117+
3118+
ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]), eagerly_compute_group=False)
3119+
3120+
with pytest.warns(DeprecationWarning):
3121+
ds.groupby_bins("x", bins=3)
3122+
with pytest.raises(ValueError, match="Please pass"):
3123+
ds.groupby_bins("x", bins=3, eagerly_compute_group=False)
3124+
with pytest.warns(DeprecationWarning):
3125+
ds.groupby_bins("x", bins=[1, 2, 3])
3126+
ds.groupby_bins("x", bins=[1, 2, 3], eagerly_compute_group=False)
3127+
3128+
30943129
# Possible property tests
30953130
# 1. lambda x: x
30963131
# 2. grouped-reduce on unique coords is identical to array

0 commit comments

Comments
 (0)