|
1 | 1 |
|
2 | 2 | # mapobs
|
3 | 3 |
|
4 |
| -struct MappedData{F,D} <: AbstractDataContainer |
| 4 | +struct MappedData{batched, F, D} <: AbstractDataContainer |
5 | 5 | f::F
|
6 | 6 | data::D
|
7 | 7 | end
|
8 | 8 |
|
9 |
| -Base.show(io::IO, data::MappedData) = print(io, "mapobs($(data.f), $(summary(data.data)))") |
10 |
| -Base.show(io::IO, data::MappedData{F,<:AbstractArray}) where {F} = |
11 |
| - print(io, "mapobs($(data.f), $(ShowLimit(data.data, limit=80)))") |
| 9 | +function Base.show(io::IO, data::MappedData{batched}) where {batched} |
| 10 | + print(io, "mapobs(") |
| 11 | + print(IOContext(io, :compact=>true), data.f) |
| 12 | + print(io, ", ") |
| 13 | + print(IOContext(io, :compact=>true), data.data) |
| 14 | + print(io, "; batched=:$(batched))") |
| 15 | +end |
| 16 | + |
12 | 17 | Base.length(data::MappedData) = numobs(data.data)
|
13 |
| -Base.getindex(data::MappedData, idx::Int) = data.f(getobs(data.data, idx)) |
14 |
| -Base.getindex(data::MappedData, idxs::AbstractVector) = data.f.(getobs(data.data, idxs)) |
| 18 | +Base.getindex(data::MappedData, ::Colon) = data[1:length(data)] |
| 19 | + |
| 20 | +Base.getindex(data::MappedData{:auto}, idx::Int) = data.f(getobs(data.data, idx)) |
| 21 | +Base.getindex(data::MappedData{:auto}, idxs::AbstractVector) = data.f(getobs(data.data, idxs)) |
| 22 | + |
| 23 | +Base.getindex(data::MappedData{:never}, idx::Int) = data.f(getobs(data.data, idx)) |
| 24 | +Base.getindex(data::MappedData{:never}, idxs::AbstractVector) = [data.f(getobs(data.data, idx)) for idx in idxs] |
| 25 | + |
| 26 | +Base.getindex(data::MappedData{:always}, idx::Int) = getobs(data.f(getobs(data.data, [idx])), 1) |
| 27 | +Base.getindex(data::MappedData{:always}, idxs::AbstractVector) = data.f(getobs(data.data, idxs)) |
15 | 28 |
|
16 | 29 |
|
17 | 30 | """
|
18 |
| - mapobs(f, data) |
| 31 | + mapobs(f, data; batched=:auto) |
19 | 32 |
|
20 | 33 | Lazily map `f` over the observations in a data container `data`.
|
| 34 | +Returns a new data container `mdata` that can be indexed and has a length. |
| 35 | +Indexing triggers the transformation `f`. |
| 36 | +
|
| 37 | +The `batched` keyword argument controls the behavior of `mdata[idx]` and `mdata[idxs]`, |
| 38 | +where `idx` is an integer and `idxs` is a vector of integers: |
| 39 | +- `batched=:auto` (default). Let `f` handle the two cases. |
| 40 | + Calls `f(getobs(data, idx))` and `f(getobs(data, idxs))`, respectively. |
| 41 | +- `batched=:never`. `f` is always called on a single observation. |
| 42 | + Calls `f(getobs(data, idx))` and `[f(getobs(data, idx)) for idx in idxs]`, respectively. |
| 43 | +- `batched=:always`. `f` is always called on a batch of observations. |
| 44 | + Calls `getobs(f(getobs(data, [idx])), 1)` and `f(getobs(data, idxs))`, respectively. |
| 45 | +
|
| 46 | +# Examples |
| 47 | +
|
21 | 48 | ```julia
|
22 |
| -data = 1:10 |
23 |
| -getobs(data, 8) == 8 |
24 |
| -mdata = mapobs(-, data) |
25 |
| -getobs(mdata, 8) == -8 |
| 49 | +julia> data = (a=[1,2,3], b=[1,2,3]); |
| 50 | +
|
| 51 | +julia> mdata = mapobs(data) do x |
| 52 | + (c = x.a .+ x.b, d = x.a .- x.b) |
| 53 | + end |
| 54 | +mapobs(#25, (a = [1, 2, 3], b = [1, 2, 3]); batched=:auto) |
| 55 | +
|
| 56 | +julia> mdata[1] |
| 57 | +(c = 2, d = 0) |
| 58 | +
|
| 59 | +julia> mdata[1:2] |
| 60 | +(c = [2, 4], d = [0, 0]) |
26 | 61 | ```
|
27 | 62 | """
|
28 |
| -mapobs(f, data) = MappedData(f, data) |
29 |
| -mapobs(f::typeof(identity), data) = data |
30 |
| - |
| 63 | +mapobs(f::F, data::D; batched=:auto) where {F,D} = MappedData{batched, F, D}(f, data) |
31 | 64 |
|
32 | 65 | """
|
33 | 66 | mapobs(fs, data)
|
|
0 commit comments