diff --git a/NEWS.md b/NEWS.md index 9506aaf6..1faf8019 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # TidierData.jl updates +## v0.13.3 - 2023-11-23 +- `@slice()` now correctly handles `n()` in grouped data frames + ## v0.13.2 - 2023-11-20 - Adds `@anti_join()` and `@semi_join()` diff --git a/Project.toml b/Project.toml index e842334a..b27fcd2b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TidierData" uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80" authors = ["Karandeep Singh"] -version = "0.13.2" +version = "0.13.3" [deps] Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" diff --git a/src/docstrings.jl b/src/docstrings.jl index 68d0c378..d36a469b 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -601,7 +601,7 @@ Select, remove or duplicate rows by indexing their integer positions. # Examples ```jldoctest -julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15); +julia> df = DataFrame(a = repeat('a':'c', inner = 3), b = 1:9, c = 11:19); julia> @chain df begin @slice(1:5) @@ -611,36 +611,80 @@ julia> @chain df begin │ Char Int64 Int64 ─────┼──────────────────── 1 │ a 1 11 - 2 │ b 2 12 - 3 │ c 3 13 - 4 │ d 4 14 - 5 │ e 5 15 + 2 │ a 2 12 + 3 │ a 3 13 + 4 │ b 4 14 + 5 │ b 5 15 julia> @chain df begin @slice(-(1:2)) end -3×3 DataFrame +7×3 DataFrame Row │ a b c │ Char Int64 Int64 ─────┼──────────────────── - 1 │ c 3 13 - 2 │ d 4 14 - 3 │ e 5 15 + 1 │ a 3 13 + 2 │ b 4 14 + 3 │ b 5 15 + 4 │ b 6 16 + 5 │ c 7 17 + 6 │ c 8 18 + 7 │ c 9 19 julia> @chain df begin @group_by(a) @slice(1) @ungroup end -5×3 DataFrame +3×3 DataFrame Row │ a b c │ Char Int64 Int64 ─────┼──────────────────── 1 │ a 1 11 - 2 │ b 2 12 - 3 │ c 3 13 - 4 │ d 4 14 - 5 │ e 5 15 + 2 │ b 4 14 + 3 │ c 7 17 + +julia> @chain df begin + @group_by(a) + @slice(n()) + @ungroup + end +3×3 DataFrame + Row │ a b c + │ Char Int64 Int64 +─────┼──────────────────── + 1 │ a 3 13 + 2 │ b 6 16 + 3 │ c 9 19 + +julia> @chain df begin + @group_by(a) + @slice(-n()) + @ungroup + end +6×3 
DataFrame + Row │ a b c + │ Char Int64 Int64 +─────┼──────────────────── + 1 │ a 1 11 + 2 │ a 2 12 + 3 │ b 4 14 + 4 │ b 5 15 + 5 │ c 7 17 + 6 │ c 8 18 + +julia> @chain df begin + @group_by(a) + @slice(-(2:n())) + @ungroup + end +3×3 DataFrame + Row │ a b c + │ Char Int64 Int64 +─────┼──────────────────── + 1 │ a 1 11 + 2 │ b 4 14 + 3 │ c 7 17 ``` """ diff --git a/src/slice.jl b/src/slice.jl index 45aa3d84..ffdb6adb 100644 --- a/src/slice.jl +++ b/src/slice.jl @@ -18,9 +18,21 @@ macro slice(df, exprs...) if all(clean_indices .> 0) if $(esc(df)) isa GroupedDataFrame combine($(esc(df)); ungroup = false) do sdf - sdf[clean_indices, :] - end - else + local n_rows_group = nrow(sdf) + local interpolated_indices = parse_slice_n.($exprs, n_rows_group) + local original_indices = [eval.(interpolated_indices)...] + local clean_indices = Int64[] + for index in original_indices + if index isa Number + push!(clean_indices, index) + else + append!(clean_indices, collect(index)) + end + end + clean_indices = filter(i -> i <= n_rows_group, clean_indices) + sdf[clean_indices, :] + end + else combine($(esc(df))) do sdf sdf[clean_indices, :] end @@ -28,10 +40,26 @@ macro slice(df, exprs...) elseif all(clean_indices .< 0) clean_indices = -clean_indices if $(esc(df)) isa GroupedDataFrame - combine($(esc(df)); ungroup = true) do sdf - sdf[Not(clean_indices), :] - end - else + combine($(esc(df)); ungroup = false) do sdf + local n_rows_group = nrow(sdf) + local interpolated_indices = parse_slice_n.($exprs, n_rows_group) + local original_indices = [eval.(interpolated_indices)...] 
+ local clean_indices = Int64[] + for index in original_indices + if index isa Number + # index has to be absolute valued because initial clean_indices are ignored + # needs to work for -n() and for -(1:n()) + push!(clean_indices, abs(index)) + else + # index has to be absolute valued because initial clean_indices are ignored + # needs to work for -n() and for -(1:n()) + append!(clean_indices, abs.(collect(index))) + end + end + clean_indices = filter(i -> i <= n_rows_group, clean_indices) + sdf[Not(clean_indices), :] + end + else combine($(esc(df))) do sdf sdf[Not(clean_indices), :] end