Skip to content

Commit

Permalink
Minor improvements to the bootlm
Browse files Browse the repository at this point in the history
Various improvements relating to the 'auto' prior option of the 'bayesian' method
- Performance enhancement in cases where sample sizes (and therefore the automatically calculated prior) are equal; only one simulation is necessary, rather than a separate simulation for each sample.
- Rather than using a single prior for bayesian bootstrap of two samples in the posthoc tests, `bootlm` now uses a separate prior for each sample. This is a better approach when sample sizes are unequal.
- bumped version number
  • Loading branch information
acp29 committed Jun 3, 2024
1 parent 84d0fa4 commit f60410b
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 59 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: statistics-resampling
version: 5.5.17
date: 2024-05-25
version: 5.5.18
date: 2024-06-03
author: Andrew Penn <[email protected]>
maintainer: Andrew Penn <[email protected]>
title: A statistics package with a variety of resampling tools
Expand Down
166 changes: 110 additions & 56 deletions inst/bootlm.m
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@
% calculated exclude variability attributed to other predictors in
% the model. To avoid small sample bias inflating effect sizes for
% posthoc comparisons when use the 'bayesian' method, use the 'auto'
% setting for the prior.
% setting for the prior.
%
% '[...] = bootlm (Y, GROUP, ..., 'seed', SEED)' initialises the Mersenne
% Twister random number generator using an integer SEED value so that
Expand All @@ -393,8 +393,11 @@
% - 'CI_upper': The upper bound(s) of the confidence/credible interval(s)
% - 'pval': The p-value(s) for the hypothesis that the estimate(s) == 0
% - 'fpr': The minimum false positive risk (FPR) for each p-value
% - 'N': The number of independnet sampling units used to compute CIs
% - 'prior': The prior used for Bayesian bootstrap
% - 'N': The number of independent sampling units used to compute CIs
% - 'prior': The prior used for Bayesian bootstrap. This will return a
% scalar for regression coeefficients, or a P x 1 or P x 2
% matrix for estimated marginal means or posthoc tests
% respectively, where P is the number of parameter estimates.
% - 'levels': A cell array with the levels of each predictor.
% - 'contrasts': A cell array of contrasts associated with each predictor.
%
Expand Down Expand Up @@ -680,7 +683,7 @@
GROUP(excl,:) = [];
end
Y(excl) = [];
if ((~ isempty(DEP)) && (~ isscalar(DEP)))
if ((~ isempty (DEP)) && (~ isscalar (DEP)))
DEP(excl) = [];
end
n = numel (Y); % Recalculate total number of observations
Expand Down Expand Up @@ -1111,34 +1114,43 @@
switch (lower (PRIOR))
case 'auto'
PRIOR = 1 - 2 ./ N_dim;
% Use parallel processing if it is available to accelerate
% bootstrap computation for each column of the hypothesis
% matrix.
if (PARALLEL)
if (ISOCTAVE)
[STATS, BOOTSTAT] = pararrayfun (inf, @(i) bootbayes ( ...
Y, X, DEP, NBOOT, fliplr (1 - ALPHA), ...
PRIOR(i), SEED, L(:, i), ISOCTAVE), (1:Np)', ...
'UniformOutput', false);
else
STATS = cell (Np, 1); BOOTSTAT = cell (Np, 1);
parfor i = 1:Np
[STATS{i}, BOOTSTAT{i}] = bootbayes (Y, X, DEP, ...
NBOOT, fliplr (1 - ALPHA), ...
PRIOR(i), SEED, L(:, i), ISOCTAVE);
if (all (PRIOR == PRIOR(1)))
[STATS, BOOTSTAT] = bootbayes (Y, X, DEP, NBOOT, ...
fliplr (1 - ALPHA), PRIOR(1), SEED, L, ISOCTAVE);
STATS.prior = PRIOR;
else
% If the prior is not the same across all the samples then
% we need to perform a separate simulation for each sample.
% Use parallel processing if it is available to accelerate
% bootstrap computation for each column of the hypothesis
% matrix.
if (PARALLEL)
if (ISOCTAVE)
[STATS, BOOTSTAT] = pararrayfun (inf, @(i) bootbayes ( ...
Y, X, DEP, NBOOT, fliplr (1 - ALPHA), ...
PRIOR(i), SEED, L(:, i), ISOCTAVE), (1:Np)', ...
'UniformOutput', false);
else
STATS = cell (Np, 1); BOOTSTAT = cell (Np, 1);
parfor i = 1:Np
[STATS{i}, BOOTSTAT{i}] = bootbayes (Y, X, DEP, ...
NBOOT, fliplr (1 - ALPHA), ...
PRIOR(i), SEED, L(:, i), ISOCTAVE);
end
end
else
[STATS, BOOTSTAT] = arrayfun (@(i) bootbayes (Y, X, ...
DEP, NBOOT, fliplr (1 - ALPHA), PRIOR(i), SEED, ...
L(:, i), ISOCTAVE), (1:Np)', 'UniformOutput', false);
end
else
[STATS, BOOTSTAT] = arrayfun (@(i) bootbayes (Y, X, ...
DEP, NBOOT, fliplr (1 - ALPHA), PRIOR(i), SEED, ...
L(:, i), ISOCTAVE), (1:Np)', 'UniformOutput', false);
STATS = flatten_struct (cell2mat (STATS));
BOOTSTAT = cell2mat (BOOTSTAT);
end
STATS = flatten_struct (cell2mat (STATS));
BOOTSTAT = cell2mat (BOOTSTAT);
otherwise
[STATS, BOOTSTAT] = bootbayes (Y, X, DEP, NBOOT, ...
fliplr (1 - ALPHA), PRIOR, SEED, ...
L, ISOCTAVE);
STATS.prior = PRIOR * ones (Np, 1);
end
% Create empty fields in STATS structure
STATS.pval = [];
Expand Down Expand Up @@ -1166,7 +1178,6 @@
' control group for ''trt_vs_ctrl'''))
end
pairs = feval (POSTHOC{1}, L, POSTHOC{2:end});
L = make_test_matrix (L, pairs);
POSTHOC = POSTHOC{1};
case 'char'
if (strcmp (POSTHOC, 'trt.vs.ctrl'))
Expand All @@ -1176,18 +1187,18 @@
' ''pairwise'' and ''trt_vs_ctrl'''))
end
pairs = feval (POSTHOC, L);
L = make_test_matrix (L, pairs);
otherwise
pairs = unique_stable (POSTHOC, 'rows');
if (size (pairs, 2) > 2)
error (cat (2, 'bootlm: pairs matrix defining posthoc', ...
' comparisons must have exactly two columns'))
end
L = make_test_matrix (L, pairs);
end
Np = size (pairs, 1); % Update the number of parameters
switch (lower (METHOD))
case 'wild'
% Modifying the hypothesis matrix (L) to perform the desired tests
L = make_test_matrix (L, pairs);
Np = size (pairs, 1); % Update the number of parameters
[STATS, BOOTSTAT] = bootwild (Y, X, DEP, NBOOT, ALPHA, SEED, ...
L, ISOCTAVE);
% Control the type 1 error rate across multiple comparisons
Expand All @@ -1199,40 +1210,70 @@
case {'bayes', 'bayesian'}
switch (lower (PRIOR))
case 'auto'
wgt = bsxfun (@rdivide, N_dim(pairs')', ...
sum (N_dim(pairs')', 2));
PRIOR = sum ((1 - wgt) .* (1 - 2 ./ N_dim(pairs')'), 2);
% Use parallel processing if it is available to accelerate
% bootstrap computation for each column of the hypothesis
% matrix.
if (PARALLEL)
if (ISOCTAVE)
[STATS, BOOTSTAT] = pararrayfun (inf, @(i) bootbayes ( ...
Y, X, DEP, NBOOT, fliplr (1 - ALPHA), ...
PRIOR(i), SEED, L(:, i), ISOCTAVE), (1:Np)', ...
'UniformOutput', false);
else
STATS = cell (Np, 1); BOOTSTAT = cell (Np, 1);
parfor i = 1:Np
[STATS{i}, BOOTSTAT{i}] = bootbayes (Y, X, DEP, ...
NBOOT, fliplr (1 - ALPHA), ...
PRIOR(i), SEED, L(:, i), ISOCTAVE);
% To enable the 'auto' prior to be set independently on each
% sample, we use the hypothesis matrix (L) already generated
% to compute the posterior distributions for the estimated
% marginal means each with their own prior. Later, we will
% perform elementwise subtraction for each pair of posterior
% distributions to find the differences
PRIOR = 1 - 2 ./ N_dim;
if (all (PRIOR == PRIOR(1)))
[STATS, BOOTSTAT] = bootbayes (Y, X, DEP, NBOOT, ...
fliplr (1 - ALPHA), PRIOR(1), SEED, L, ISOCTAVE);
else
% If the prior is not the same across all the samples then
% we need to perform a separate simulation for each sample.
% Use parallel processing if it is available to accelerate
% bootstrap computation for each column of the hypothesis
% matrix.
if (PARALLEL)
if (ISOCTAVE)
[STATS, BOOTSTAT] = pararrayfun (inf, @(i) bootbayes ( ...
Y, X, DEP, NBOOT, fliplr (1 - ALPHA), ...
PRIOR(i), SEED, L(:, i), ISOCTAVE), (1:Np)', ...
'UniformOutput', false);
else
STATS = cell (Np, 1); BOOTSTAT = cell (Np, 1);
parfor i = 1:Np
[STATS{i}, BOOTSTAT{i}] = bootbayes (Y, X, DEP, ...
NBOOT, fliplr (1 - ALPHA), ...
PRIOR(i), SEED, L(:, i), ISOCTAVE);
end
end
else
[STATS, BOOTSTAT] = arrayfun (@(i) bootbayes (Y, X, ...
DEP, NBOOT, fliplr (1 - ALPHA), PRIOR(i), SEED, ...
L(:, i), ISOCTAVE), (1:Np)', 'UniformOutput', false);
end
else
[STATS, BOOTSTAT] = arrayfun (@(i) bootbayes (Y, X, ...
DEP, NBOOT, fliplr (1 - ALPHA), PRIOR(i), SEED, ...
L(:, i), ISOCTAVE), (1:Np)', 'UniformOutput', false);
STATS = flatten_struct (cell2mat (STATS));
BOOTSTAT = cell2mat (BOOTSTAT);
end
STATS = flatten_struct (cell2mat (STATS));
BOOTSTAT = cell2mat (BOOTSTAT);
otherwise
% Compute the posterior distributions for the estimated
% marginal means each with the PRIOR. Later, we will perform
% elementwise subtraction for each pair of posterior
% distributions to find the differences
[STATS, BOOTSTAT] = bootbayes (Y, X, DEP, NBOOT, ...
fliplr (1 - ALPHA), PRIOR, SEED, L, ISOCTAVE);
fliplr (1 - ALPHA), PRIOR, SEED, ...
L, ISOCTAVE);
end
% Create empty fields in STATS structure
STATS.pval = [];
STATS.fpr = [];
% Calculate estimates and credible intervals from the estimated
% marginal means and their associated posterior distributions
Np = size (pairs, 1); % Update the number of parameters
STATS.original = bsxfun (@minus, STATS.original(pairs(:, 1), :), ...
STATS.original(pairs(:, 2), :));
BOOTSTAT = bsxfun (@minus, BOOTSTAT(pairs(:, 1),:), ...
BOOTSTAT(pairs(:, 2),:));
CI = credint (BOOTSTAT, fliplr (1 - ALPHA));
STATS.CI_lower = CI(:,1); STATS.CI_upper = CI(:,2);
if (isscalar (PRIOR))
STATS.prior = PRIOR * ones (Np, 2);
else
STATS.prior = cat (2, PRIOR(pairs(:, 1)), PRIOR(pairs(:, 2)));
end
otherwise
error (cat (2, 'bootlm: unrecignised bootstrap method.', ...
' Use ''wild'' or ''bayesian''.'))
Expand Down Expand Up @@ -1347,9 +1388,16 @@
% If applicable, print parameter estimates (a.k.a contrasts) for fixed
% effects. Parameter estimates correspond to the contrasts we set.
if (isempty (DIM))
switch (lower (METHOD))
case 'wild'
p_str = 'p-val';
case {'bayes', 'bayesian'}
p_str = ' ';
end
fprintf ('\nMODEL COEFFICIENTS\n\n');

fprintf (cat (2, 'name coeff', ...
' CI_lower CI_upper p-val\n'));
' CI_lower CI_upper %s\n'), p_str);
fprintf (cat (2, '--------------------------------------------', ...
'------------------------------------\n'));
else
Expand All @@ -1361,9 +1409,15 @@
fprintf (cat (2, '---------------------------------------', ...
'-----------------------------------------\n'));
otherwise
switch (lower (METHOD))
case 'wild'
p_str = 'p-adj';
case {'bayes', 'bayesian'}
p_str = ' ';
end
fprintf ('\nMODEL POSTHOC COMPARISONS\n\n');
fprintf (cat (2, 'name ', ...
'mean CI_lower CI_upper p-adj\n'));
'mean CI_lower CI_upper %s\n'), p_str);
fprintf (cat (2, '---------------------------------------', ...
'-----------------------------------------\n'));
end
Expand Down
2 changes: 1 addition & 1 deletion inst/bootwild.m
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@
% Calculate the false-positive risk from the minumum Bayes Factor
L10 = 1 ./ minBF; % Convert to Maximum Likelihood ratio L10 (P(H1)/P(H0))
fpr = max (0, 1 ./ (1 + L10)); % Calculate minimum false positive risk
fpr(isnan(p)) = NaN;
fpr(isnan (p)) = NaN;

end

Expand Down

0 comments on commit f60410b

Please sign in to comment.