# --- Random-effect summaries from a brms binary GLMM ----------------------
# Helpers for summarising a single grouping factor in a logit model that has
# a global intercept and `(1 | group)` structure. They standardise the
# downstream column names (`base_prob`, `group_prob`, `added_prob`,
# `n_total`, `n_success`) so downstream plot/table code is reusable.
#' Posterior draws of a random effect's added probability.
#' Returns one row per draw per group level, with `added_prob =
#' plogis(b_Intercept + r_<group>) - plogis(b_Intercept)`.
extract_added_prob_draws <- function(model, group_col) {
re_param_name <- paste0("r_", group_col)
# spread_draws() uses its own NSE indexing grammar -- `r_<group>[<group>, ]`
# declares the index column to spread -- which the dplyr .data pronoun can't
# express, so this one argument is built with rlang. Everything downstream
# reads columns through .data[[ ]].
re_arg <- rlang::parse_expr(paste0(re_param_name, "[", group_col, ", ]"))
draws <- rlang::inject(
tidybayes::spread_draws(model, b_Intercept, !!re_arg)
)
draws |>
dplyr::mutate(
base_prob = plogis(b_Intercept),
group_prob = plogis(b_Intercept + .data[[re_param_name]]),
added_prob = group_prob - base_prob
)
}
#' Median of `plogis(b_Intercept)` over the posterior — a single
#' representative baseline probability for plot annotations.
compute_baseline_prob <- function(draws) {
draws |>
dplyr::ungroup() |>
dplyr::distinct(.draw, b_Intercept) |>
dplyr::summarise(p = stats::median(plogis(b_Intercept))) |>
dplyr::pull(p)
}
#' Per-group counts from the model frame (so levels match brms' R-name
#' format and any rows brms dropped don't poison the sums). Returns
#' `<group_col>`, `n_total`, `n_success`, with spaces in the group name
#' replaced by dots to match the random-effect names.
compute_group_counts <- function(model_data, group_col, outcome_col) {
model_data |>
dplyr::group_by(.data[[group_col]]) |>
dplyr::summarise(
n_total = dplyr::n(),
n_success = sum(.data[[outcome_col]]),
.groups = "drop"
) |>
dplyr::mutate(dplyr::across(
dplyr::all_of(group_col),
\(x) stringr::str_replace_all(x, " ", "\\.")
))
}
#' Summary stats (median, mean and 2.5/97.5% quantiles of added_prob)
#' joined with counts, plus a rank and display label. `sort_by` selects which
#' statistic the rows are arranged by; `desc` chooses the direction (1 = best).
compute_ranks <- function(
draws,
group_col,
counts_df,
sort_by = "median_added",
desc = TRUE
) {
ranks <- draws |>
dplyr::group_by(.data[[group_col]]) |>
dplyr::summarise(
median_added = stats::median(added_prob),
mean_added = mean(added_prob),
q025_added = stats::quantile(added_prob, probs = 0.025, names = FALSE),
q975_added = stats::quantile(added_prob, probs = 0.975, names = FALSE),
.groups = "drop"
) |>
dplyr::left_join(counts_df, by = group_col)
ranks <- if (desc) {
dplyr::arrange(ranks, dplyr::desc(.data[[sort_by]]))
} else {
dplyr::arrange(ranks, .data[[sort_by]])
}
# rank is the row's position in the sorted order (1 = best); surface it in the
# label so every row on the plot is tagged with its overall standing
ranks |>
dplyr::mutate(
rank = dplyr::row_number(),
label = sprintf(
"%d. %s (n = %d/%d, prop = %.0f%%)",
rank,
stringr::str_replace_all(.data[[group_col]], "\\.", " "),
n_success,
n_total,
100 * n_success / n_total
)
)
}
#' Build a small inline SVG "half-eye": a posterior density slab over a 95%
#' interval bar + median dot, all on a shared x-axis [xrange] so rows compare
#' directly. A dashed line marks 0 (average). `fill` colours the slab.
.dist_sparkline_svg <- function(values, xrange, fill, w = 130, h = 30) {
d <- stats::density(values, from = xrange[1], to = xrange[2], n = 64)
yn <- d$y / max(d$y) # normalise each slab to its own height
q <- stats::quantile(values, c(0.025, 0.5, 0.975), names = FALSE)
sx <- function(x) (x - xrange[1]) / diff(xrange) * w
top <- 3
slab_base <- h - 10
iv_y <- h - 5
sy <- function(t) slab_base - t * (slab_base - top)
curve <- paste(sprintf("%.1f,%.1f", sx(d$x), sy(yn)), collapse = " ")
parts <- c(
sprintf(
'<line x1="%.1f" y1="2" x2="%.1f" y2="%.1f" stroke="#aaa" stroke-width="0.7" stroke-dasharray="2,2"/>',
sx(0), sx(0), h - 2
),
sprintf(
'<polygon points="%.1f,%.1f %s %.1f,%.1f" fill="%s" fill-opacity="0.9" stroke="%s" stroke-width="0.6"/>',
sx(xrange[1]), slab_base, curve, sx(xrange[2]), slab_base, fill, fill
),
sprintf(
'<line x1="%.1f" y1="%.1f" x2="%.1f" y2="%.1f" stroke="#333" stroke-width="1.4"/>',
sx(q[1]), iv_y, sx(q[3]), iv_y
),
sprintf('<circle cx="%.1f" cy="%.1f" r="1.9" fill="#222"/>', sx(q[2]), iv_y)
)
sprintf(
'<svg width="%d" height="%d" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg">%s</svg>',
w, h, w, h, paste(parts, collapse = "")
)
}
#' Three stacked panels (shared x-axis) summarising the spread of per-player
#' added-probability effects: a histogram of takers vs. goalkeepers on top, then
#' one boxplot per group with the best and worst `n_label` of each labelled.
plot_skill_distribution <- function(
taker_ranks,
gk_ranks,
x_label,
taker_name_col = "taker_name",
gk_name_col = "gk_name",
taker_fill = "#78B7C5",
gk_fill = "#E1AF00",
n_label = 3,
binwidth = 0.005
) {
prep <- function(ranks, name_col, role) {
ranks |>
dplyr::transmute(
role = role,
player = stringr::str_replace_all(.data[[name_col]], "\\.", " "),
median_added = median_added
)
}
takers <- prep(taker_ranks, taker_name_col, "Takers")
gks <- prep(gk_ranks, gk_name_col, "Goalkeepers")
combined <- dplyr::bind_rows(takers, gks)
combined$role <- factor(combined$role, levels = c("Takers", "Goalkeepers"))
fills <- c(Takers = taker_fill, Goalkeepers = gk_fill)
# shared x-range (probability units) so all three panels line up
xlim <- range(combined$median_added)
xlim <- xlim + c(-1, 1) * 0.05 * diff(xlim)
pp_axis <- ggplot2::scale_x_continuous(
labels = scales::label_number(scale = 100)
)
zero_line <- ggplot2::geom_vline(
xintercept = 0, linetype = "dashed", color = "gray50"
)
p_hist <- combined |>
ggplot2::ggplot(ggplot2::aes(median_added, fill = role)) +
ggplot2::geom_histogram(
binwidth = binwidth, position = "identity", alpha = 0.55
) +
zero_line +
pp_axis +
ggplot2::scale_y_continuous(expand = ggplot2::expansion(mult = c(0, 0.05))) +
ggplot2::scale_fill_manual(values = fills, name = NULL) +
ggplot2::coord_cartesian(xlim = xlim) +
ggplot2::labs(x = NULL, y = "Count") +
ggplot2::theme_minimal() +
ggplot2::theme(
panel.grid.minor = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
axis.ticks.x = ggplot2::element_blank(),
legend.position = "top",
legend.justification = "left"
)
# one boxplot per group, the best and worst `n_label` labelled; the group's
# name rides the (coloured) y-axis title so the two panels are easy to tell
# apart without leaning on the legend
box_panel <- function(d, role_fill, show_x) {
labelled <- dplyr::bind_rows(
dplyr::slice_max(d, median_added, n = n_label),
dplyr::slice_min(d, median_added, n = n_label)
)
ggplot2::ggplot(d, ggplot2::aes(x = median_added, y = 0)) +
# a normal boxplot: box + whiskers summarise the pack and the boxplot's
# own outliers are drawn as small neutral points; only the best and worst
# few -- labelled on top -- get the group's distinct colour
ggplot2::geom_boxplot(
width = 0.5,
fill = "gray90", color = "gray40", linewidth = 0.4,
outlier.color = "gray70", outlier.size = 0.8, outlier.alpha = 0.7
) +
ggplot2::geom_point(data = labelled, size = 1.5, color = role_fill) +
ggrepel::geom_text_repel(
data = labelled,
ggplot2::aes(label = player),
size = 3.3, direction = "both", nudge_y = 0.35,
force = 6, force_pull = 0.3, box.padding = 0.4, point.padding = 0.2,
segment.size = 0.3, segment.color = "gray50", segment.curvature = -0.1,
arrow = grid::arrow(length = grid::unit(0.006, "npc"), type = "closed"),
min.segment.length = 0.1, max.overlaps = Inf, seed = 42
) +
zero_line +
pp_axis +
ggplot2::coord_cartesian(xlim = xlim, ylim = c(-0.4, 1.6)) +
ggplot2::labs(x = if (show_x) x_label else NULL, y = unique(d$role)) +
ggplot2::theme_minimal() +
ggplot2::theme(
panel.grid.minor = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
axis.text.y = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
axis.title.y = ggplot2::element_text(color = role_fill, face = "bold"),
axis.text.x = if (show_x) {
ggplot2::element_text()
} else {
ggplot2::element_blank()
},
axis.ticks.x = if (show_x) {
ggplot2::element_line()
} else {
ggplot2::element_blank()
}
)
}
patchwork::wrap_plots(
p_hist,
box_panel(takers, taker_fill, show_x = FALSE),
box_panel(gks, gk_fill, show_x = TRUE),
ncol = 1,
heights = c(4, 2, 2)
)
}
#' Interactive, sortable ranking table built from a compute_ranks() output and
#' its posterior `draws`. Each row shows an inline half-eye (density slab + 95%
#' interval) on a shared x-axis, slab-filled with a reversed Zissou1 colour
#' scaled to the player's median effect (blue = better than average, red =
#' worse; `higher_is_better = FALSE` flips this for goalkeepers). The exact
#' p2.5 / median / p97.5 numbers sit alongside. Shows `page_size` rows per page,
#' default-sorted best-first (first page = top 10); every numeric column is
#' sortable, so a reader can re-rank by the lower or upper bound.
table_re <- function(
ranks_df,
draws,
name_col,
effect_label = "Added probability vs. average",
rate_label = "Observed %",
count_label = "Made",
higher_is_better = TRUE,
page_size = 10,
accent = "#78B7C5"
) {
# shared x-axis covering every row's 95% interval and 0, lightly padded
xrange <- range(c(ranks_df$q025_added, ranks_df$q975_added, 0))
xrange <- xrange + c(-1, 1) * 0.04 * diff(xrange)
# reversed Zissou1, scaled symmetrically about 0 and oriented so "good" is
# always the blue end regardless of whether higher or lower is better
pal <- rev(wesanderson::wes_palette("Zissou1", 100, type = "continuous"))
sgn <- if (higher_is_better) 1 else -1
lim <- max(abs(ranks_df$median_added))
fill_for <- function(m) {
i <- round((sgn * m / lim + 1) / 2 * (length(pal) - 1)) + 1
pal[min(max(i, 1L), length(pal))]
}
# one inline half-eye SVG per group, from that group's posterior draws
svgs <- draws |>
dplyr::ungroup() |>
dplyr::group_by(.data[[name_col]]) |>
dplyr::summarise(
dist = .dist_sparkline_svg(
added_prob, xrange, fill_for(stats::median(added_prob))
),
.groups = "drop"
)
tbl <- ranks_df |>
dplyr::left_join(svgs, by = name_col) |>
dplyr::transmute(
rank = rank,
player = stringr::str_replace_all(.data[[name_col]], "\\.", " "),
kicks = n_total,
made = n_success,
obs_rate = n_success / n_total,
dist = dist,
lo = q025_added,
mid = median_added,
hi = q975_added
)
# signed percentage formatter; the cell keeps the raw numeric value so the
# column still sorts numerically (reactable sorts on the data, not the label)
added_col <- function(header, bold = FALSE) {
reactable::colDef(
name = header,
# added_prob is a difference of two probabilities, so its unit is
# percentage points, not percent
cell = function(value) sprintf("%+.1f pp", 100 * value),
align = "right",
# wide enough that the header keeps its sort arrow on one line
minWidth = 90,
style = if (bold) list(fontWeight = 600) else NULL
)
}
# size a numeric column to its header text plus room for the sort arrow, so
# the header (see defaultColDef) stays on one line without making the column
# wider than its longest label needs -- short-label tables would otherwise
# carry so much dead width the whole table overflows into a horizontal scroll
header_width <- function(label, floor = 56) {
max(floor, round(nchar(label) * 7.5) + 30)
}
reactable::reactable(
tbl,
defaultSorted = list(rank = "asc"),
defaultPageSize = page_size,
showPageSizeOptions = TRUE,
pageSizeOptions = c(10, 25, 50, 100),
searchable = TRUE,
resizable = TRUE,
highlight = TRUE,
compact = TRUE,
# keep every header (and the sort arrow reactable injects into it) on one
# line rather than letting it wrap onto a second row
defaultColDef = reactable::colDef(
headerStyle = list(whiteSpace = "nowrap")
),
columnGroups = list(
reactable::colGroup(
name = paste0(effect_label, " (95% CrI)"),
columns = c("dist", "lo", "mid", "hi")
)
),
columns = list(
rank = reactable::colDef(
name = "#", width = 60, align = "right",
style = list(whiteSpace = "nowrap")
),
player = reactable::colDef(name = "Player", minWidth = 160),
kicks = reactable::colDef(
name = "Kicks", width = header_width("Kicks"), align = "right"
),
made = reactable::colDef(
name = count_label, width = header_width(count_label), align = "right"
),
obs_rate = reactable::colDef(
name = rate_label,
format = reactable::colFormat(percent = TRUE, digits = 0),
width = header_width(rate_label),
align = "right"
),
dist = reactable::colDef(
name = "Distribution",
html = TRUE,
sortable = FALSE,
align = "center",
minWidth = 150
),
lo = added_col("p2.5"),
mid = added_col("Median", bold = TRUE),
hi = added_col("p97.5")
),
theme = reactable::reactableTheme(
highlightColor = "#eef6f8",
headerStyle = list(borderBottom = paste0("2px solid ", accent))
)
)
}