CLT Simulator
Drag the sample size slider to watch the Central Limit Theorem in action. The left plot shows the true population; the right shows the sampling distribution of the mean. Notice how it approaches a normal curve as n grows, regardless of the population's shape.
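If you want to poke at the idea from a plain R console first, the sketch below runs the same experiment without Shiny. The Exponential(1) population, n = 30, and 2,000 replicates are arbitrary choices for illustration; the point is to compare the spread of the simulated sample means with the sigma / sqrt(n) standard error the CLT predicts.

```r
# Minimal CLT check outside Shiny: many sample means from a skewed population
set.seed(1)
n     <- 30
means <- replicate(2000, mean(rexp(n, rate = 1)))  # Exponential(1): mu = 1, sigma = 1

# Observed spread of the sample means vs. the CLT prediction sigma / sqrt(n)
c(observed_sd = sd(means), theoretical_se = 1 / sqrt(n))

hist(means, breaks = 40, probability = TRUE,
     main = "Sampling distribution of the mean (n = 30)", xlab = "Sample mean")
curve(dnorm(x, mean = 1, sd = 1 / sqrt(n)), add = TRUE, col = "red", lwd = 2)
```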
```{shinylive-r}
#| standalone: true
#| viewerHeight: 600
library(shiny)
# ---------------------------------------------------------------------------
# Helper: draw a single random sample from the chosen distribution
# ---------------------------------------------------------------------------
draw_sample <- function(n, dist) {
switch(dist,
"Uniform(0, 1)" = runif(n),
"Exponential(1)" = rexp(n, rate = 1),
"Right-skewed" = rchisq(n, df = 3),
"Bimodal" = {
k <- rbinom(n, 1, 0.5)
k * rnorm(n, mean = -2, sd = 0.6) + (1 - k) * rnorm(n, mean = 2, sd = 0.6)
},
"Bernoulli(0.3)" = rbinom(n, size = 1, prob = 0.3),
runif(n)
)
}
# Theoretical mean & sd of each population distribution
pop_params <- list(
"Uniform(0, 1)" = list(mu = 0.5, sigma = sqrt(1 / 12)),
"Exponential(1)" = list(mu = 1, sigma = 1),
"Right-skewed" = list(mu = 3, sigma = sqrt(6)),
"Bimodal" = list(mu = 0, sigma = sqrt(0.6^2 + 4)),
"Bernoulli(0.3)" = list(mu = 0.3, sigma = sqrt(0.3 * 0.7))
)
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
ui <- fluidPage(
tags$head(tags$style(HTML("
.stats-box {
background: #eaf2f8; border-radius: 6px; padding: 14px;
margin-top: 12px; font-size: 14px; line-height: 1.8;
}
.stats-box b { color: #2c3e50; }
"))),
sidebarLayout(
sidebarPanel(
width = 3,
selectInput("dist", "Population distribution:",
choices = names(pop_params)),
sliderInput("n", "Sample size (n):",
min = 1, max = 200, value = 5, step = 1),
sliderInput("reps", "Number of samples:",
min = 100, max = 3000, value = 1000, step = 100),
actionButton("resample", "Draw new samples",
class = "btn-primary", width = "100%"),
uiOutput("stats_box")
),
mainPanel(
width = 9,
fluidRow(
column(6, plotOutput("parent_plot", height = "380px")),
column(6, plotOutput("sampling_plot", height = "380px"))
)
)
)
)
# ---------------------------------------------------------------------------
# Server
# ---------------------------------------------------------------------------
server <- function(input, output, session) {
sim <- reactive({
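# referenced only for its reactive dependency: each button click re-runs the simulation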
input$resample
n <- input$n
reps <- input$reps
dist <- input$dist
means <- replicate(reps, mean(draw_sample(n, dist)))
params <- pop_params[[dist]]
theo_mu <- params$mu
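# CLT standard error of the sample mean: sigma / sqrt(n)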
theo_se <- params$sigma / sqrt(n)
list(means = means, dist = dist, n = n, reps = reps,
theo_mu = theo_mu, theo_se = theo_se)
})
output$parent_plot <- renderPlot({
dist <- input$dist
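# a large draw (10,000 points) stands in for the true population density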
big <- draw_sample(10000, dist)
par(mar = c(4.5, 4, 3, 1))
hist(big, breaks = 60, probability = TRUE,
col = "#d5e8d4", border = "#82b366",
main = paste("True Population:", dist),
xlab = "x", ylab = "Density")
})
output$sampling_plot <- renderPlot({
s <- sim()
par(mar = c(4.5, 4, 3, 1))
hist(s$means, breaks = 40, probability = TRUE,
col = "#dae8fc", border = "#6c8ebf",
main = paste0("Sampling Distribution of the Mean (n = ", s$n, ")"),
xlab = "Sample mean", ylab = "Density")
x_seq <- seq(min(s$means), max(s$means), length.out = 300)
lines(x_seq, dnorm(x_seq, mean = s$theo_mu, sd = s$theo_se),
col = "#e74c3c", lwd = 2.5)
abline(v = s$theo_mu, lty = 2, lwd = 2, col = "#2c3e50")
legend("topright",
legend = c("Normal approximation", "Theoretical mean"),
col = c("#e74c3c", "#2c3e50"),
lwd = c(2.5, 2),
lty = c(1, 2),
bty = "n", cex = 0.9)
})
output$stats_box <- renderUI({
s <- sim()
tags$div(class = "stats-box",
HTML(paste0(
"<b>Theoretical mean:</b> ", round(s$theo_mu, 4), "<br>",
"<b>Observed mean:</b> ", round(mean(s$means), 4), "<br>",
"<b>Theoretical SE:</b> ", round(s$theo_se, 4), "<br>",
"<b>Observed SD:</b> ", round(sd(s$means), 4)
))
)
})
}
shinyApp(ui, server)
```
Did you know?
- The CLT was first glimpsed by Abraham de Moivre in 1733, who showed that the binomial distribution approaches a bell curve. Laplace generalized it in 1812. But the rigorous proof for arbitrary distributions came from Aleksandr Lyapunov in 1901 — over 150 years after de Moivre’s insight.
- The normal distribution is sometimes called the “Gaussian” distribution after Carl Friedrich Gauss, but Gauss wasn’t the first to describe it — de Moivre was. Gauss just got better publicity.
- The CLT explains why so many things in nature look bell-shaped: human heights, blood pressure, measurement errors, IQ scores. Whenever an outcome is the sum of many small independent factors, the CLT kicks in, as the short simulation below shows.
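A quick way to convince yourself of that last point is to add up many small independent shocks and look at the shape of the result. The sketch below uses 40 Uniform(-1, 1) shocks per outcome; both numbers are arbitrary choices for illustration.

```r
# Sum of many small independent factors: 40 Uniform(-1, 1) shocks per outcome
set.seed(42)
shocks <- matrix(runif(5000 * 40, min = -1, max = 1), nrow = 5000)
sums   <- rowSums(shocks)

hist(sums, breaks = 50, probability = TRUE,
     main = "Sum of 40 small independent shocks", xlab = "Sum")
# CLT prediction: mean 0, variance 40 * 1/3 (each Uniform(-1, 1) shock has variance 1/3)
curve(dnorm(x, mean = 0, sd = sqrt(40 / 3)), add = TRUE, col = "red", lwd = 2)
```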