CLT Simulator

Drag the sample size slider to watch the Central Limit Theorem in action. The left plot shows the population distribution (a histogram of 10,000 simulated draws); the right shows the sampling distribution of the mean — notice how it becomes approximately normal as n grows, regardless of the population shape.

#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 600

library(shiny)

# ---------------------------------------------------------------------------
# Helper: draw a single random sample from the chosen distribution
# ---------------------------------------------------------------------------
#' Draw one random sample of size `n` from a named population.
#'
#' @param n Number of observations to draw.
#' @param dist Population label: "Uniform(0, 1)", "Exponential(1)",
#'   "Right-skewed" (chi-square, df = 3), "Bimodal", or "Bernoulli(0.3)".
#'   Any other label falls back to a Uniform(0, 1) draw.
#' @return A vector of `n` random draws.
draw_sample <- function(n, dist) {
  # Equal-weight mixture of N(-2, 0.6) and N(2, 0.6): a coin flip per
  # observation decides which component's draw is kept.
  bimodal_mixture <- function() {
    from_left   <- rbinom(n, 1, 0.5)
    left_draws  <- rnorm(n, mean = -2, sd = 0.6)
    right_draws <- rnorm(n, mean = 2, sd = 0.6)
    from_left * left_draws + (1 - from_left) * right_draws
  }

  # switch() only evaluates the matching arm, so exactly one generator runs.
  switch(dist,
    "Uniform(0, 1)"  = runif(n),
    "Exponential(1)" = rexp(n, rate = 1),
    "Right-skewed"   = rchisq(n, df = 3),
    "Bimodal"        = bimodal_mixture(),
    "Bernoulli(0.3)" = rbinom(n, size = 1, prob = 0.3),
    runif(n)  # unnamed arm: default for unrecognised labels
  )
}

# Theoretical mean (mu) and standard deviation (sigma) of each population,
# keyed by the distribution labels offered in the UI.
pop_params <- local({
  # Small constructor so every entry has the same two named fields.
  moments <- function(mu, sigma) list(mu = mu, sigma = sigma)

  list(
    # Uniform(0, 1): variance is 1 / 12
    "Uniform(0, 1)"  = moments(0.5, sqrt(1 / 12)),
    # Exponential with rate 1: mean and sd are both 1
    "Exponential(1)" = moments(1, 1),
    # Chi-square with 3 degrees of freedom: mean 3, variance 6
    "Right-skewed"   = moments(3, sqrt(6)),
    # 50/50 mixture of N(-2, 0.6) and N(2, 0.6):
    # within-component variance 0.6^2 plus between-component variance 4
    "Bimodal"        = moments(0, sqrt(0.6^2 + 4)),
    # Bernoulli(p = 0.3): variance p * (1 - p)
    "Bernoulli(0.3)" = moments(0.3, sqrt(0.3 * 0.7))
  )
})

# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
ui <- local({
  # Page-level CSS for the summary-statistics card rendered in the sidebar.
  page_styles <- tags$head(tags$style(HTML("
    .stats-box {
      background: #eaf2f8; border-radius: 6px; padding: 14px;
      margin-top: 12px; font-size: 14px; line-height: 1.8;
    }
    .stats-box b { color: #2c3e50; }
  ")))

  # Left column: simulation controls followed by the statistics card.
  controls <- sidebarPanel(
    width = 3,

    # Choices come straight from pop_params so the menu and the
    # theoretical-moment lookup share one set of labels.
    selectInput(
      inputId = "dist",
      label   = "Population distribution:",
      choices = names(pop_params)
    ),

    sliderInput(
      inputId = "n",
      label   = "Sample size (n):",
      min = 1, max = 200, value = 5, step = 1
    ),

    sliderInput(
      inputId = "reps",
      label   = "Number of samples:",
      min = 100, max = 3000, value = 1000, step = 100
    ),

    actionButton(
      inputId = "resample",
      label   = "Draw new samples",
      class   = "btn-primary",
      width   = "100%"
    ),

    uiOutput(outputId = "stats_box")
  )

  # Right column: population plot and sampling-distribution plot side by side.
  plots <- mainPanel(
    width = 9,
    fluidRow(
      column(width = 6, plotOutput(outputId = "parent_plot", height = "380px")),
      column(width = 6, plotOutput(outputId = "sampling_plot", height = "380px"))
    )
  )

  fluidPage(
    page_styles,
    sidebarLayout(sidebarPanel = controls, mainPanel = plots)
  )
})

# ---------------------------------------------------------------------------
# Server
# ---------------------------------------------------------------------------
# Shiny server: simulates sample means for the selected population and renders
# the population histogram, the sampling-distribution histogram with its
# normal approximation, and a small table of theoretical vs observed values.
#
# input   - reads `dist`, `n`, `reps`, and `resample`.
# output  - writes `parent_plot`, `sampling_plot`, and `stats_box`.
# session - unused.
server <- function(input, output, session) {

  # Simulated sample means plus the CLT-predicted mean and standard error.
  # Reading input$resample (without using its value) makes the button a
  # dependency, so each click draws a fresh set of samples.
  sim <- reactive({
    input$resample
    n    <- input$n
    reps <- input$reps
    dist <- input$dist

    # vapply() pins the result to a numeric vector of length `reps`;
    # replicate() simplifies like sapply() and is not type-stable.
    means <- vapply(
      seq_len(reps),
      function(i) mean(draw_sample(n, dist)),
      numeric(1)
    )

    params  <- pop_params[[dist]]
    theo_mu <- params$mu
    theo_se <- params$sigma / sqrt(n)  # standard error of the mean

    list(means = means, dist = dist, n = n, reps = reps,
         theo_mu = theo_mu, theo_se = theo_se)
  })

  # Population shape, shown as a histogram of 10,000 simulated draws.
  output$parent_plot <- renderPlot({
    dist <- input$dist
    big  <- draw_sample(10000, dist)

    par(mar = c(4.5, 4, 3, 1))
    hist(big, breaks = 60, probability = TRUE,
         col = "#d5e8d4", border = "#82b366",
         main = paste("True Population:", dist),
         xlab = "x", ylab = "Density")
  })

  # Histogram of the simulated means with the CLT normal curve overlaid.
  output$sampling_plot <- renderPlot({
    s <- sim()

    x_seq   <- seq(min(s$means), max(s$means), length.out = 300)
    curve_y <- dnorm(x_seq, mean = s$theo_mu, sd = s$theo_se)

    # Bin first without plotting so the y-axis can be sized to fit both the
    # bars and the curve. Sizing from the bars alone clips the curve whenever
    # its peak is taller than the tallest bar (e.g. Uniform(0, 1) with n = 1).
    bins  <- hist(s$means, breaks = 40, plot = FALSE)
    y_top <- max(bins$density, curve_y)

    par(mar = c(4.5, 4, 3, 1))
    plot(bins, freq = FALSE,
         col = "#dae8fc", border = "#6c8ebf",
         main = paste0("Sampling Distribution of the Mean (n = ", s$n, ")"),
         xlab = "Sample mean", ylab = "Density",
         ylim = c(0, y_top))

    lines(x_seq, curve_y, col = "#e74c3c", lwd = 2.5)

    abline(v = s$theo_mu, lty = 2, lwd = 2, col = "#2c3e50")

    legend("topright",
           legend = c("Normal approximation", "Theoretical mean"),
           col    = c("#e74c3c", "#2c3e50"),
           lwd    = c(2.5, 2),
           lty    = c(1, 2),
           bty    = "n", cex = 0.9)
  })

  # Theoretical vs observed centre and spread of the sample means.
  output$stats_box <- renderUI({
    s <- sim()
    tags$div(class = "stats-box",
      HTML(paste0(
        "<b>Theoretical mean:</b> ",   round(s$theo_mu, 4), "<br>",
        "<b>Observed mean:</b> ",      round(mean(s$means), 4), "<br>",
        "<b>Theoretical SE:</b> ",     round(s$theo_se, 4), "<br>",
        "<b>Observed SD:</b> ",        round(sd(s$means), 4)
      ))
    )
  })
}

# Assemble the UI and server into the app object that shinylive runs.
shinyApp(ui = ui, server = server)

Did you know?

  • The CLT was first glimpsed by Abraham de Moivre in 1733, who showed that the binomial distribution approaches a bell curve. Laplace generalized it in 1812. But a rigorous proof under general conditions came from Aleksandr Lyapunov in 1901 — over 150 years after de Moivre’s insight.
  • The normal distribution is sometimes called the “Gaussian” distribution after Carl Friedrich Gauss, but Gauss wasn’t the first to describe it — de Moivre was. Gauss just got better publicity.
  • The CLT explains why so many things in nature look bell-shaped: human heights, blood pressure, measurement errors, IQ scores. Whenever an outcome is the sum of many small independent factors, the CLT kicks in.