CLT Simulator

Drag the sample size slider to watch the Central Limit Theorem in action. The left plot shows the true population; the right shows the sampling distribution of the mean — notice how it becomes approximately normal as n grows, regardless of the population shape (as long as the population has finite variance).

The Oracle View. We choose the population distribution and can draw unlimited samples to build the sampling distribution. In practice, you have one sample from an unknown distribution and rely on the CLT to tell you the sampling distribution is approximately normal.

#| standalone: true
#| viewerHeight: 600

library(shiny)

# ---------------------------------------------------------------------------
# Helper: draw one random sample of size `n` from the population named `dist`
#
#   n    - number of observations to draw
#   dist - population label; one of the names listed in `pop_params`
#
# Returns a numeric vector of length `n`. A label that matches none of the
# cases below falls back to Uniform(0, 1).
# ---------------------------------------------------------------------------
draw_sample <- function(n, dist) {
  # Equal-weight mixture of N(-2, 0.6) and N(2, 0.6): a 0/1 indicator picks,
  # element by element, which of the two normal draws is kept.
  bimodal_mixture <- function() {
    use_left   <- rbinom(n, size = 1, prob = 0.5)
    left_mode  <- rnorm(n, mean = -2, sd = 0.6)
    right_mode <- rnorm(n, mean = 2, sd = 0.6)
    use_left * left_mode + (1 - use_left) * right_mode
  }

  switch(
    dist,
    "Uniform(0, 1)"  = runif(n, min = 0, max = 1),
    "Exponential(1)" = rexp(n),
    "Right-skewed"   = rchisq(n, df = 3),
    "Bimodal"        = bimodal_mixture(),
    "Bernoulli(0.3)" = rbinom(n, size = 1, prob = 0.3),
    runif(n, min = 0, max = 1)
  )
}

# Theoretical moments of each population, keyed by the same labels that
# `draw_sample()` dispatches on: `mu` is the population mean and `sigma` the
# population standard deviation.
pop_params <- local({
  moments <- function(mu, sigma) list(mu = mu, sigma = sigma)

  list(
    # Uniform on [0, 1]: variance is (1 - 0)^2 / 12.
    "Uniform(0, 1)"  = moments(mu = 0.5, sigma = sqrt(1 / 12)),
    # Exponential with rate 1: mean and sd both equal 1 / rate.
    "Exponential(1)" = moments(mu = 1, sigma = 1),
    # Chi-squared with df = 3: mean is df, variance is 2 * df.
    "Right-skewed"   = moments(mu = 3, sigma = sqrt(6)),
    # 50/50 mixture of N(-2, 0.6) and N(2, 0.6): within-component variance
    # 0.6^2 plus the squared distance of each mode from the overall mean, 2^2.
    "Bimodal"        = moments(mu = 0, sigma = sqrt(0.6^2 + 4)),
    # Bernoulli with p = 0.3: variance is p * (1 - p).
    "Bernoulli(0.3)" = moments(mu = 0.3, sigma = sqrt(0.3 * 0.7))
  )
})

# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
ui <- local({
  # Styling for the summary-statistics box rendered into `stats_box`.
  stats_css <- tags$head(tags$style(HTML("
    .stats-box {
      background: #eaf2f8; border-radius: 6px; padding: 14px;
      margin-top: 12px; font-size: 14px; line-height: 1.8;
    }
    .stats-box b { color: #2c3e50; }
  ")))

  # Left column: simulation controls followed by the summary statistics.
  controls <- sidebarPanel(
    width = 3,
    selectInput(
      inputId = "dist", label = "Population distribution:",
      choices = names(pop_params)
    ),
    sliderInput(
      inputId = "n", label = "Sample size (n):",
      min = 1, max = 200, value = 5, step = 1
    ),
    sliderInput(
      inputId = "reps", label = "Number of samples:",
      min = 100, max = 3000, value = 1000, step = 100
    ),
    actionButton(
      inputId = "resample", label = "Draw new samples",
      class = "btn-primary", width = "100%"
    ),
    uiOutput("stats_box")
  )

  # Right column: population histogram beside the sampling distribution.
  plots <- mainPanel(
    width = 9,
    fluidRow(
      column(width = 6, plotOutput("parent_plot", height = "380px")),
      column(width = 6, plotOutput("sampling_plot", height = "380px"))
    )
  )

  fluidPage(stats_css, sidebarLayout(controls, plots))
})

# ---------------------------------------------------------------------------
# Server
# ---------------------------------------------------------------------------
server <- function(input, output, session) {

  # Plot margins (bottom, left, top, right) shared by both histograms.
  plot_margins <- c(4.5, 4, 3, 1)

  # Simulation results shared by the sampling-distribution plot and the stats
  # box: the simulated sample means plus the theoretical mean and standard
  # error for the selected population and sample size.
  sim <- reactive({
    # Reading the button value makes this reactive depend on it, so a click
    # reruns the simulation even when no other input has changed.
    input$resample

    dist_name   <- input$dist
    sample_size <- input$n
    n_samples   <- input$reps

    sample_means <- replicate(
      n_samples,
      mean(draw_sample(sample_size, dist_name))
    )

    population <- pop_params[[dist_name]]

    list(
      means   = sample_means,
      dist    = dist_name,
      n       = sample_size,
      reps    = n_samples,
      theo_mu = population$mu,
      theo_se = population$sigma / sqrt(sample_size)
    )
  })

  # Left panel: histogram of 10,000 draws standing in for the population.
  output$parent_plot <- renderPlot({
    dist_name <- input$dist
    population_draws <- draw_sample(10000, dist_name)

    par(mar = plot_margins)
    hist(
      population_draws,
      breaks = 60, probability = TRUE,
      col = "#d5e8d4", border = "#82b366",
      main = paste("True Population:", dist_name),
      xlab = "x", ylab = "Density"
    )
  })

  # Right panel: histogram of the simulated sample means, overlaid with the
  # normal density N(theo_mu, theo_se) and a dashed line at theo_mu.
  output$sampling_plot <- renderPlot({
    res <- sim()
    curve_col <- "#e74c3c"
    mean_col  <- "#2c3e50"

    par(mar = plot_margins)
    hist(
      res$means,
      breaks = 40, probability = TRUE,
      col = "#dae8fc", border = "#6c8ebf",
      main = paste0("Sampling Distribution of the Mean (n = ", res$n, ")"),
      xlab = "Sample mean", ylab = "Density"
    )

    # Evaluate the normal density only over the span of the observed means.
    mean_range <- range(res$means)
    curve_x <- seq(mean_range[1], mean_range[2], length.out = 300)
    curve_y <- dnorm(curve_x, mean = res$theo_mu, sd = res$theo_se)
    lines(curve_x, curve_y, col = curve_col, lwd = 2.5)

    abline(v = res$theo_mu, lty = 2, lwd = 2, col = mean_col)

    legend(
      "topright",
      legend = c("Normal approximation", "Theoretical mean"),
      col    = c(curve_col, mean_col),
      lwd    = c(2.5, 2),
      lty    = c(1, 2),
      bty    = "n", cex = 0.9
    )
  })

  # Sidebar summary: theoretical vs. observed centre and spread of the
  # sample means, each rounded to four decimal places.
  output$stats_box <- renderUI({
    res <- sim()

    rows <- c(
      paste0("<b>Theoretical mean:</b> ", round(res$theo_mu, 4)),
      paste0("<b>Observed mean:</b> ",    round(mean(res$means), 4)),
      paste0("<b>Theoretical SE:</b> ",   round(res$theo_se, 4)),
      paste0("<b>Observed SD:</b> ",      round(sd(res$means), 4))
    )

    tags$div(class = "stats-box", HTML(paste(rows, collapse = "<br>")))
  })
}

# Launch the app from the UI definition and server function defined above.
shinyApp(ui = ui, server = server)

Did you know?

  • The CLT was first glimpsed by Abraham de Moivre in 1733, who showed that the binomial distribution approaches a bell curve. Laplace generalized it in 1812. But a rigorous proof for a broad class of distributions came from Aleksandr Lyapunov in 1901 — over 150 years after de Moivre’s insight.
  • The normal distribution is sometimes called the “Gaussian” distribution after Carl Friedrich Gauss, but Gauss wasn’t the first to describe it — de Moivre was. Gauss just got better publicity.
  • The CLT explains why so many things in nature look bell-shaped: human heights, blood pressure, measurement errors, IQ scores. Whenever an outcome is the sum of many small independent factors, the CLT kicks in.