|
| 1 | +^{:kindly/hide-code true |
| 2 | + :clay {:title "Convergence to Normal Distribution, independent of original distribution" |
| 3 | + :quarto {:author [:samumbach] |
| 4 | + :type :post |
| 5 | + :date "2025-06-27" |
| 6 | + :category :clojure |
| 7 | + :tags [:clojure.math]}}} |
| 8 | +(ns central-limit-theorem-different-distributions |
| 9 | + (:require [scicloj.tableplot.v1.plotly :as plotly] |
| 10 | + [tablecloth.api :as tc])) |
| 11 | + |
| 12 | +;; We mentioned [last time](central_limit_theorem_convergence.html) that the result of combining more and more |
| 13 | +;; events will approach the normal distribution, regardless of the shape of the original event distribution. |
| 14 | +;; Let's try to demonstrate that visually. |
| 15 | + |
| 16 | +;; Our previous definition of a random event is an example of a uniform distribution: |
| 17 | + |
(defn event
  "Draws one random event: a double sampled uniformly from [0, 1)."
  []
  ;; `rand` with no arguments delegates to Math/random, so this is the same draw.
  (Math/random))
| 20 | + |
(defn event-sample-dataset
  "Builds column data for `sample-count` draws of `event-fn`.

  Returns a plain map with two columns:
    :index        the integers 0 .. sample-count - 1
    :event-value  the results of `sample-count` calls to the zero-argument
                  `event-fn`

  Both columns are lazy sequences, so `event-fn` is only invoked as the
  values are realized."
  [event-fn sample-count]
  (let [indices (range sample-count)
        samples (repeatedly sample-count event-fn)]
    {:index indices
     :event-value samples}))
| 24 | + |
;; 100,000 draws of the uniform `event`, kept for the first histogram.
(def uniform-ds
  (-> event
      (event-sample-dataset 100000)))
| 26 | + |
(defn histogram
  "Plots the :event-value column of `ds` as a 40-bin count histogram.

  `ds` is anything `tc/dataset` accepts (here, the column map produced by
  `event-sample-dataset`). Returns the Tableplot plot with a histogram layer
  followed by a point layer."
  [ds]
  (let [histogram-spec {:=x :event-value
                        :=histnorm "count"
                        :=histogram-nbins 40}
        table (tc/dataset ds)]
    ;; NOTE(review): the point layer is added without explicit :=x / :=y
    ;; mappings; confirm it is intended alongside the histogram layer.
    (plotly/layer-point
     (plotly/layer-histogram table histogram-spec))))
| 35 | + |
;; Histogram of the raw uniform samples.
(-> uniform-ds
    histogram)
| 37 | + |
;; Now watch how the shape of the distribution changes as we average several of these events together:
| 39 | + |
(defn avg
  "Returns the arithmetic mean of the numbers in `nums`.

  Throws ArithmeticException (divide by zero) when `nums` is empty."
  [nums]
  (let [total (reduce + nums)
        n (count nums)]
    (/ total n)))
| 42 | + |
(defn combined-event
  "Averages `number-of-events` independent draws into one combined event.

  With one argument, the draws come from whatever the namespace-level `event`
  function currently is, exactly as before. The optional `event-fn` (a
  zero-argument sampling function) lets callers name the source distribution
  explicitly instead of depending on which definition of `event` is in effect.

  Throws ArithmeticException when `number-of-events` is 0, because the
  average of no values is undefined."
  ([number-of-events]
   (combined-event number-of-events event))
  ([number-of-events event-fn]
   (avg (repeatedly number-of-events event-fn))))
| 45 | + |
;; Averages of 2, 5 and 20 events, 100,000 samples each. Each plot stays a
;; separate top-level form.
(-> #(combined-event 2)
    (event-sample-dataset 100000)
    histogram)

(-> #(combined-event 5)
    (event-sample-dataset 100000)
    histogram)

(-> #(combined-event 20)
    (event-sample-dataset 100000)
    histogram)
| 51 | + |
| 52 | +;; Let's try this again with a different shape of distribution: |
| 53 | + |
(defn triangle-wave
  "V-shaped function of `x`: 4 * |x - 0.5|.

  Evaluates to 2.0 at x = 0 and x = 1 and drops to 0.0 at x = 0.5."
  [x]
  (* 4.0 (Math/abs (- x 0.5))))
| 56 | + |
;; Plot the triangle wave itself, sampled every 0.01 across [0, 1].
(let [xs (range 0.0 1.01 0.01)
      curve (tc/dataset {:x xs
                         :y (mapv triangle-wave xs)})]
  (plotly/layer-point curve
                      {:=x :x
                       :=y :y}))
| 63 | + |
| 64 | +;; Generating samples from this distribution is more complicated than I initially |
| 65 | +;; expected. This warrants a follow-up, but for now I'll just link to my source |
| 66 | +;; for this method: [_Urban Operations Research_ by Richard C. Larson and Amedeo R. Odoni, Section 7.1.3 Generating Samples from Probability Distributions](https://web.mit.edu/urban_or_book/www/book/chapter7/7.1.3.html) |
| 67 | +;; (see "The rejection method"). |
| 68 | + |
(defn sample-from-function
  "Draws one x value using the rejection method.

  Picks a candidate point uniformly at random inside the rectangle spanned by
  [x-min, x-max] and [y-min, y-max]. If the point lies on or below the curve
  y = f(x), its x coordinate is returned; otherwise a new candidate is drawn.

  For the accepted values to follow the shape of `f`, f(x) should stay within
  [y-min, y-max] over the x range. There is no retry limit, so the call does
  not return if no candidate is ever accepted."
  [f x-min x-max y-min y-max]
  (let [x-span (- x-max x-min)
        y-span (- y-max y-min)
        ;; Draw x before y, one `rand` call each.
        candidate-x (+ x-min (* (rand) x-span))
        candidate-y (+ y-min (* (rand) y-span))]
    (if (<= candidate-y (f candidate-x))
      candidate-x
      ;; Rejected: start over with a fresh candidate.
      (recur f x-min x-max y-min y-max))))
| 76 | + |
;; Intentionally redefines `event`: from here on, `combined-event` (which
;; calls `event`) draws from the triangle-wave distribution.
(defn event
  "Draws one random event from the triangle-wave distribution on [0, 1],
  using rejection sampling with y values in [0, 2]."
  []
  (let [x-lo 0.0
        x-hi 1.0
        y-lo 0.0
        y-hi 2.0]
    (sample-from-function triangle-wave x-lo x-hi y-lo y-hi)))
| 79 | + |
;; 100,000 draws of the triangle-wave `event`.
(def triangle-wave-ds
  (-> event
      (event-sample-dataset 100000)))

;; Histogram of the raw triangle-wave samples.
(-> triangle-wave-ds
    histogram)
| 83 | + |
| 84 | +;; Let's combine several of these distributions: |
| 85 | + |
;; Averages of 2, 5 and 20 triangle-wave events, 100,000 samples each. Each
;; plot stays a separate top-level form.
(-> #(combined-event 2)
    (event-sample-dataset 100000)
    histogram)

(-> #(combined-event 5)
    (event-sample-dataset 100000)
    histogram)

(-> #(combined-event 20)
    (event-sample-dataset 100000)
    histogram)
| 91 | + |
| 92 | +;; I find these visuals surprisingly powerful because you can see the original |
| 93 | +;; distribution "morph" into this characteristic shape. |
| 94 | + |
;; The normal distribution holds a unique place in mathematics and in the world itself:
;; whenever you sum or average many independent and identically distributed events
;; (with finite variance), the suitably scaled result converges to the normal
;; distribution as the number of combined events increases.
0 commit comments