#  Python 3
#  Author: Jonathan Goodman, goodman@cims.nyu.edu
#  Fall 2024

#  Illustrate some standards of Python coding for Scientific Computing,
#  Fall 2024

import numpy             as np
import matplotlib.pyplot as plt


def BinProb( p, n):
    """Compute and return an array of probabilities under the binomial
       distribution.
    Inputs:
       n (integer): the number of trials
       p (float):   the probability of success in each trial
    Returns:
       a numpy array bp of length n+1, where
          bp[k] = probability of k successes in n trials
                = p^k (1-p)^(n-k) (n choose k)
          (n choose k) = binomial coefficient = n!/(k!(n-k)!)
    """

    q     = 1. - p            # white space and equal sign alignment
    bp_k  = q**n              # will be bp[k] as k increases, starting with k=0
    bp    = np.zeros([n+1])
    bp[0] = bp_k
    for k in range(1, n+1):             # range(1,n+1) is 1,...,n; range(1,n)
                                        # would stop at n-1, wrong here.
        bp_k  = bp_k*(p/q)*(n-k+1)/k    # recurrence: bp[k] from bp[k-1]
        bp[k] = bp_k                    # record the answer
    return bp


def CLT_BinProb( p, n):
    """Compute and return an array of estimates of probabilities under the
       binomial distribution, using the Central Limit Theorem (CLT).
    Inputs:
       n (integer): the number of trials
       p (float):   the probability of success in each trial
    Returns:
       a numpy array CLT_bp of length n+1, where
          CLT_bp[k] = approx. prob. of k using the CLT
          Pr(k) ~ (1/sqrt(2*pi*n*sig^2)) e^(-(k-k_bar)^2/(2*n*sig^2))
          where
             sig^2 = p*(1-p)
             k_bar = n*p
    """

    k_bar  = n*p                        # most likely value, may not be an integer
    sigsq  = p*(1.-p)                   # sigsq = sig^2 = var(Bernoulli trial)
    Z      = np.sqrt(2*np.pi*n*sigsq)   # Gaussian normalizing prefactor
    CLT_bp = np.zeros([n+1])
    for k in range(n+1):                # k = 0 or k = n not special here
        CLT_bp[k] = (1/Z)*np.exp( -((k-k_bar)**2)/(2*n*sigsq))   # CLT formula
    return CLT_bp
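
#      Optional sanity check (an added sketch, not part of the original
#      script): if scipy happens to be installed, compare BinProb to
#      scipy.stats.binom.pmf on a small test case.  scipy is an assumption
#      here, not a dependency of the rest of this file, so the check is
#      skipped quietly when the import fails.  n_chk and p_chk are
#      hypothetical test values.

try:
    from scipy.stats import binom
    n_chk  = 10
    p_chk  = .3
    bp_chk = BinProb( p_chk, n_chk)
    ref    = binom.pmf( range(n_chk+1), n_chk, p_chk)
    d_max  = np.max( np.abs( bp_chk - ref))
    print( "BinProb vs. scipy.stats.binom.pmf, max diff: {d:10.2e}".format(d=d_max))
except ImportError:
    pass
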
#  *** Experiment 1, small n, print the output as a table ***

n = 20
p = .23
bp     = BinProb(     p, n)
CLT_bp = CLT_BinProb( p, n)

outLine = "binomial probabilities with p = {p:7.2f} and n = {n:4d}"
outLine = outLine.format(p=p, n=n)
print(outLine)
print("\n   k       prob   CLT prob       diff      ratio\n")
for k in range(n+1):
    outLine = "{k:4d} {pr:10.2e} {cpr:10.2e} {dif:10.2e} {rat:10.2e}"
    outLine = outLine.format( k=k, pr=bp[k], cpr=CLT_bp[k],
                              dif=bp[k]-CLT_bp[k], rat=bp[k]/CLT_bp[k])
    print(outLine)
p_tot   = np.sum(bp)
outLine = "total probability is {p_tot:7.2f}".format(p_tot=p_tot)
print(outLine)

#  *** Experiment 2, larger n, make plots ***

n = 70
p = .18
bp     = BinProb(     p, n)
CLT_bp = CLT_BinProb( p, n)

#      first plot: linear scale, whole range

fig, ax = plt.subplots()
ax.plot( range(n+1), bp,     label="true probabilities")
ax.plot( range(n+1), CLT_bp, label="Central Limit Theorem")
ax.grid()
ax.legend()
ax.set_title(r'Binomial and CLT, whole range, $n = {n:4d},\; p = {p:6.2f}$'.format(n=n, p=p))
ax.set_ylabel(r'${\rm Pr}(k)$')
ax.set_xlabel(r"$k$")
plt.savefig("LinearScaleWholeRange.pdf")

#      second plot: linear scale, reduced range

fig, ax = plt.subplots()
z     = 4                                 # number of standard deviations in plot range
k_bar = n*p                               # mean, probably not an int
var   = n*p*(1-p)                         # binomial variance formula
sig   = np.sqrt(var)                      # standard deviation
k_min = int( np.floor( k_bar - z*sig))    # mean plus or minus z standard devs.
k_max = int( np.ceil(  k_bar + z*sig))
if k_min < 0:                             # keep the "reduced" range in range
    k_min = 0
if k_max > n:
    k_max = n
k_vals = range(n+1)
ax.plot( k_vals[k_min:k_max+1], bp[k_min:k_max+1],       # +1 so k_max is included
         label="true probabilities")
ax.plot( k_vals[k_min:k_max+1], CLT_bp[k_min:k_max+1],
         label="Central Limit Theorem")
ax.grid()
ax.legend()
ax.set_title(r'Binomial and CLT, {z:2d}$\sigma$ range, $n = {n:4d},\; p = {p:6.2f}$'.format(z=z, n=n, p=p))
ax.set_ylabel(r'${\rm Pr}(k)$')
ax.set_xlabel(r"$k$")
plt.savefig("LinearScaleReducedRange.pdf")

#      third plot: log scale, whole range

fig, ax = plt.subplots()
ax.semilogy( range(n+1), bp,     label="true probabilities")
ax.semilogy( range(n+1), CLT_bp, label="Central Limit Theorem")
ax.grid()
ax.legend()
ax.set_title(r'Binomial and CLT, whole range, log scale, $n = {n:4d},\; p = {p:6.2f}$'.format(n=n, p=p))
ax.set_ylabel(r'${\rm Pr}(k)$')
ax.set_xlabel(r"$k$")
plt.savefig("LogScaleWholeRange.pdf")
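
#      A small numeric companion to the plots (an added sketch, not part of
#      the original experiments): report the largest absolute and relative
#      CLT errors over the z sigma range plotted above, using bp, CLT_bp,
#      k_min, and k_max from Experiment 2.

abs_err = np.max( np.abs( bp[k_min:k_max+1] - CLT_bp[k_min:k_max+1]))
rel_err = np.max( np.abs( bp[k_min:k_max+1]/CLT_bp[k_min:k_max+1] - 1.))
outLine = "max CLT error in the {z:2d} sigma range: abs {ae:10.2e}, rel {re:10.2e}"
print( outLine.format( z=z, ae=abs_err, re=rel_err))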