#  Python 3
#  Author: Jonathan Goodman, goodman@cims.nyu.edu
#  Fall 2024

#  Illustrate some standards of Python coding for Scientific Computing,
#  Fall 2024

import numpy             as np
import matplotlib.pyplot as plt


def BinProb( p, n):
    """Compute and return an array of probabilities under the binomial
       distribution.
    Inputs:
       n (integer): the number of trials
       p (float):   the probability of success in each trial
    Returns:
       a numpy array bp of length n+1, where
          bp[k] = probability of k successes in n trials
                = p^k (1-p)^(n-k) (n choose k)
          (n choose k) = binomial coefficient = n!/(k!(n-k)!)
    """

    q     = 1. - p            # white space and equal sign alignment
    bp_k  = q**n              # will be bp[k] as k increases, starting with k=0
    bp    = np.zeros([n+1])
    bp[0] = bp_k
    for k in range(1, n+1):             # range(1,n+1) is 1,...,n; range(1,n)
                                        # would stop at n-1, wrong here.
        bp_k  = bp_k*(p/q)*(n-k+1)/k    # recurrence: bp[k] from bp[k-1]
        bp[k] = bp_k                    # record the answer
    return bp


def CLT_BinProb( p, n):
    """Compute and return an array of estimates of probabilities under the
       binomial distribution, using the Central Limit Theorem (CLT).
    Inputs:
       n (integer): the number of trials
       p (float):   the probability of success in each trial
    Returns:
       a numpy array CLT_bp of length n+1, where
          CLT_bp[k] = approx. prob. of k using the CLT
          Pr(k) ~ (1/sqrt(2*pi*n*sig^2)) e^(-(k-k_bar)^2/(2*n*sig^2))
          where
             sig^2 = p*(1-p)
             k_bar = n*p
    """

    k_bar  = n*p                        # most likely value, may not be an integer
    sigsq  = p*(1.-p)                   # sigsq = sig^2 = var(Bernoulli trial)
    Z      = np.sqrt(2*np.pi*n*sigsq)   # Gaussian normalizing prefactor
    CLT_bp = np.zeros([n+1])
    for k in range(n+1):                # k = 0 or k = n not special here
        CLT_bp[k] = (1/Z)*np.exp( -((k-k_bar)**2)/(2*n*sigsq))   # CLT formula
    return CLT_bp
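
#      Optional sanity check (an added sketch, not part of the original
#      script): if scipy happens to be installed, compare BinProb to
#      scipy.stats.binom.pmf on a small test case.  scipy is an assumption
#      here, not a dependency of the rest of this file, so the check is
#      skipped quietly when the import fails.  n_chk and p_chk are
#      hypothetical test values.

try:
    from scipy.stats import binom
    n_chk  = 10
    p_chk  = .3
    bp_chk = BinProb( p_chk, n_chk)
    ref    = binom.pmf( range(n_chk+1), n_chk, p_chk)
    d_max  = np.max( np.abs( bp_chk - ref))
    print( "BinProb vs. scipy.stats.binom.pmf, max diff: {d:10.2e}".format(d=d_max))
except ImportError:
    pass
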
#  *** Experiment 1, small n, print the output as a table ***

n = 20
p = .23
bp     = BinProb(     p, n)
CLT_bp = CLT_BinProb( p, n)

outLine = "binomial probabilities with p = {p:7.2f} and n = {n:4d}"
outLine = outLine.format(p=p, n=n)
print(outLine)
print("\n   k       prob   CLT prob       diff      ratio\n")
for k in range(n+1):
    outLine = "{k:4d} {pr:10.2e} {cpr:10.2e} {dif:10.2e} {rat:10.2e}"
    outLine = outLine.format( k=k, pr=bp[k], cpr=CLT_bp[k],
                              dif=bp[k]-CLT_bp[k], rat=bp[k]/CLT_bp[k])
    print(outLine)
p_tot   = np.sum(bp)
outLine = "total probability is {p_tot:7.2f}".format(p_tot=p_tot)
print(outLine)

#  *** Experiment 2, larger n, make plots ***

n = 70
p = .18
bp     = BinProb(     p, n)
CLT_bp = CLT_BinProb( p, n)

#      first plot: linear scale, whole range

fig, ax = plt.subplots()
ax.plot( range(n+1), bp,     label="true probabilities")
ax.plot( range(n+1), CLT_bp, label="Central Limit Theorem")
ax.grid()
ax.legend()
ax.set_title(r'Binomial and CLT, whole range, $n = {n:4d},\; p = {p:6.2f}$'.format(n=n, p=p))
ax.set_ylabel(r'${\rm Pr}(k)$')
ax.set_xlabel(r"$k$")
plt.savefig("LinearScaleWholeRange.pdf")

#      second plot: linear scale, reduced range

fig, ax = plt.subplots()
z     = 4                                 # number of standard deviations in plot range
k_bar = n*p                               # mean, probably not an int
var   = n*p*(1-p)                         # binomial variance formula
sig   = np.sqrt(var)                      # standard deviation
k_min = int( np.floor( k_bar - z*sig))    # mean plus or minus z standard devs.
k_max = int( np.ceil(  k_bar + z*sig))
if k_min < 0:                             # keep the "reduced" range in range
    k_min = 0
if k_max > n:
    k_max = n
k_vals = range(n+1)
ax.plot( k_vals[k_min:k_max+1], bp[k_min:k_max+1],       # +1 so k_max is included
         label="true probabilities")
ax.plot( k_vals[k_min:k_max+1], CLT_bp[k_min:k_max+1],
         label="Central Limit Theorem")
ax.grid()
ax.legend()
ax.set_title(r'Binomial and CLT, {z:2d}$\sigma$ range, $n = {n:4d},\; p = {p:6.2f}$'.format(z=z, n=n, p=p))
ax.set_ylabel(r'${\rm Pr}(k)$')
ax.set_xlabel(r"$k$")
plt.savefig("LinearScaleReducedRange.pdf")

#      third plot: log scale, whole range

fig, ax = plt.subplots()
ax.semilogy( range(n+1), bp,     label="true probabilities")
ax.semilogy( range(n+1), CLT_bp, label="Central Limit Theorem")
ax.grid()
ax.legend()
ax.set_title(r'Binomial and CLT, whole range, log scale, $n = {n:4d},\; p = {p:6.2f}$'.format(n=n, p=p))
ax.set_ylabel(r'${\rm Pr}(k)$')
ax.set_xlabel(r"$k$")
plt.savefig("LogScaleWholeRange.pdf")
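
#      A small numeric companion to the plots (an added sketch, not part of
#      the original experiments): report the largest absolute and relative
#      CLT errors over the z sigma range plotted above, using bp, CLT_bp,
#      k_min, and k_max from Experiment 2.

abs_err = np.max( np.abs( bp[k_min:k_max+1] - CLT_bp[k_min:k_max+1]))
rel_err = np.max( np.abs( bp[k_min:k_max+1]/CLT_bp[k_min:k_max+1] - 1.))
outLine = "max CLT error in the {z:2d} sigma range: abs {ae:10.2e}, rel {re:10.2e}"
print( outLine.format( z=z, ae=abs_err, re=rel_err))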