# Demonstration code for Scientific Computing class, # http://www.math.nyu.edu/faculty/goodman/teaching/SciComp2024/ # PerformanceDemo.py # uses Python 3 and Numpy # Matrix multiplication demo # Observe how the performance of different ways to do the same # computation in Python import numpy as np import numpy.linalg as la import time """ A code for a live demo of Python code performance doing the same task on matrices coded in various ways. First, the matrix product C=AB done in three ways. -- Scalar loops, directly coding the matrix multiplication formulas. -- Vectorized loops, using the numpy integrated dot product to find the dot product of a row of A with a column of B This has the same memory access pattern as the scalar loop method, but avoids repeatedly interpreting the command that does the multiplication. -- Using the integrated @ (matrix multiplication symbol), which calls LApack to do the matrix multiply. This does block matrix multiplication with a block size optimized for the cache structure of the processor. Second, calculate the sum of the elements of a matrix. This is a double loop. In both cases the inner loop is vectorized. The difference is only the memory access pattern -- Column first: access in the order A_{11}, A_{21}, ... , A{n1}, A_{12}, A_{22}, ... -- Row first: access in the order A_{11}, A_{12}, ... , A{1n}, A_{21}, A_{22}, ... The code is meant to be run as a live demo so you experience first hand the performance differences. Type: [python command] PerformanceDemo.py and watch """ #------------------------------------------------------------------- #-------------- Matrix multiplication ------------------------------ #------------------------------------------------------------------- nsl = [100, 200, 300] # n values for scalar loop matrix tests # --------------- Scalar loop ------------------------------------- print("\n matrix multiplication, scalar loops") for n in nsl: A = np.zeros([n,n]) # The zero matrix is good enough for the demo B = np.zeros([n,n]) C = np.zeros([n,n]) # will be the matrix product AB outLine = " n = {n:3d}".format(n=n) print(outLine + ", starting ... ", end="", flush = True) t_start = time.time() # Scalar loop matrix multiplication for i in range(n): for j in range(n): sum = 0. for k in range(n): sum = sum + A[i,k]*B[k,j] C[i,j] = sum t_end = time.time() t_op = t_end - t_start outLine = "done, time = {t:7.2f} sec.".format(t = t_op) print(outLine) # --------------- Vectorized inner loop ------------------------------------- print("\n matrix multiplication, vectorized inner loop") nvl = [500, 1000, 1500] # n values for vectorized loop tests for n in nvl: A = np.zeros([n,n]) # The zero matrix is good enough for the demo B = np.zeros([n,n]) C = np.zeros([n,n]) # will be the matrix product AB outLine = " n = {n:5d}".format(n=n) print( outLine + ", starting ... ", end="", flush = True) t_start = time.time() # Matrix multiplication with the innermost loop vectorized for i in range(n): for j in range(n): C[i,j] = np.dot(A[i,:], B[:,j]) t_end = time.time() t_op = t_end - t_start outLine = "done, time = {t:7.2f} sec.".format(t = t_op) print(outLine) # --------------- Matrix multiplication using LApack --------------------- print("\n matrix multiplication, LApack") nla = [5000, 10000, 15000] # n values for LApack tests for n in nla: A = np.zeros([n,n]) # The zero matrix is good enough for the demo B = np.zeros([n,n]) outLine = " n = {n:5d}".format(n=n) print( outLine + ", starting ... ", end="", flush = True) t_start = time.time() # Matrix multiplication using the LApack routine C = A@B t_end = time.time() t_op = t_end - t_start outLine = "done, time = {t:7.2f} sec.".format(t = t_op) print(outLine) #------------------------------------------------------------------- #-------------- Matrix summation ---------------------------------- #------------------------------------------------------------------- # --------------- matrix sum, column-wise --------------------- ncs = [10000, 20000, 40000] # matrix sizes for column first tests print("\n column-wise summation of matrix elements") for n in ncs: A = np.zeros([n,n]) sum = 0. # will be sum_{j,k} A_{jk} outLine = " n = {n:5d}".format(n=n) print( outLine + ", starting ... ", end="", flush = True) t_start = time.time() # Sum the elements of A, with columns in the inner loop for k in range(n): sum = sum + np.sum(A[:,k]) t_end = time.time() t_op = t_end - t_start outLine = "done, time = {t:7.2f} sec.".format(t = t_op) print(outLine) # --------------- matrix sum, row-wise --------------------- nrs = [10000, 20000, 40000] # matrix sizes for row first tests print("\n row-wise summation of matrix elements") for n in nrs: A = np.zeros([n,n]) sum = 0. # will be sum_{j,k} A_{jk} outLine = " n = {n:5d}".format(n=n) print( outLine + ", starting ... ", end="", flush = True) t_start = time.time() # Sum the elements of A, with rows in the inner loop for k in range(n): sum = sum + np.sum(A[k,:]) t_end = time.time() t_op = t_end - t_start outLine = "done, time = {t:7.2f} sec.".format(t = t_op) print(outLine)