#!/usr/bin/env python
#
# file: 2D Multivariate Gaussian Scatter Plot
#
# revision history:
#
# 20261109 (SP): generate 2D Gaussian samples and plot scatter plot
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys
import numpy as np

# matplotlib is only needed by plot_scatter: guard the import so that
# generate_gaussian remains usable on systems where it is not installed
#
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define default values for arguments
#
DEF_COV = [[1.0, 0.0], [0.0, 1.0]]
DEF_MEAN = [0.0, 0.0]
DEF_NSAMPLES = int(1000)
DEF_OUT_FILE_NAME = "gaussian_2d_scatter.png"
DEF_PLOT_TITLE = "2D Multivariate Gaussian Samples"
DEF_RANDOM_SEED = 27
DEF_SYMMETRY_TOL = 1e-12

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# function: generate_gaussian
#
# arguments:
#  mu: the mean vector (any array-like that flattens to length 2)
#  cov: the covariance matrix (array-like of shape (2,2), symmetric)
#  nsamples: the number of samples to draw (a non-negative integer)
#  seed: the seed for numpy's global random number generator
#
# return: an (nsamples,2) numpy array of samples, or None on invalid input
#
# generate samples from a 2D multivariate Gaussian distribution
#
def generate_gaussian(mu, cov, nsamples, seed=DEF_RANDOM_SEED):

    # set the random seed for repeatability. note that this reseeds
    # numpy's global generator, which is what keeps the output for a
    # given seed identical from run to run
    #
    np.random.seed(int(seed))

    # check basic input dimensions
    #
    mu = np.asarray(mu, dtype=float).reshape(-1)
    cov = np.asarray(cov, dtype=float)
    if (mu.size != 2) or (cov.shape != (2, 2)):
        print("**> Error: mu must be length 2 and cov must be 2x2")
        return None

    # ensure the covariance matrix is symmetric
    #
    if not np.allclose(cov, cov.T, atol=DEF_SYMMETRY_TOL):
        print("**> Error: covariance matrix must be symmetric")
        return None

    # ensure the sample count is usable: numpy raises an exception
    # for a negative size, so reject it here like any other bad input
    #
    try:
        nsamples = int(nsamples)
    except (TypeError, ValueError):
        nsamples = -1
    if nsamples < 0:
        print("**> Error: nsamples must be a non-negative integer")
        return None

    # draw nsamples from N(mu, cov)
    #
    x = np.random.multivariate_normal(mean=mu, cov=cov, size=nsamples)

    # return samples as an (N,2) numpy array
    #
    return x

# function: plot_scatter
#
# arguments:
#  x: the data to plot (array-like of shape (N,2))
#  mu: an optional mean location to overlay (array-like of length 2)
#  title: the title of the plot
#  outfile: the name of the image file to write
#
# return: True if the plot was saved, False if the input was rejected
#
# plot a scatter plot of the data
#
def plot_scatter(x, mu=None, title=DEF_PLOT_TITLE,
                 outfile=DEF_OUT_FILE_NAME):

    # check input data
    #
    if x is None:
        print("**> Error: no data to plot")
        return False

    # ensure x is an (N,2) array
    #
    x = np.asarray(x, dtype=float)
    if (x.ndim != 2) or (x.shape[1] != 2):
        print("**> Error: x must be an (N,2) array")
        return False

    # make sure the plotting library was imported successfully
    #
    if plt is None:
        print("**> Error: matplotlib is required for plotting")
        return False

    # create the figure
    #
    fig = plt.figure()

    # draw and save inside a try block so the figure is always released,
    # even if drawing or saving raises an exception
    #
    try:

        # scatter plot of points
        #  each point is size 12 and transparency is 0.7
        #  x[:,0] is x-coordinates, x[:,1] is y-coordinates
        #
        plt.scatter(x[:, 0], x[:, 1], s=12, alpha=0.7)

        # overlay the mean location
        #
        if mu is not None:

            # ensure mu is length 2
            #
            mu = np.asarray(mu, dtype=float).reshape(-1)

            # plot the mean as an 'x' marker: a mean that is not
            # length 2 is silently skipped
            #
            if mu.size == 2:

                # mu[0] is x-coordinate, mu[1] is y-coordinate
                # set size and marker style using s and marker
                #
                plt.scatter(mu[0], mu[1], s=80, marker="x")

        # label the plot
        #
        plt.title(title)

        # label the axes
        #
        plt.xlabel("x1")
        plt.ylabel("x2")

        # set equal aspect ratio and grid
        #
        plt.axis("equal")
        plt.grid(True)

        # save to disk
        #
        plt.savefig(outfile)

    finally:

        # close the figure so repeated calls do not accumulate
        # open figures
        #
        plt.close(fig)

    print("Saved plot to: %s" % outfile)

    # exit gracefully
    #
    return True

# function: main
#
# arguments:
#  argv: the command line: argv[1] is the optional number of samples
#        and argv[2] is the optional output file name
#
# return: True if the samples were generated and plotted, False otherwise
#
def main(argv):

    # set the number of samples
    #
    nsamples = DEF_NSAMPLES

    # parse command line arguments
    #
    if len(argv) > 1:

        # first argument is number of samples
        #
        try:
            nsamples = int(argv[1])
        except ValueError:

            # invalid argument, use default
            #
            print("**> Using default value of %d" % nsamples)

    # second argument is output file name: use the default if it is absent
    #
    if len(argv) > 2:
        outfile = str(argv[2])
    else:
        outfile = DEF_OUT_FILE_NAME

    # generate data
    #
    x = generate_gaussian(DEF_MEAN, DEF_COV, nsamples)

    # check for errors
    #
    if x is None:
        return False

    # plot the samples and report whether the plot was written
    #
    return plot_scatter(x, mu=DEF_MEAN,
                        title="2D Multivariate Gaussian (N=%d)" %
                        int(nsamples),
                        outfile=outfile)

# begin gracefully: convert the status returned by main into an exit
# code so that a failure is visible to the calling shell
#
if __name__ == '__main__':
    sys.exit(0 if main(sys.argv[0:]) else 1)

#
# end of file