import random
import math
import matplotlib.pyplot as plt
import numpy as np
import scipy.linalg  # used below for scipy.linalg.cho_factor / cho_solve
from scipy.stats import multivariate_normal, norm
from numpy.random import seed, uniform, randn
from numpy.linalg import inv
Plots for Bayesian linear regression
9.3
$$
(x_n, y_n), \quad n = 1, \ldots, N
$$
Figure 9.5
N = 10
mu = 0
sigma = 0.2**2
xn = np.random.uniform(-5, 5, N)
epsilon = np.random.normal(mu, sigma, N)
yn = -np.sin(xn/5) + np.cos(xn) + epsilon
dataset = np.column_stack((xn, yn))
xn = xn.reshape(-1, 1)
yn = yn.reshape(-1, 1)
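In equation form, the synthetic data generated above follow

$$
y_n = -\sin(x_n/5) + \cos(x_n) + \varepsilon_n, \qquad \varepsilon_n \sim \mathcal{N}(\mu, \sigma^2),
$$

where `mu` and `sigma` are the values set in the cell above (note that `np.random.normal` takes the standard deviation, not the variance, as its scale argument).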
# Plot the training set
plt.figure()
plt.plot(xn, yn, '+', markersize=10)
plt.xlabel("$x$")
plt.ylabel("$y$")
plt.ylim(-5, 5)
plt.xlim(-5, 5)
N, D = xn.shape
X_aug = np.hstack([np.ones((N, 1)), xn])  # augmented training inputs of size N x (D+1)
# theta_aug = np.zeros((D+1, 1))  # new theta vector of size (D+1) x 1
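With the inputs augmented by a constant column, the affine model $y \approx \theta_0 + \theta_1 x$ becomes a single matrix product:

$$
X_{\text{aug}} = \begin{bmatrix} 1 & x_1 \\ \vdots & \vdots \\ 1 & x_N \end{bmatrix}, \qquad
y \approx X_{\text{aug}}\,\boldsymbol{\theta}, \quad
\boldsymbol{\theta} = \begin{bmatrix} \theta_0 \\ \theta_1 \end{bmatrix}.
$$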
def max_lik_estimate(X, y):
    # X: N x D matrix of training inputs
    # y: N x 1 vector of training targets/observations
    # returns: maximum likelihood parameters (D x 1)
    N, D = X.shape
    theta_ml = np.linalg.solve(X.T @ X, X.T @ y)  ## <-- SOLUTION
    return theta_ml
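`max_lik_estimate` solves the normal equations; the closed-form maximum likelihood estimator is

$$
\boldsymbol{\theta}_{\text{ML}} = (X^\top X)^{-1} X^\top y,
$$

computed here via `np.linalg.solve` rather than an explicit matrix inverse for better numerical stability.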
theta_aug_ml = max_lik_estimate(X_aug, yn)
theta_aug_ml

array([[-0.2123287 ],
       [-0.18826531]])
ml_predictions = X_aug @ theta_aug_ml
# X: K x D matrix of test inputs
# theta: D x 1 vector of parameters
# returns: prediction of f(X); K x 1 vector
ml_predictions.shape
(10, 1)
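The comments above describe the prediction step. As an illustrative sketch (the helper name is assumed, not part of the original), it can be wrapped in a function that does exactly what the inline `X_aug @ theta_aug_ml` computes:

def predict_with_estimate(X, theta):
    # X: K x D matrix of test inputs
    # theta: D x 1 vector of parameters
    # returns: prediction of f(X); K x 1 vector
    return X @ theta

# e.g. predict_with_estimate(X_aug, theta_aug_ml) reproduces ml_predictions above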
# Plot the training set and the ML fit
plt.figure()
plt.plot(xn, yn, '+', markersize=10)
plt.plot(xn, ml_predictions)
plt.xlabel("$x$")
plt.ylabel("$y$")
plt.ylim(-5, 5)
plt.xlim(-5, 5)
def poly_features(X, p):
    """Returns a matrix with p+1 columns containing the polynomial features (powers 0 to p) of the input vector X."""
    X = X.flatten()
    return np.array([1.0*X**i for i in range(p+1)]).T
def nonlinear_features_maximum_likelihood(Phi, y):
    # Phi: feature matrix for training inputs. Size of N x D
    # y: training targets. Size of N x 1
    # returns: maximum likelihood estimator theta_ml. Size of D x 1
    kappa = 1e-08  # 'jitter' term; good for numerical stability

    D = Phi.shape[1]

    # maximum likelihood estimate
    Pt = Phi.T @ y  # Phi^T*y
    PP = Phi.T @ Phi + kappa*np.eye(D)  # Phi^T*Phi + kappa*I

    C = scipy.linalg.cho_factor(PP)
    theta_ml = scipy.linalg.cho_solve(C, Pt)  # inv(Phi^T*Phi)*Phi^T*y

    return theta_ml
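With the jitter term included, the estimator computed above is

$$
\boldsymbol{\theta}_{\text{ML}} = \left(\Phi^\top \Phi + \kappa I\right)^{-1} \Phi^\top y,
$$

solved via a Cholesky factorization of the symmetric positive-definite matrix $\Phi^\top \Phi + \kappa I$ instead of forming an explicit inverse.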
p = 4
Phi = poly_features(xn, p)
theta_ml = nonlinear_features_maximum_likelihood(Phi, yn)
X_test = np.linspace(-5, 5, 100).reshape(-1, 1)
Phi_test = poly_features(X_test, p)
y_pred = Phi_test @ theta_ml
# Plot the training set and the polynomial fit
plt.figure()
plt.plot(xn, yn, '+', markersize=10)
plt.plot(X_test, y_pred)
plt.xlabel("$x$")
plt.ylabel("$y$")
plt.ylim(-5, 5)
plt.xlim(-5, 5)
Figure 9.6
# Values of p to consider
p_values = [0, 1, 3, 4, 6, 9]

# Create a 2x3 grid of subplots
fig, axs = plt.subplots(2, 3, figsize=(12, 8))

for i, p in enumerate(p_values):
    Phi = poly_features(xn, p)
    theta_ml = nonlinear_features_maximum_likelihood(Phi, yn)
    Phi_test = poly_features(X_test, p)
    y_pred = Phi_test @ theta_ml

    ax = axs[i // 3, i % 3]  # Get the correct subplot
    ax.plot(xn, yn, '+', markersize=10, label='Training data')
    ax.plot(X_test, y_pred, label='MLE')
    ax.set_xlabel("$x$")
    ax.set_ylabel("$y$")
    ax.set_ylim(-5, 5)
    ax.set_xlim(-5, 5)
    ax.set_title(f"P = {p}")
    ax.legend()
# Adjust the spacing between subplots
plt.tight_layout()
# Display the plot
plt.show()
%config InlineBackend.figure_format = "retina"
def f(x, a): return a[0] + a[1] * x
def plot_prior(m, S, liminf=-1, limsup=1, step=0.05, ax=plt, **kwargs):
    grid = np.mgrid[liminf:limsup + step:step, liminf:limsup + step:step]
    nx = grid.shape[-1]
    z = multivariate_normal.pdf(grid.T.reshape(-1, 2), mean=m.ravel(), cov=S).reshape(nx, nx).T
    return ax.contourf(*grid, z, **kwargs)
def plot_sample_w(mean, cov, size=10, ax=plt):
    w = np.random.multivariate_normal(mean=mean.ravel(), cov=cov, size=size)
    x = np.linspace(-1, 1)
    for wi in w:
        ax.plot(x, f(x, wi), c="tab:blue", alpha=0.4)
def plot_likelihood_obs(X, T, ix, ax=plt):
    """
    Plot the likelihood function of a single observation
    """
    W = np.mgrid[-1:1:0.1, -1:1:0.1]
    x, t = sample_vals(X, T, ix)  # ith row
    mean = W.T.reshape(-1, 2) @ x.T

    likelihood = norm.pdf(t, loc=mean, scale=np.sqrt(1 / beta)).reshape(20, 20).T
    ax.contourf(*W, likelihood)
    ax.scatter(-0.3, 0.5, c="white", marker="+")
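What `plot_likelihood_obs` draws is the likelihood of a single observation as a function of the weights $(w_0, w_1)$ over a grid:

$$
p(t_n \mid x_n, \mathbf{w}) = \mathcal{N}\!\left(t_n \mid w_0 + w_1 x_n,\; \beta^{-1}\right).
$$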
def sample_vals(X, T, ix):
    """
    Return the ith data point in design-matrix form.

    Returns
    -------
    Phi: the linear model transformation of the ith input
    t: the ith target datapoint
    """
    x_in = X[ix]
    Phi = np.c_[np.ones_like(x_in), x_in]
    t = T[[ix]]
    return Phi, t
def posterior_w(phi, t, S0, m0):
    """
    Compute the posterior distribution over the weights for a
    Gaussian likelihood with known precision and a conjugate
    Gaussian prior.

    Parameters
    ----------
    phi: np.array(N, M)
        The feature (design) matrix
    t: np.array(N, 1)
        The target values
    S0: np.array(M, M)
        The prior covariance matrix
    m0: np.array(M, 1)
        The prior mean vector
    """
    SN = inv(inv(S0) + beta * phi.T @ phi)
    mN = SN @ (inv(S0) @ m0 + beta * phi.T @ t)
    return SN, mN
seed(314)
a = np.array([-0.3, 0.5])
N = 30
sigma = 0.2
X = uniform(-1, 1, (N, 1))
T = f(X, a) + randn(N, 1) * sigma

beta = (1 / sigma) ** 2  # precision
alpha = 2.0

SN = np.eye(2) / alpha
mN = np.zeros((2, 1))

seed(1643)
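In this setup the prior over the weights and the observation model are

$$
p(\mathbf{w}) = \mathcal{N}\!\left(\mathbf{w} \mid \mathbf{0}, \alpha^{-1} I\right), \qquad
t = w_0 + w_1 x + \epsilon, \quad \epsilon \sim \mathcal{N}(0, \beta^{-1}),
$$

with $\alpha = 2.0$ and $\beta = 1/\sigma^2$ as set above.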
nobs = [1, 5, 30]
ix_fig = 1
fig, ax = plt.subplots(len(nobs) + 1, 3, figsize=(10, 12))
plot_prior(mN, SN, ax=ax[0, 1])
ax[0, 1].scatter(-0.3, 0.5, c="white", marker="+")
ax[0, 0].axis("off")
plot_sample_w(mN, SN, ax=ax[0, 2])
for i in range(0, N):
    Phi, t = sample_vals(X, T, i)
    SN, mN = posterior_w(Phi, t, SN, mN)
    if i+1 in nobs:
        plot_likelihood_obs(X, T, i, ax=ax[ix_fig, 0])
        plot_prior(mN, SN, ax=ax[ix_fig, 1])
        ax[ix_fig, 1].scatter(-0.3, 0.5, c="white", marker="+")
        ax[ix_fig, 2].scatter(X[:i + 1], T[:i + 1], c="crimson")
        ax[ix_fig, 2].set_xlim(-1, 1)
        ax[ix_fig, 2].set_ylim(-1, 1)
        for l in range(2):
            ax[ix_fig, l].set_xlabel("$w_0$")
            ax[ix_fig, l].set_ylabel("$w_1$")
        plot_sample_w(mN, SN, ax=ax[ix_fig, 2])
        ix_fig += 1

titles = ["likelihood", "prior/posterior", "data space"]
for axi, title in zip(ax[0], titles):
    axi.set_title(title, size=15)
plt.tight_layout()