import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
import matplotlib
# Apply the 'ggplot' style sheet to every figure drawn below.
plt.style.use('ggplot')
# IPython/Jupyter magics (not plain Python): draw figures inline in the
# notebook and request the 'retina' figure format from the inline backend.
%matplotlib inline
%config InlineBackend.figure_format='retina'
# Default figure size as (width, height).
matplotlib.rcParams['figure.figsize'] = (5.0, 4.0)
from sklearn import datasets
from sklearn.model_selection import cross_val_predict
from sklearn import linear_model
import matplotlib.pyplot as plt
# Load the Boston housing data and keep its regression target.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed version still provides it.
boston = datasets.load_boston()
y = boston.target

# cross_val_predict yields an array the same size as `y`; every entry is a
# prediction obtained by (10-fold) cross validation of the linear model.
lr = linear_model.LinearRegression()
predicted = cross_val_predict(lr, boston.data, y, cv=10)

# Measured vs. predicted scatter; the dashed diagonal spans the range of `y`
# and marks where prediction equals measurement.
fig, ax = plt.subplots()
ax.scatter(y, predicted, s=15, color='green')
target_range = [y.min(), y.max()]
ax.plot(target_range, target_range, 'k--', lw=2)
ax.set(xlabel='Measured', ylabel='Predicted')
plt.show()
# Turn the measured and predicted vectors into column vectors and join them
# side by side: column 0 of D is `y`, column 1 is `predicted`.
B = np.array(y, ndmin=2).T
C = np.array(predicted, ndmin=2).T
D = np.concatenate((B, C), axis=1)
print(D.shape)
(506, 2)
# Scatter the two columns of D against each other, with dashed guide lines
# through the origin and the background grid switched off.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
guide_style = dict(linestyle='--', color='black', linewidth=1)
ax.axhline(0, **guide_style)  # horizontal guide at y = 0
ax.axvline(0, **guide_style)  # vertical guide at x = 0
ax.scatter(D[:, 0], D[:, 1], s=15, color='green')
ax.set_title("Original Data")
ax.grid(False)
plt.show()
# Zero-centre every column of D in place, then build the covariance matrix of
# the columns, normalised by the number of rows.
D -= D.mean(axis=0)
cov = D.T.dot(D) / len(D)
print(D, cov, cov.shape, sep='\n')
[[ 1.46719368 7.55263402] [ -0.93280632 2.23631529] [ 12.16719368 7.86597834] ... [ 1.36719368 5.58959447] [ -0.53280632 4.05339594] [-10.63280632 0.14350337]] [[84.41955616 55.86219275] [55.86219275 61.84316139]] (2, 2)
# Factor the covariance matrix with an SVD; U supplies the basis the data is
# rotated into and S the singular values (used as per-axis variances later).
# The third return value of np.linalg.svd is the already-transposed factor.
U, S, V = np.linalg.svd(cov)

# Express every row of D in the basis given by the columns of U.
Xrot = D.dot(U)
print(Xrot.shape)
(506, 2)
# Scatter the rotated data on fixed axis limits, with dashed guide lines
# through the origin and the background grid switched off.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
guide_style = dict(linestyle='--', color='black', linewidth=1)
ax.axhline(0, **guide_style)  # horizontal guide at y = 0
ax.axvline(0, **guide_style)  # vertical guide at x = 0
ax.set_xlim(-40, 40)
ax.set_ylim(-25, 25)
ax.scatter(Xrot[:, 0], Xrot[:, 1], s=15, color='green')
ax.set_title("PCA data")
ax.grid(False)
plt.show()
# Whiten: divide each column of Xrot by the square root of the matching entry
# of S. The 1e-5 offset keeps the divisor away from zero.
whitening_scale = np.sqrt(S + 1e-5)
Xwhite = Xrot / whitening_scale
print(Xwhite)
[[-0.51880136 1.22377534] [-0.06084869 0.57785563] [-1.26218958 -0.40235157] ... [-0.40304657 0.86135378] [-0.18885617 0.86487308] [ 0.71346918 1.70354864]]
# Scatter the whitened data, with dashed guide lines through the origin and
# the background grid switched off.
# NOTE(review): the axis limits are the same as in the PCA plot; since Xwhite
# is rescaled by sqrt(S) the points will sit close to the origin at this
# scale -- confirm that the shared scale is intentional.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
guide_style = dict(linestyle='--', color='black', linewidth=1)
ax.axhline(0, **guide_style)  # horizontal guide at y = 0
ax.axvline(0, **guide_style)  # vertical guide at x = 0
ax.set_xlim(-40, 40)
ax.set_ylim(-25, 25)
ax.scatter(Xwhite[:, 0], Xwhite[:, 1], s=10, color='green')
ax.set_title("Whitened data")
ax.grid(False)
plt.show()