Skip to content

Commit 4cd7b67

Browse files
committed
Final code.
1 parent 46922c0 commit 4cd7b67

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+17218
-0
lines changed

Exercise 2/.ipynb_checkpoints/Exercise 2 heartDataSet-checkpoint.ipynb

Lines changed: 896 additions & 0 deletions
Large diffs are not rendered by default.

Exercise 2/.ipynb_checkpoints/Exercise 2 scriptToyData-checkpoint.ipynb

Lines changed: 997 additions & 0 deletions
Large diffs are not rendered by default.

Exercise 2/.ipynb_checkpoints/Exercise 2 scriptToyDataKMeans-checkpoint.ipynb

Lines changed: 966 additions & 0 deletions
Large diffs are not rendered by default.

Exercise 2/Assignment2.pdf

820 KB
Binary file not shown.

Exercise 2/Exercise 2 heartDataSet.ipynb

Lines changed: 896 additions & 0 deletions
Large diffs are not rendered by default.

Exercise 2/Exercise 2 scriptToyData.ipynb

Lines changed: 997 additions & 0 deletions
Large diffs are not rendered by default.

Exercise 2/Exercise 2 scriptToyDataKMeans.ipynb

Lines changed: 966 additions & 0 deletions
Large diffs are not rendered by default.

Exercise 2/MlmiExercise2.tar.gz

955 KB
Binary file not shown.

Exercise 2/data/Faces.mat

664 KB
Binary file not shown.

Exercise 2/data/filtHeartDataSet.mat

8.34 KB
Binary file not shown.

Exercise 2/data/toydata.mat

2.48 KB
Binary file not shown.

Exercise 2/data/toydata2.mat

22.8 KB
Binary file not shown.

Exercise 2/myPCA.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Tue Apr 19 16:39:22 2016
4+
5+
@author: qwertzuiopu
6+
"""
7+
8+
import numpy as np
9+
import scipy.io as sio
10+
11+
def usingSVD(dataMatrix, desiredVariancePercentage=1.0):
    """Perform PCA via Singular Value Decomposition.

    Parameters
    ----------
    dataMatrix : ndarray, shape (n_features, n_samples)
        Data with one observation per column.
    desiredVariancePercentage : float, optional
        Fraction (0, 1] of the total variance the kept components must
        explain; 1.0 keeps every component.

    Returns
    -------
    eigenvecs : ndarray, shape (n_features, k)
        Principal directions (columns), sign-normalized so the
        largest-magnitude entry of each column is positive.
    eigenvals : ndarray, shape (k,)
        Variance explained by each kept component, descending.
    meanDataMatrix : ndarray, shape (n_features,)
        Per-feature mean that was removed from the data.
    demeanedDataMatrix : ndarray, shape (n_features, n_samples)
        The centered data, same orientation as the input.
    projectedData : ndarray, shape (k, n_samples)
        The centered data projected onto the kept components.
    """
    # Center the feature space (mean taken over samples, axis 1).
    meanDataMatrix = dataMatrix.mean(1)
    demeanedDataMatrix = dataMatrix.T - meanDataMatrix

    # SVD of the (samples x features) matrix. Dividing by sqrt(N-1)
    # makes the squared singular values equal the covariance eigenvalues,
    # consistent with the covariance-based implementation below.
    U, s, V = np.linalg.svd(demeanedDataMatrix / np.sqrt(dataMatrix.shape[1] - 1))
    V = V.T  # numpy returns V transposed

    # Sign convention: make the largest-magnitude element of each
    # principal direction (each COLUMN of V) positive. Iterate over
    # columns (shape[1]); the original iterated rows, which only worked
    # because the full SVD's V happens to be square.
    for i in range(V.shape[1]):
        col = V[:, i]
        if col[np.argmax(np.fabs(col))] < 0:
            V[:, i] = -col

    # Eigenvalues of the covariance = squared singular values.
    s = s ** 2
    totalVariance = np.sum(s)

    # Keep the smallest leading set of components whose cumulative
    # variance reaches the requested fraction. Default to all available
    # components so eigenvals/eigenvecs are always bound (guards against
    # NameError when desiredVariancePercentage > 1.0 or float round-off
    # keeps the ratio just under the threshold).
    eigenvals = s
    eigenvecs = V[:, :s.shape[0]]
    for i in range(s.shape[0]):
        if np.sum(s[:i + 1]) / totalVariance >= desiredVariancePercentage:
            eigenvals = s[:i + 1]
            eigenvecs = V[:, :i + 1]
            break

    # Project the centered samples onto the kept directions.
    projectedData = np.dot(demeanedDataMatrix, eigenvecs)

    return eigenvecs, eigenvals, meanDataMatrix, demeanedDataMatrix.T, projectedData.T
50+
def usingCOV(dataMatrix, desiredVariancePercentage=1.0):
    """Perform PCA via eigendecomposition of the covariance matrix.

    Parameters
    ----------
    dataMatrix : ndarray, shape (n_features, n_samples)
        Data with one observation per column.
    desiredVariancePercentage : float, optional
        Fraction (0, 1] of the total variance the kept components must
        explain; 1.0 keeps every component.

    Returns
    -------
    eigenvecs : ndarray, shape (n_features, k)
        Principal directions (columns), sign-normalized so the
        largest-magnitude entry of each column is positive.
    eigenvals : ndarray, shape (k,)
        Variance explained by each kept component, descending.
    meanDataMatrix : ndarray, shape (n_features,)
        Per-feature mean that was removed from the data.
    demeanedDataMatrix : ndarray, shape (n_features, n_samples)
        The centered data, same orientation as the input.
    projectedData : ndarray, shape (k, n_samples)
        The centered data projected onto the kept components.
    """
    # Center the feature space (mean taken over samples, axis 1).
    meanDataMatrix = dataMatrix.mean(1)
    demeanedDataMatrix = dataMatrix.T - meanDataMatrix

    # Sample covariance (features x features), normalized by N-1.
    covMatrix = np.dot(demeanedDataMatrix.T, demeanedDataMatrix) / (dataMatrix.shape[1] - 1)

    # eigh, not eig: the covariance matrix is symmetric, and eigh
    # guarantees real eigenvalues/eigenvectors. np.linalg.eig can emit
    # spurious tiny complex parts from floating-point asymmetry.
    eigenValues, eigenVectors = np.linalg.eigh(covMatrix)

    # eigh returns eigenvalues in ascending order; PCA wants descending,
    # so reorder both the values and the matching eigenvector columns.
    idx = eigenValues.argsort()[::-1]
    eigenValues = eigenValues[idx]
    eigenVectors = eigenVectors[:, idx]

    # Sign convention: make the largest-magnitude element of each
    # principal direction (each COLUMN) positive. Iterate over columns
    # (shape[1]); the original iterated rows, which only worked because
    # the eigenvector matrix is square.
    for i in range(eigenVectors.shape[1]):
        col = eigenVectors[:, i]
        if col[np.argmax(np.fabs(col))] < 0:
            eigenVectors[:, i] = -col

    totalVariance = np.sum(eigenValues)

    # Keep the smallest leading set of components whose cumulative
    # variance reaches the requested fraction. Default to all components
    # so eigenvals/eigenvecs are always bound (guards against NameError
    # when desiredVariancePercentage > 1.0 or float round-off keeps the
    # ratio just under the threshold).
    eigenvals = eigenValues
    eigenvecs = eigenVectors
    for i in range(eigenValues.shape[0]):
        if np.sum(eigenValues[:i + 1]) / totalVariance >= desiredVariancePercentage:
            eigenvals = eigenValues[:i + 1]
            eigenvecs = eigenVectors[:, :i + 1]
            break

    # Project the centered samples onto the kept directions.
    projectedData = np.dot(demeanedDataMatrix, eigenvecs)

    return eigenvecs, eigenvals, meanDataMatrix, demeanedDataMatrix.T, projectedData.T

0 commit comments

Comments
 (0)