# Principal Component Analysis (PCA)

Authors: Katelyn Vincent, Yiqun Tian, Yue Tian. ([PDF](assets/files/b-14-pca-8a237557046ada44e5592de0fe9e36b7.pdf))

## What is PCA?

In this lecture, we discussed feature extraction methods, focusing primarily on Principal Component Analysis (PCA). PCA is the simplest way of projecting data into a low-dimensional space. The goal is to reduce dimensionality while keeping as much of the variance (spread) of the original data as possible. Think about drawing a line and pressing on either side of that line until you squash all of the data points onto it. This line is our principal component (PC), and we are 'projecting' all of the data points onto it. There are two equivalent ways of thinking about how to find the best line: we want the line that 1) maximizes the spread (variance) of the projected data and 2) minimizes the sum of squared orthogonal residuals.

![](https://blog.umetrics.com/hubfs/Blog%20images/figure%203.2%20PCA%20blog%2025.png)

We can have more than one PC, and each additional PC is orthogonal (at a right angle) to all of the others. Often PC1 will tell you what the overall spread of the data is, and additional PCs (e.g., PC2, PC3, PC4) will tell you how subpopulations differ from one another. Each PC has an **eigenvalue** - a measure of how much variance it retains. PC1 has the highest eigenvalue (e.g., 7.5), PC2 the second highest (e.g., 2), and so on. In this example, if the total variance is 10, the first two principal components capture 95% of the variance in the data ((7.5 + 2) / 10 = 0.95).
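A minimal sketch of this in code, using scikit-learn (the Iris data is just a stand-in for whatever feature matrix you have, and the 95% threshold mirrors the example above):

```python
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# PCA is sensitive to scale, so standardize the features first.
X = load_iris().data
X_scaled = StandardScaler().fit_transform(X)

# Fit PCA with all components and inspect how much variance each one keeps.
pca = PCA().fit(X_scaled)
print(pca.explained_variance_)                 # eigenvalues, one per PC
print(pca.explained_variance_ratio_.cumsum())  # cumulative share of variance

# Or ask scikit-learn to keep just enough components to explain ~95% of the variance.
pca_95 = PCA(n_components=0.95)
X_reduced = pca_95.fit_transform(X_scaled)
print(X_reduced.shape)                         # fewer columns than the original data
```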
Typically, you'll want to pick enough principal components to cover 90-95% of the variance. If the first few eigenvalues are much higher than the others, PCA can capture the data in a much lower-dimensional space. If all of the eigenvalues are roughly the same, the data is spread about equally in every direction and there's no point in doing PCA.

![](https://i.imgur.com/z06XIbn.png)

## What are the pros and cons of PCA, and when should you use it?

#### Pros: Removes correlated features, reduces overfitting, and improves performance

One of the most obvious benefits of PCA is that it reduces dimensionality while retaining most of the information and variance in the original features. PCA is helpful when you need to reduce the number of features for modeling but it's not clear which individual variables you should remove. It is also helpful if you need your features to be independent of one another, since the principal components are orthogonal (uncorrelated) with one another. Because PCA reduces the number of features, it also helps to speed up training.

#### Cons: Variables are less interpretable, requires standardization, and possibility of information loss

Because PCA combines information from multiple original features, the principal components are less interpretable than the original features. PCA is affected by scale, so it is important to scale features before applying PCA, as well as convert categorical variables to numeric. Additionally, selecting too few principal components can result in information loss.

## What is the difference between PCA and Linear Regression?

PCA is not linear regression. In fact, the two optimize totally different objectives. Linear regression tries to predict the value of Y from the features X by fitting a straight line that minimizes the squared vertical error between each point and the line. For PCA, by contrast, there is no special variable Y that we are trying to predict; PCA minimizes the orthogonal (perpendicular) distance from each point to the line.

![](https://i.imgur.com/nkE4Uxx.png)
![](https://i.imgur.com/YNvHmsR.png)

In the left graph, linear regression measures the squared error as the vertical distance between the true value and the predicted value. In the right graph, PCA measures the projection error as the shortest (orthogonal) distance between each point and the projection line.

## Alternative for Classification - Linear Discriminant Analysis (LDA)

PCA does not use class information, so if you're trying to do classification then a better alternative might be Linear Discriminant Analysis (LDA). As the name implies, the technique is also linear, but unlike PCA it uses the class labels.
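A minimal sketch of LDA used for supervised dimensionality reduction with scikit-learn (the Iris data and the two-component projection are illustrative choices, not part of the lecture); the intuition behind the new axes it builds is described below:

```python
from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X, y = load_iris(return_X_y=True)

# Unlike PCA, LDA uses the class labels y when constructing the new axes.
lda = LinearDiscriminantAnalysis(n_components=2)
X_lda = lda.fit_transform(X, y)

print(X_lda.shape)                    # at most (n_classes - 1) components
print(lda.explained_variance_ratio_)  # class separation captured by each axis
```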
LDA can either be used directly for classification or as a dimensionality reduction technique in preprocessing. Logistic regression is a classification algorithm traditionally limited to two-class problems; if you have more than two classes, Linear Discriminant Analysis is the preferred linear classification technique. LDA essentially plots your features and then creates a new axis that 1) maximizes the distance between the means of the two classes and 2) minimizes the variation within each class. In simple terms, this newly generated axis increases the separation between the data points of the two classes.

![](https://images2.programmersought.com/460/b7/b7854fa863ed93f55db4cb389ba12b44.png)

## Manifold Destiny

While this discussion has mostly focused on two-dimensional examples, there are many datasets where you can do a much better job of capturing the data if the projection space is curved instead of flat. These surfaces are called **manifolds**. In simple terms, an n-dimensional manifold is a space that locally looks like n-dimensional Euclidean space; you can think of it as stitching together many small n-dimensional patches to build a space. A simple example is a flat map of the Earth: the map is a two-dimensional representation of the three-dimensional sphere. Imagine the Earth has X, Y, Z coordinates: locally, the Z coordinate barely changes, so the two-dimensional (X, Y) approximation gives a good sense of where a certain point is on the map. One of the most popular techniques for finding these manifolds through nonlinear mappings is **t-SNE**.

![](https://www.researchgate.net/profile/Kilian_Weinberger/publication/201841023/figure/fig1/AS:276492534730763@1442932365687/The-problem-of-manifold-learning-illustrated-for-N-800-data-points-sampled-from-a.png)
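To make this concrete, here is a minimal t-SNE sketch in scikit-learn (the digits dataset and the perplexity value are placeholder choices):

```python
from sklearn.datasets import load_digits
from sklearn.manifold import TSNE

# t-SNE maps high-dimensional points into 2-D while trying to keep nearby
# points nearby, so it can recover curved (manifold) structure that a
# linear projection like PCA would flatten.
X, y = load_digits(return_X_y=True)
X_embedded = TSNE(n_components=2, perplexity=30, random_state=0).fit_transform(X)
print(X_embedded.shape)  # (n_samples, 2)
```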
## References

- https://blog.umetrics.com/what-is-principal-component-analysis-pca-and-how-it-is-used
- https://www.i2tutorials.com/what-are-the-pros-and-cons-of-the-pca/
- https://sebastianraschka.com/Articles/2014_python_lda.html
- https://www.programmersought.com/article/33174132390/
- https://www.researchgate.net/figure/The-problem-of-manifold-learning-illustrated-for-N-800-data-points-sampled-from-a_fig1_201841023