1. Running Logistic Regression models on the Stock Market dataset
# Print summary statistics for every column of the Smarket data set.
summary(Smarket)
      Year           Lag1                Lag2                Lag3                Lag4                Lag5         
 Min.   :2001   Min.   :-4.922000   Min.   :-4.922000   Min.   :-4.922000   Min.   :-4.922000   Min.   :-4.92200  
 1st Qu.:2002   1st Qu.:-0.639500   1st Qu.:-0.639500   1st Qu.:-0.640000   1st Qu.:-0.640000   1st Qu.:-0.64000  
 Median :2003   Median : 0.039000   Median : 0.039000   Median : 0.038500   Median : 0.038500   Median : 0.03850  
 Mean   :2003   Mean   : 0.003834   Mean   : 0.003919   Mean   : 0.001716   Mean   : 0.001636   Mean   : 0.00561  
 3rd Qu.:2004   3rd Qu.: 0.596750   3rd Qu.: 0.596750   3rd Qu.: 0.596750   3rd Qu.: 0.596750   3rd Qu.: 0.59700  
 Max.   :2005   Max.   : 5.733000   Max.   : 5.733000   Max.   : 5.733000   Max.   : 5.733000   Max.   : 5.73300  
     Volume           Today           Direction 
 Min.   :0.3561   Min.   :-4.922000   Down:602  
 1st Qu.:1.2574   1st Qu.:-0.639500   Up  :648  
 Median :1.4229   Median : 0.038500             
 Mean   :1.4783   Mean   : 0.003138             
 3rd Qu.:1.6417   3rd Qu.: 0.596750             
 Max.   :3.1525   Max.   : 5.733000             

# Show the first five elements of glm_probs.
glm_probs[seq_len(5)]
        1         2         3         4         5 
0.5070841 0.4814679 0.4811388 0.5152224 0.5107812 

Checking our performance

# Classification accuracy: fraction of entries in glm_pred that equal
# the corresponding value of Smarket$Direction.
mean(glm_pred == Smarket$Direction)
[1] 0.5216

Checking performance on test data

# Test accuracy: fraction of entries in glm_pred_test that equal the
# corresponding value of Smarket_Direction_2005.
mean(glm_pred_test == Smarket_Direction_2005)
[1] 0.4801587
# Refit the logistic regression on the training rows using only Lag1 and Lag2
# as predictors.
glm_fit_train2 <- glm(
  Direction ~ Lag1 + Lag2,
  data = Smarket,
  family = binomial,
  subset = train
)

# Fitted probabilities for the rows where train is FALSE.
glm_probs_test2 <- predict(
  glm_fit_train2,
  newdata = Smarket[!train, ],
  type = "response"
)

# Label a row "Up" when its fitted probability exceeds 0.5, otherwise "Down".
glm_pred_test2 <- ifelse(glm_probs_test2 > 0.5, "Up", "Down")

Checking performance of the updated classification model (Lag1 and Lag2 only)

# Confusion matrix: rows are the predicted labels in glm_pred_test2, columns
# are the values of Smarket_Direction_2005.
table(glm_pred_test2, Smarket_Direction_2005)
              Smarket_Direction_2005
glm_pred_test2 Down  Up
          Down   35  35
          Up     76 106
# Test accuracy: fraction of entries in glm_pred_test2 that equal the
# corresponding value of Smarket_Direction_2005.
mean(glm_pred_test2 == Smarket_Direction_2005)
[1] 0.5595238

  2. Running Linear Discriminant Analysis on the Stock Market dataset
# Clear every object from the current workspace before the LDA section.
# rm() takes the names to remove through its `list` argument, so the character
# vector returned by ls() is passed as list = ls(). The previous call,
# rm(ls = list()), had the two swapped and failed with
# "... must contain names or character strings".
rm(list = ls())

Fitting an LDA model

Checking what the performance is on test data (year = 2005)

# Test accuracy of the LDA fit: fraction of predicted classes in lda_pred$class
# that equal the observed Direction in Smarket_2005.
mean(lda_pred$class == Smarket_2005$Direction)
[1] 0.5595238

  3. K-Nearest Neighbours classification
# Fraction of entries in knn_pred that equal Direction on the rows where
# train is FALSE.
mean(knn_pred == Smarket$Direction[!train])
[1] 0.5

K-Nearest Neighbors

# K-nearest-neighbours classification (k = 1) of Direction from Lag1 and Lag2:
# fit on the rows with Year < 2005, predict the remaining rows, then print the
# confusion matrix and the fraction of correct predictions.
library(class)
?knn

# Columns are referenced through Smarket$ rather than attach(Smarket), so no
# data-frame columns are placed on the search path where they could mask or be
# masked by other objects.
Xlag <- cbind(Smarket$Lag1, Smarket$Lag2)
train <- Smarket$Year < 2005

# knn() breaks ties at random; fixing the seed makes the result reproducible.
set.seed(1)
knn.pred <- knn(
  Xlag[train, ],
  Xlag[!train, ],
  Smarket$Direction[train],
  k = 1
)

# Rows: predicted class; columns: observed Direction on the held-out rows.
table(knn.pred, Smarket$Direction[!train])
mean(knn.pred == Smarket$Direction[!train])

LS0tDQp0aXRsZTogIklTTFIgLSBMZWFybmluZyBDbGFzc2lmaWNhdGlvbiB1c2luZyB0aGUgU3RvY2sgTWFya2V0IERhdGEiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoxKSBSdW5uaW5nIExvZ2lzdGljIFJlZ3Jlc3Npb24gbW9kZWxzIG9uIHRoZSBTdG9jayBNYXJrZXQgZGF0YXNldA0KDQpgYGB7cn0NCmxpYnJhcnkoSVNMUikNCm5hbWVzKFNtYXJrZXQpDQpzdW1tYXJ5KFNtYXJrZXQpDQpgYGANCg0KYGBge3J9DQpwYWlycyhTbWFya2V0LCBjb2wgPSBTbWFya2V0JERpcmVjdGlvbikNCmBgYA0KDQotIFJ1bm5pbmcgYSBsb2dpc3RpYyByZWdyZXNzaW9uIG1vZGVsDQoNCmBgYHtyfQ0KZ2xtX2ZpdCA8LSBnbG0oRGlyZWN0aW9uIH4gTGFnMSArIExhZzIgKyBMYWczICsgTGFnNCArIExhZzUgKyBWb2x1bWUsIGRhdGEgPSBTbWFya2V0LCBmYW1pbHkgPSBiaW5vbWlhbCkNCnN1bW1hcnkoZ2xtX2ZpdCkNCmdsbV9wcm9icyA8LSBwcmVkaWN0KGdsbV9maXQsIHR5cGUgPSAicmVzcG9uc2UiKQ0KZ2xtX3Byb2JzWzE6NV0NCmdsbV9wcmVkIDwtIGlmZWxzZShnbG1fcHJvYnM+MC41LCJVcCIsIkRvd24iKQ0KYGBgDQoNCg0KQ2hlY2tpbmcgb3VyIHBlcmZvcm1hbmNlDQoNCmBgYHtyfQ0KI0dldHRpbmcgdGhlIGNsYXNzaWZpY2F0aW9uIGNvbmZ1c2lvbiBtYXRyaXgNCnRhYmxlKGdsbV9wcmVkLCBTbWFya2V0JERpcmVjdGlvbikNCg0KI0dldHRpbmcgb3VyIGNsYXNzaWZpY2F0aW9uIHBlcmZvcm1hbmNlDQojQ29ycmVjdCBjbGFzc2lmaWNhdGlvbnMvVG90YWwgbnVtYmVyIG9mIG9ic2VydmF0aW9ucw0KbWVhbihnbG1fcHJlZD09U21hcmtldCREaXJlY3Rpb24pDQpgYGANCg0KLSBTcGxpdHRpbmcgaW50byB0cmFpbi10ZXN0DQpgYGB7cn0NCnRyYWluIDwtIFNtYXJrZXQkWWVhciA8IDIwMDUNCmdsbV9maXRfdHJhaW4gPC0gZ2xtKERpcmVjdGlvbiB+IExhZzEgKyBMYWcyICsgTGFnMyArIExhZzQgKyBMYWc1ICsgVm9sdW1lLCBkYXRhID0gU21hcmtldCwgZmFtaWx5ID0gYmlub21pYWwsIHN1YnNldCA9IHRyYWluKQ0KZ2xtX3Byb2JzX3Rlc3QgPC0gcHJlZGljdChnbG1fZml0X3RyYWluLCBuZXdkYXRhID0gU21hcmtldFshdHJhaW4sXSwgdHlwZSA9ICJyZXNwb25zZSIpDQpnbG1fcHJlZF90ZXN0IDwtIGlmZWxzZShnbG1fcHJvYnNfdGVzdD4wLjUsICJVcCIsICJEb3duIikNCmBgYA0KDQpDaGVja2luZyBwZXJmb3JtYW5jZSBvbiB0ZXN0IGRhdGENCmBgYHtyfQ0KU21hcmtldF9EaXJlY3Rpb25fMjAwNSA8LSBTbWFya2V0JERpcmVjdGlvblshdHJhaW5dDQp0YWJsZShnbG1fcHJlZF90ZXN0LCBTbWFya2V0X0RpcmVjdGlvbl8yMDA1KQ0KDQojQ29ycmVjdCBjbGFzc2lmaWNhdGlvbiBhY2N1cmFjeSBmb3IgdGVzdA0KbWVhbihnbG1fcHJlZF90ZXN0ID09IFNtYXJrZXRfRGlyZWN0aW9uXzIwMDUpDQpgYGANCg0KLSBDbGFzc2lmaWNhdGlvbiB1c2luZyBvbmx5IGxhZzEgYW5kIGxhZzIgdGVybXMNCmBgYHtyfQ0KZ2xtX2Zp
dF90cmFpbjIgPC0gZ2xtKERpcmVjdGlvbiB+IExhZzEgKyBMYWcyLCBkYXRhID0gU21hcmtldCwgZmFtaWx5ID0gYmlub21pYWwsIHN1YnNldCA9IHRyYWluKQ0KZ2xtX3Byb2JzX3Rlc3QyIDwtIHByZWRpY3QoZ2xtX2ZpdF90cmFpbjIsIG5ld2RhdGEgPSBTbWFya2V0WyF0cmFpbixdLCB0eXBlID0gInJlc3BvbnNlIikNCmdsbV9wcmVkX3Rlc3QyIDwtIGlmZWxzZShnbG1fcHJvYnNfdGVzdDI+MC41LCAiVXAiLCAiRG93biIpDQpgYGANCg0KQ2hlY2tpbmcgcGVyZm9ybWFuY2Ugb2YgdXBkYXRlZCBjbGFzc2lmaWNhdGlvbiBjb2Rlcw0KYGBge3J9DQp0YWJsZShnbG1fcHJlZF90ZXN0MiwgU21hcmtldF9EaXJlY3Rpb25fMjAwNSkNCm1lYW4oZ2xtX3ByZWRfdGVzdDIgPT0gU21hcmtldF9EaXJlY3Rpb25fMjAwNSkNCg0KYGBgDQoNCg0KDQotLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tDQoNCjIpIFJ1bm5pbmcgTGluZWFyIERpc2NyaW1pbmFudCBBbmFseXNpcyBvbiB0aGUgU3RvY2sgTWFya2V0IGRhdGFzZXQNCg0KYGBge3J9DQpsaWJyYXJ5KE1BU1MpDQpybShsaXN0ID0gbHMoKSkNCmBgYA0KDQoNCkZpdHRpbmcgYSBMREEgbW9kZWwNCmBgYHtyfQ0KbGRhX2ZpdCA8LSBsZGEoRGlyZWN0aW9uIH4gTGFnMSArIExhZzIsIGRhdGEgPSBTbWFya2V0LCBzdWJzZXQgPSBZZWFyIDwgMjAwNSkNCmxkYV9maXQNCnBsb3QobGRhX2ZpdCkNCmBgYA0KDQoNCkNoZWNraW5nIHdoYXQgdGhlIHBlcmZvcm1hbmNlIGlzIG9uIHRlc3QgZGF0YSAoeWVhciA9IDIwMDUpDQoNCmBgYHtyfQ0KU21hcmtldF8yMDA1IDwtIHN1YnNldChTbWFya2V0LCBZZWFyID09IDIwMDUpDQpsZGFfcHJlZCA9IHByZWRpY3QobGRhX2ZpdCwgU21hcmtldF8yMDA1KQ0KZGF0YS5mcmFtZShsZGFfcHJlZClbMTo1LF0NCg0KdGFibGUobGRhX3ByZWQkY2xhc3MsU21hcmtldF8yMDA1JERpcmVjdGlvbikNCm1lYW4obGRhX3ByZWQkY2xhc3M9PVNtYXJrZXRfMjAwNSREaXJlY3Rpb24pDQoNCmBgYA0KDQoNCi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQ0KDQozKSBLLU5lYXJlc3QgTmVpZ2hib3VycyBjbGFzc2lmaWNhdGlvbg0KDQpgYGB7cn0NCmxpYnJhcnkoY2xhc3MpDQo/a25uDQoNClhsYWcgPC0gY2JpbmQoU21hcmtldCRMYWcxLCBTbWFya2V0JExhZzIpDQp0cmFpbiA8LSBTbWFya2V0JFllYXIgPCAyMDA1DQoNCmtubl9wcmVkIDwtIGtubihYbGFnW3RyYWluLF0sIFhsYWdbIXRyYWluLF0sIFNtYXJrZXQkRGlyZWN0aW9uW3RyYWluXSwgaz0xKQ0KDQp0YWJsZShrbm5fcHJlZCwgU21hcmtldCREaXJlY3Rpb25bIXRyYWluXSkNCm1lYW4oa25uX3ByZWQgPT0gU21hcmtldCREaXJlY3Rpb25bIXRyYWluXSkNCg0KYGBgDQoNCg0KDQojIyBLLU5lYXJlc3QgTmVpZ2hib3JzDQpsaWJyYXJ5KGNsYXNzKQ0KP2tubg0K
YXR0YWNoKFNtYXJrZXQpDQpYbGFnPWNiaW5kKExhZzEsTGFnMikNCnRyYWluPVllYXI8MjAwNQ0Ka25uLnByZWQ9a25uKFhsYWdbdHJhaW4sXSxYbGFnWyF0cmFpbixdLERpcmVjdGlvblt0cmFpbl0saz0xKQ0KdGFibGUoa25uLnByZWQsRGlyZWN0aW9uWyF0cmFpbl0pDQptZWFuKGtubi5wcmVkPT1EaXJlY3Rpb25bIXRyYWluXSkNCg==