- Running Logistic Regression models on the Stock Market dataset
# Per-column summary statistics for the Smarket data set
summary(Smarket)
Year Lag1 Lag2 Lag3 Lag4 Lag5
Min. :2001 Min. :-4.922000 Min. :-4.922000 Min. :-4.922000 Min. :-4.922000 Min. :-4.92200
1st Qu.:2002 1st Qu.:-0.639500 1st Qu.:-0.639500 1st Qu.:-0.640000 1st Qu.:-0.640000 1st Qu.:-0.64000
Median :2003 Median : 0.039000 Median : 0.039000 Median : 0.038500 Median : 0.038500 Median : 0.03850
Mean :2003 Mean : 0.003834 Mean : 0.003919 Mean : 0.001716 Mean : 0.001636 Mean : 0.00561
3rd Qu.:2004 3rd Qu.: 0.596750 3rd Qu.: 0.596750 3rd Qu.: 0.596750 3rd Qu.: 0.596750 3rd Qu.: 0.59700
Max. :2005 Max. : 5.733000 Max. : 5.733000 Max. : 5.733000 Max. : 5.733000 Max. : 5.73300
Volume Today Direction
Min. :0.3561 Min. :-4.922000 Down:602
1st Qu.:1.2574 1st Qu.:-0.639500 Up :648
Median :1.4229 Median : 0.038500
Mean :1.4783 Mean : 0.003138
3rd Qu.:1.6417 3rd Qu.: 0.596750
Max. :3.1525 Max. : 5.733000

- Running a logistic regression model
# Show the first five entries of the fitted probabilities
glm_probs[1:5]
1 2 3 4 5
0.5070841 0.4814679 0.4811388 0.5152224 0.5107812
Checking our performance
# Classification accuracy: correct classifications / total number of observations
mean(glm_pred==Smarket$Direction)
[1] 0.5216
- Splitting into train-test
Checking performance on test data
# Share of test-set predictions that match the actual Direction values
mean(glm_pred_test == Smarket_Direction_2005)
[1] 0.4801587
- Classification using only lag1 and lag2 terms
# Refit the logistic regression on the training rows, keeping only Lag1 and
# Lag2 as predictors.
glm_fit_train2 <- glm(
  Direction ~ Lag1 + Lag2,
  data = Smarket,
  family = binomial,
  subset = train
)

# Fitted probabilities for the rows outside the training set.
glm_probs_test2 <- predict(
  glm_fit_train2,
  newdata = Smarket[!train, ],
  type = "response"
)

# Label a row "Up" when its fitted probability exceeds 0.5, otherwise "Down".
glm_pred_test2 <- ifelse(glm_probs_test2 > 0.5, "Up", "Down")
Checking performance of the updated classification model
# Confusion matrix: predicted direction (rows) against actual direction (columns)
table(glm_pred_test2, Smarket_Direction_2005)
Smarket_Direction_2005
glm_pred_test2 Down Up
Down 35 35
Up 76 106
# Test accuracy: share of predictions that match the actual direction
mean(glm_pred_test2 == Smarket_Direction_2005)
[1] 0.5595238
- Running Linear Discriminant Analysis on the Stock Market dataset
# Clear every object from the current workspace before starting the LDA
# section. rm() takes the names to remove through its `list` argument, and
# ls() supplies them; the earlier form rm(ls = list()) had the two swapped and
# failed with "... must contain names or character strings".
rm(list = ls())
Fitting an LDA model

Checking what the performance is on test data (year = 2005)
# Share of Smarket_2005 rows where the LDA class prediction matches the actual Direction
mean(lda_pred$class==Smarket_2005$Direction)
[1] 0.5595238
- K-Nearest Neighbours classification
# Share of KNN predictions that match Direction on the rows where train is FALSE
mean(knn_pred == Smarket$Direction[!train])
[1] 0.5
K-Nearest Neighbors
# K-nearest-neighbours classification (k = 1) using Lag1 and Lag2 as features:
# rows with Year < 2005 form the training set, the remaining rows the test set.
library(class)
?knn  # opens the help page for knn()

# Columns are referenced through Smarket$ instead of attach(Smarket), so nothing
# is placed on the search path; the matrix keeps the Lag1/Lag2 column names.
Xlag <- cbind(Lag1 = Smarket$Lag1, Lag2 = Smarket$Lag2)
train <- Smarket$Year < 2005

# Classify each test row by the label of its single nearest training row.
knn.pred <- knn(Xlag[train, ], Xlag[!train, ], Smarket$Direction[train], k = 1)

# Confusion matrix and accuracy on the test rows.
table(knn.pred, Smarket$Direction[!train])
mean(knn.pred == Smarket$Direction[!train])
LS0tDQp0aXRsZTogIklTTFIgLSBMZWFybmluZyBDbGFzc2lmaWNhdGlvbiB1c2luZyB0aGUgU3RvY2sgTWFya2V0IERhdGEiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoxKSBSdW5uaW5nIExvZ2lzdGljIFJlZ3Jlc3Npb24gbW9kZWxzIG9uIHRoZSBTdG9jayBNYXJrZXQgZGF0YXNldA0KDQpgYGB7cn0NCmxpYnJhcnkoSVNMUikNCm5hbWVzKFNtYXJrZXQpDQpzdW1tYXJ5KFNtYXJrZXQpDQpgYGANCg0KYGBge3J9DQpwYWlycyhTbWFya2V0LCBjb2wgPSBTbWFya2V0JERpcmVjdGlvbikNCmBgYA0KDQotIFJ1bm5pbmcgYSBsb2dpc3RpYyByZWdyZXNzaW9uIG1vZGVsDQoNCmBgYHtyfQ0KZ2xtX2ZpdCA8LSBnbG0oRGlyZWN0aW9uIH4gTGFnMSArIExhZzIgKyBMYWczICsgTGFnNCArIExhZzUgKyBWb2x1bWUsIGRhdGEgPSBTbWFya2V0LCBmYW1pbHkgPSBiaW5vbWlhbCkNCnN1bW1hcnkoZ2xtX2ZpdCkNCmdsbV9wcm9icyA8LSBwcmVkaWN0KGdsbV9maXQsIHR5cGUgPSAicmVzcG9uc2UiKQ0KZ2xtX3Byb2JzWzE6NV0NCmdsbV9wcmVkIDwtIGlmZWxzZShnbG1fcHJvYnM+MC41LCJVcCIsIkRvd24iKQ0KYGBgDQoNCg0KQ2hlY2tpbmcgb3VyIHBlcmZvcm1hbmNlDQoNCmBgYHtyfQ0KI0dldHRpbmcgdGhlIGNsYXNzaWZpY2F0aW9uIGNvbmZ1c2lvbiBtYXRyaXgNCnRhYmxlKGdsbV9wcmVkLCBTbWFya2V0JERpcmVjdGlvbikNCg0KI0dldHRpbmcgb3VyIGNsYXNzaWZpY2F0aW9uIHBlcmZvcm1hbmNlDQojQ29ycmVjdCBjbGFzc2lmaWNhdGlvbnMvVG90YWwgbnVtYmVyIG9mIG9ic2VydmF0aW9ucw0KbWVhbihnbG1fcHJlZD09U21hcmtldCREaXJlY3Rpb24pDQpgYGANCg0KLSBTcGxpdHRpbmcgaW50byB0cmFpbi10ZXN0DQpgYGB7cn0NCnRyYWluIDwtIFNtYXJrZXQkWWVhciA8IDIwMDUNCmdsbV9maXRfdHJhaW4gPC0gZ2xtKERpcmVjdGlvbiB+IExhZzEgKyBMYWcyICsgTGFnMyArIExhZzQgKyBMYWc1ICsgVm9sdW1lLCBkYXRhID0gU21hcmtldCwgZmFtaWx5ID0gYmlub21pYWwsIHN1YnNldCA9IHRyYWluKQ0KZ2xtX3Byb2JzX3Rlc3QgPC0gcHJlZGljdChnbG1fZml0X3RyYWluLCBuZXdkYXRhID0gU21hcmtldFshdHJhaW4sXSwgdHlwZSA9ICJyZXNwb25zZSIpDQpnbG1fcHJlZF90ZXN0IDwtIGlmZWxzZShnbG1fcHJvYnNfdGVzdD4wLjUsICJVcCIsICJEb3duIikNCmBgYA0KDQpDaGVja2luZyBwZXJmb3JtYW5jZSBvbiB0ZXN0IGRhdGENCmBgYHtyfQ0KU21hcmtldF9EaXJlY3Rpb25fMjAwNSA8LSBTbWFya2V0JERpcmVjdGlvblshdHJhaW5dDQp0YWJsZShnbG1fcHJlZF90ZXN0LCBTbWFya2V0X0RpcmVjdGlvbl8yMDA1KQ0KDQojQ29ycmVjdCBjbGFzc2lmaWNhdGlvbiBhY2N1cmFjeSBmb3IgdGVzdA0KbWVhbihnbG1fcHJlZF90ZXN0ID09IFNtYXJrZXRfRGlyZWN0aW9uXzIwMDUpDQpgYGANCg0KLSBDbGFzc2lmaWNhdGlvbiB1c2luZyBvbmx5IGxhZzEgYW5kIGxhZzIgdGVybXMNCmBgYHtyfQ0KZ2xtX2Zp
dF90cmFpbjIgPC0gZ2xtKERpcmVjdGlvbiB+IExhZzEgKyBMYWcyLCBkYXRhID0gU21hcmtldCwgZmFtaWx5ID0gYmlub21pYWwsIHN1YnNldCA9IHRyYWluKQ0KZ2xtX3Byb2JzX3Rlc3QyIDwtIHByZWRpY3QoZ2xtX2ZpdF90cmFpbjIsIG5ld2RhdGEgPSBTbWFya2V0WyF0cmFpbixdLCB0eXBlID0gInJlc3BvbnNlIikNCmdsbV9wcmVkX3Rlc3QyIDwtIGlmZWxzZShnbG1fcHJvYnNfdGVzdDI+MC41LCAiVXAiLCAiRG93biIpDQpgYGANCg0KQ2hlY2tpbmcgcGVyZm9ybWFuY2Ugb2YgdXBkYXRlZCBjbGFzc2lmaWNhdGlvbiBjb2Rlcw0KYGBge3J9DQp0YWJsZShnbG1fcHJlZF90ZXN0MiwgU21hcmtldF9EaXJlY3Rpb25fMjAwNSkNCm1lYW4oZ2xtX3ByZWRfdGVzdDIgPT0gU21hcmtldF9EaXJlY3Rpb25fMjAwNSkNCg0KYGBgDQoNCg0KDQotLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tDQoNCjIpIFJ1bm5pbmcgTGluZWFyIERpc2NyaW1pbmFudCBBbmFseXNpcyBvbiB0aGUgU3RvY2sgTWFya2V0IGRhdGFzZXQNCg0KYGBge3J9DQpsaWJyYXJ5KE1BU1MpDQpybShsaXN0ID0gbHMoKSkNCmBgYA0KDQoNCkZpdHRpbmcgYSBMREEgbW9kZWwNCmBgYHtyfQ0KbGRhX2ZpdCA8LSBsZGEoRGlyZWN0aW9uIH4gTGFnMSArIExhZzIsIGRhdGEgPSBTbWFya2V0LCBzdWJzZXQgPSBZZWFyIDwgMjAwNSkNCmxkYV9maXQNCnBsb3QobGRhX2ZpdCkNCmBgYA0KDQoNCkNoZWNraW5nIHdoYXQgdGhlIHBlcmZvcm1hbmNlIGlzIG9uIHRlc3QgZGF0YSAoeWVhciA9IDIwMDUpDQoNCmBgYHtyfQ0KU21hcmtldF8yMDA1IDwtIHN1YnNldChTbWFya2V0LCBZZWFyID09IDIwMDUpDQpsZGFfcHJlZCA9IHByZWRpY3QobGRhX2ZpdCwgU21hcmtldF8yMDA1KQ0KZGF0YS5mcmFtZShsZGFfcHJlZClbMTo1LF0NCg0KdGFibGUobGRhX3ByZWQkY2xhc3MsU21hcmtldF8yMDA1JERpcmVjdGlvbikNCm1lYW4obGRhX3ByZWQkY2xhc3M9PVNtYXJrZXRfMjAwNSREaXJlY3Rpb24pDQoNCmBgYA0KDQoNCi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQ0KDQozKSBLLU5lYXJlc3QgTmVpZ2hib3VycyBjbGFzc2lmaWNhdGlvbg0KDQpgYGB7cn0NCmxpYnJhcnkoY2xhc3MpDQo/a25uDQoNClhsYWcgPC0gY2JpbmQoU21hcmtldCRMYWcxLCBTbWFya2V0JExhZzIpDQp0cmFpbiA8LSBTbWFya2V0JFllYXIgPCAyMDA1DQoNCmtubl9wcmVkIDwtIGtubihYbGFnW3RyYWluLF0sIFhsYWdbIXRyYWluLF0sIFNtYXJrZXQkRGlyZWN0aW9uW3RyYWluXSwgaz0xKQ0KDQp0YWJsZShrbm5fcHJlZCwgU21hcmtldCREaXJlY3Rpb25bIXRyYWluXSkNCm1lYW4oa25uX3ByZWQgPT0gU21hcmtldCREaXJlY3Rpb25bIXRyYWluXSkNCg0KYGBgDQoNCg0KDQojIyBLLU5lYXJlc3QgTmVpZ2hib3JzDQpsaWJyYXJ5KGNsYXNzKQ0KP2tubg0K
YXR0YWNoKFNtYXJrZXQpDQpYbGFnPWNiaW5kKExhZzEsTGFnMikNCnRyYWluPVllYXI8MjAwNQ0Ka25uLnByZWQ9a25uKFhsYWdbdHJhaW4sXSxYbGFnWyF0cmFpbixdLERpcmVjdGlvblt0cmFpbl0saz0xKQ0KdGFibGUoa25uLnByZWQsRGlyZWN0aW9uWyF0cmFpbl0pDQptZWFuKGtubi5wcmVkPT1EaXJlY3Rpb25bIXRyYWluXSkNCg==