```
# Load the training and test sets (Kaggle MNIST-style CSVs:
# column 1 is the digit label, columns 2-785 are pixel intensities)
data <- read.csv("train.csv")
test <- read.csv("test.csv")

# Quick structural checks: dimensions, a peek at the first ten columns,
# and the distinct target values
dim(data)
head(data[1:10])
unique(unlist(data[1]))

# Pixel intensity range: 0 is black, 255 is white
min(data[2:785])
max(data[2:785])
# Take a look at two samples: rows 4 and 7, rendered as 28x28 images.
# Pixels are stored row-major in the CSV, so fill the matrix by row.
# image() draws matrices rotated relative to the stored orientation,
# so rotate 90 degrees to display the digit upright.
# (Defined once here; the original duplicated this definition.)
rotate <- function(x) t(apply(x, 2, rev))

sample_4 <- matrix(as.numeric(data[4, -1]), nrow = 28, byrow = TRUE)
image(rotate(sample_4), col = grey.colors(255))

sample_7 <- matrix(as.numeric(data[7, -1]), nrow = 28, byrow = TRUE)
image(rotate(sample_7), col = grey.colors(255))
# Classification needs a categorical target: convert "label" from
# integer to factor, confirming the conversion before and after.
is.factor(data$label)
data$label <- as.factor(data$label)
is.factor(data$label)
summary(data$label)

# Class balance: count and percentage share per digit
# (compute the contingency table once and reuse it)
counts <- table(data$label)
proportion <- prop.table(counts) * 100
cbind(count = counts, proportion = proportion)
# Distribution of pixel intensities at the four central pixels,
# one histogram per digit class.
central_block <- c("pixel376", "pixel377", "pixel404", "pixel405")
# 10 digits (0-9) need a 2x5 panel grid; the original 2x2 layout could
# not hold all plots on one page, and digit 0 was omitted from the loop.
par(mfrow = c(2, 5))
for (digit in 0:9) {
  hist(c(as.matrix(data[data$label == digit, central_block])),
       main = sprintf("Histogram for digit %d", digit),
       xlab = "Pixel value")
}
# Install caret on demand. Use requireNamespace() for the availability
# check: require() attaches the package as a side effect and merely
# returns FALSE on failure, which is discouraged for dependency loading.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
library(caret)

# Split the data into two partitions: 75% training, 25% testing.
# createDataPartition() stratifies on the label, so class proportions
# are preserved in both partitions. Seed fixed for reproducibility.
set.seed(42)
train_perc <- 0.75
train_index <- createDataPartition(data$label, p = train_perc, list = FALSE)
data_train <- data[train_index, ]
data_test <- data[-train_index, ]
dim(data_train)
library(nnet)

# Multinomial logistic regression on all 784 pixel features.
# MaxNWts raises nnet's default weight-count limit (a 785-input,
# 10-class model needs more weights than the default allows);
# decay adds weight regularization, maxit caps the optimizer iterations.
model_lr <- multinom(label ~ ., data = data_train,
                     MaxNWts = 10000, decay = 5e-3, maxit = 100)

# Predict class labels for the held-out test set
prediction_lr <- predict(model_lr, newdata = test, type = "class")
```