Predicting Boston House Prices and Ionosphere Data Analysis with Machine Learning
Classified in Computers
Written in English, with a size of 5.24 KB.
Boston Housing Data Analysis
# Boston housing: package setup and exploratory plots ----
library(mlbench)
# Install dplyr only when it is missing, so that re-running the script does
# not reinstall the package (and hit the network) on every execution.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
library(ggplot2)
library(reshape2)

# Load the BostonHousing data set and work on a copy named `housing`.
data("BostonHousing")
housing <- BostonHousing
str(housing)

# Density of the response variable `medv`.
housing %>%
  ggplot(aes(x = medv)) +
  stat_density() +
  labs(
    x = "Median Value ($1000s)",
    y = "Density",
    title = "Density Plot of Median Value House Price in Boston"
  ) +
  theme_minimal()
summary(housing$medv)

# `medv` against a selection of predictors, one facet per predictor.
# melt() reshapes to long format: one row per (medv, variable, value).
housing %>%
  select(c(crim, rm, age, rad, tax, lstat, medv)) %>%
  melt(id.vars = "medv") %>%
  ggplot(aes(x = value, y = medv, colour = variable)) +
  geom_point(alpha = 0.7) +
  # The colour is a fixed setting, so it goes outside aes(). Inside aes() the
  # string "black" is treated as a data value: the line gets a default hue
  # and an extra "black" entry shows up in the legend.
  stat_smooth(colour = "black") +
  facet_wrap(~variable, scales = "free", ncol = 2) +
  labs(x = "Variable Value", y = "Median House Price ($1000s)") +
  theme_minimal()
# Boston housing: train/test split and linear models ----
library("caret")
set.seed(123) # fix the RNG state so the partition is reproducible

# 75% of the rows go to training.
to_train <- createDataPartition(y = housing$medv, p = 0.75, list = FALSE)
# The test set is the complement of the training rows. Drawing a second,
# independent partition would let test rows overlap the training rows, so
# the model would be evaluated on data it was fitted to.
to_test <- setdiff(seq_len(nrow(housing)), to_train)
train <- housing[to_train, ]
test <- housing[to_test, ]

# First model: medv on the raw scale.
first_lm <- lm(medv ~ crim + rm + tax + lstat, data = train)
lm1_rsqu <- summary(first_lm)$r.squared
print(paste0("1st linear model has an r-squared value of ", round(lm1_rsqu, 3)))
## [1] "1st linear model has an r-squared value of 0.672"
#plot(first_lm)

# Second model: same predictors, log-transformed response.
second_lm <- lm(log(medv) ~ crim + rm + tax + lstat, data = train)
lm2_rsqu <- summary(second_lm)$r.squared
print(paste0("Our 2nd linear model has an r-squared value of ", round(lm2_rsqu, 3)))
# Absolute value of the mean training residual.
abs(mean(residuals(second_lm)))

# Predict on the held-out rows; exp() undoes the log() applied to medv in the
# model formula so predictions are comparable with the original values.
predicted <- predict(second_lm, newdata = test)
results <- data.frame(predicted = exp(predicted), original = test$medv)
results %>%
  ggplot(aes(x = predicted, y = original)) +
  geom_point() +
  stat_smooth() +
  labs(
    x = "Predicted Values",
    y = "Original Values",
    title = "Predicted vs. Original Values"
  ) +
  theme_minimal()
Ionosphere Data Analysis with KNN
# Ionosphere: data preparation ----
# Install KernelKnn only when it is missing instead of on every run.
if (!requireNamespace("KernelKnn", quietly = TRUE)) {
  install.packages("KernelKnn")
}
data(ionosphere, package = "KernelKnn")

# Number of distinct values per column. vapply() walks the columns directly;
# apply() would first coerce the whole data frame to a character matrix,
# formatting the numeric columns before they are compared.
vapply(ionosphere, function(x) length(unique(x)), integer(1))

# Drop the second column.
# NOTE(review): presumably because the counts above show it is constant -- confirm.
ionosphere <- ionosphere[, -2]

# Predictors: every column except the last, centred and scaled.
X <- scale(ionosphere[, -ncol(ionosphere)])
# Response: class labels recoded as integer positions 1..K in sorted label
# order. match() already returns those positions, so no lookup vector is needed.
y <- match(ionosphere$class, sort(unique(ionosphere$class)))

# Random 75% / 25% split of the row indices.
spl_train <- sample(seq_along(y), round(length(y) * 0.75))
spl_test <- setdiff(seq_along(y), spl_train)
str(spl_train)
str(spl_test)
# Classification accuracy.
#
# y_true: vector of true class indices; each value must be the column
#         position (1..K) of the matching class in `preds`.
# preds:  matrix of class scores, one row per observation and one column per
#         class; the predicted class is the column with the largest score
#         (ties broken at random).
#
# Returns the fraction of observations whose predicted class equals y_true.
#
# The comparison is done element-wise rather than via sum(diag(table(...))):
# when a class is missing from the predictions (or from y_true) the table is
# not square, and its diagonal no longer lines up with the correct matches.
acc <- function(y_true, preds) {
  predicted <- max.col(preds, ties.method = "random")
  stopifnot(length(y_true) == length(predicted))
  mean(y_true == predicted)
}
library(KernelKnn)

# k = 5, Euclidean distance, no kernel weighting.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
preds_TEST <- KernelKnn(
  X[spl_train, ], TEST_data = X[spl_test, ], y[spl_train], k = 5,
  method = "euclidean", weights_function = NULL, regression = FALSE,
  Levels = unique(y)
)
head(preds_TEST)

# k = 10, Canberra distance, built-in "tricube" kernel weighting.
preds_TEST_tric <- KernelKnn(
  X[spl_train, ], TEST_data = X[spl_test, ], y[spl_train], k = 10,
  method = "canberra", weights_function = "tricube", regression = FALSE,
  Levels = unique(y)
)
head(preds_TEST_tric)
# Custom weighting kernel: evaluates the standard normal density at every
# entry of W, then rescales each row so that its weights sum to 1.
#
# W: numeric matrix (rowSums() needs at least two dimensions).
#    NOTE(review): presumably the neighbour distances handed over by
#    KernelKnn, one row per observation -- confirm.
#
# Returns a matrix with the same dimensions as W.
norm_kernel <- function(W) {
  density <- dnorm(W)
  density / rowSums(density)
}
# k = 10, Canberra distance, weights supplied by the user-defined
# norm_kernel() function instead of a built-in kernel name.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
preds_TEST_norm <- KernelKnn(
  X[spl_train, ], TEST_data = X[spl_test, ], y[spl_train], k = 10,
  method = "canberra", weights_function = norm_kernel, regression = FALSE,
  Levels = unique(y)
)
head(preds_TEST_norm)
# 5-fold cross-validation for two parameter settings.
#
# The opening of the first call was missing from the script, leaving only its
# trailing arguments and making the file unparsable. It is restored here to
# mirror the fit_cv_pair2 call below.
# NOTE(review): k = 10 is assumed (it matches the earlier canberra/tricube
# prediction run); folds and method are copied from fit_cv_pair2 -- confirm.
fit_cv_pair1 <- KernelKnnCV(
  X, y, k = 10, folds = 5, method = "canberra",
  weights_function = "tricube", regression = FALSE,
  Levels = unique(y), threads = 5
)
str(fit_cv_pair1)

fit_cv_pair2 <- KernelKnnCV(
  X, y, k = 9, folds = 5, method = "canberra",
  weights_function = "epanechnikov", regression = FALSE,
  Levels = unique(y), threads = 5
)
str(fit_cv_pair2)
# Each cross-validated object returns a list of length 2; `preds` and `folds`
# are read from it below. Accuracy is computed fold by fold and then averaged.
# seq_along() is safe for an empty list, and vapply() guarantees one numeric
# value per fold.
acc_pair1 <- vapply(
  seq_along(fit_cv_pair1$preds),
  function(i) acc(y[fit_cv_pair1$folds[[i]]], fit_cv_pair1$preds[[i]]),
  numeric(1)
)
acc_pair1
# Message typo fixed ("accurcay" -> "accuracy") to match the pair2 message.
cat("accuracy for params_pair1 is :", mean(acc_pair1), "\n")

acc_pair2 <- vapply(
  seq_along(fit_cv_pair2$preds),
  function(i) acc(y[fit_cv_pair2$folds[[i]]], fit_cv_pair2$preds[[i]]),
  numeric(1)
)
acc_pair2
cat("accuracy for params_pair2 is :", mean(acc_pair2), "\n")