Predicting Boston House Prices and Ionosphere Data Analysis with Machine Learning

Classified in Computers

Written on July 24, 2024 in English with a size of 5.24 KB

Boston Housing Data Analysis

# Packages used by the Boston housing analysis. dplyr is installed only when
# it is missing, so re-running the script does not reinstall it every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(mlbench)
library(dplyr)
library(ggplot2)
library(reshape2)

# Load the BostonHousing data set and keep a working copy under a shorter name.
data("BostonHousing")
housing <- BostonHousing
str(housing)

# Density plot of the response variable (medv), followed by its numeric
# summary.
ggplot(data = housing, mapping = aes(x = medv)) +
  stat_density() +
  labs(
    x = "Median Value ($1000s)",
    y = "Density",
    title = "Density Plot of Median Value House Price in Boston"
  ) +
  theme_minimal()

summary(housing[["medv"]])

# Scatter plots of selected predictors against medv: the data are reshaped to
# long form so that each predictor gets its own facet, and a smoothed trend
# line is drawn in every panel.
housing %>%
  select(c(crim, rm, age, rad, tax, lstat, medv)) %>%
  melt(id.vars = "medv") %>%
  ggplot(aes(x = value, y = medv, colour = variable)) +
  geom_point(alpha = 0.7) +
  # A constant colour has to be set outside aes(). Inside aes() the string
  # "black" is treated as a data value and mapped through the colour scale,
  # which yields a non-black line and an extra legend entry.
  stat_smooth(colour = "black") +
  facet_wrap(~variable, scales = "free", ncol = 2) +
  labs(x = "Variable Value", y = "Median House Price ($1000s)") +
  theme_minimal()

library("caret")

# Fix the RNG seed so that the partition is reproducible.
set.seed(123)

# Put 75% of the rows in the training set. The test set is the complement of
# the training rows: drawing a second, independent partition for it would let
# test rows overlap the training rows and make the evaluation optimistic.
to_train <- createDataPartition(y = housing$medv, p = 0.75, list = FALSE)
to_test <- setdiff(seq_len(nrow(housing)), to_train)
train <- housing[to_train, ]
test <- housing[to_test, ]

# First model: medv regressed on crim, rm, tax and lstat using the training
# rows.
first_lm <- lm(medv ~ crim + rm + tax + lstat, data = train)

# Report the model's R-squared, rounded to three decimals.
lm1_rsqu <- summary(first_lm)[["r.squared"]]
print(
  paste0(
    "1st linear model has an r-squared value of ",
    round(lm1_rsqu, digits = 3)
  )
)
## [1] "1st linear model has an r-squared value of 0.672"
# plot(first_lm)

# Second model: same predictors as the first, but with a log-transformed
# response.
second_lm <- lm(log(medv) ~ crim + rm + tax + lstat, data = train)

# Report the model's R-squared, rounded to three decimals.
lm2_rsqu <- summary(second_lm)[["r.squared"]]
print(
  paste0(
    "Our 2nd linear model has an r-squared value of ",
    round(lm2_rsqu, digits = 3)
  )
)

# Magnitude of the mean residual of the second model.
abs(mean(second_lm[["residuals"]]))

# Predict for the rows in `test`. The model was fitted on log(medv), so the
# predictions are exponentiated before being paired with the observed values.
predicted <- predict(second_lm, newdata = test)
results <- data.frame(
  predicted = exp(predicted),
  original = test[["medv"]]
)

# Predicted against observed values, with a smoothed trend line.
ggplot(data = results, mapping = aes(x = predicted, y = original)) +
  geom_point() +
  stat_smooth() +
  labs(
    x = "Predicted Values",
    y = "Original Values",
    title = "Predicted vs. Original Values"
  ) +
  theme_minimal()

Ionosphere Data Analysis with KNN

# Install KernelKnn only when it is missing, then load its ionosphere data.
if (!requireNamespace("KernelKnn", quietly = TRUE)) {
  install.packages("KernelKnn")
}
data(ionosphere, package = "KernelKnn")

# Number of distinct values in each column. vapply() walks the columns
# directly; apply() would first coerce the whole data frame to a matrix.
vapply(ionosphere, function(column) length(unique(column)), integer(1))

# Drop the second column.
# NOTE(review): presumably because the distinct-value count shows it is
# constant -- confirm.
ionosphere <- ionosphere[, -2]

# Standardise every column except the last one, which is treated as the label.
X <- scale(ionosphere[, -ncol(ionosphere)])

# Encode the class labels as integers 1..K, numbered by the sorted order of
# the distinct labels. match() already returns those positions, so no extra
# lookup through 1:K is needed.
y <- match(ionosphere$class, sort(unique(ionosphere$class)))

# Randomly pick 75% of the observation indices for training; the remaining
# indices form the test set. seq_along() is used instead of 1:length(y) so an
# empty `y` yields an empty index vector rather than c(1, 0).
spl_train <- sample(seq_along(y), round(length(y) * 0.75))
spl_test <- setdiff(seq_along(y), spl_train)
str(spl_train)
str(spl_test)

# Classification accuracy.
#
# y_true: integer class labels, where label i corresponds to column i of
#         `preds`.
# preds:  matrix with one row per observation and one column per class; the
#         predicted class of a row is the column holding its largest value
#         (ties are broken at random).
#
# Returns the fraction of rows whose predicted class equals `y_true`.
#
# Labels are compared element-wise rather than via diag(table(...)): when the
# predictions do not cover every class the confusion table is not square, and
# its "diagonal" then pairs up unrelated classes.
acc <- function(y_true, preds) {
  stopifnot(length(y_true) == nrow(preds))
  predicted <- max.col(preds, ties.method = "random")
  mean(y_true == predicted)
}

library(KernelKnn)

# k-nearest-neighbour class predictions for the test rows: k = 5, Euclidean
# distance, no kernel weights function. FALSE is spelled out because `F` is
# an ordinary variable that can be reassigned.
preds_TEST <- KernelKnn(
  X[spl_train, ],
  TEST_data = X[spl_test, ],
  y[spl_train],
  k = 5,
  method = "euclidean",
  weights_function = NULL,
  regression = FALSE,
  Levels = unique(y)
)
head(preds_TEST)

# Kernel-weighted predictions for the test rows: k = 10, Canberra distance,
# built-in "tricube" weights. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
preds_TEST_tric <- KernelKnn(
  X[spl_train, ],
  TEST_data = X[spl_test, ],
  y[spl_train],
  k = 10,
  method = "canberra",
  weights_function = "tricube",
  regression = FALSE,
  Levels = unique(y)
)
head(preds_TEST_tric)

# User-defined kernel: evaluates the standard normal density at every entry
# of W and rescales each row so that its weights sum to 1.
#
# W: numeric matrix (rowSums() requires at least two dimensions).
# Returns a matrix with the same dimensions as W.
norm_kernel <- function(W) {
  W <- dnorm(W, mean = 0, sd = 1.0)
  W / rowSums(W)
}

# Predictions for the test rows using the custom kernel above: k = 10,
# Canberra distance. FALSE is spelled out because `F` can be reassigned.
preds_TEST_norm <- KernelKnn(
  X[spl_train, ],
  TEST_data = X[spl_test, ],
  y[spl_train],
  k = 10,
  method = "canberra",
  weights_function = norm_kernel,
  regression = FALSE,
  Levels = unique(y)
)
head(preds_TEST_norm)

# Cross-validated fit for the first parameter pair.
# NOTE(review): the opening of this call was missing from the file, which
# began mid-argument-list at `weights_function = 'tricube'`. The head below
# is reconstructed: `k = 10` and `method = "canberra"` mirror the earlier
# tricube fit (preds_TEST_tric), and `folds = 5` mirrors fit_cv_pair2.
# Confirm these against the intended experiment.
fit_cv_pair1 <- KernelKnnCV(
  X,
  y,
  k = 10,
  folds = 5,
  method = "canberra",
  weights_function = "tricube",
  regression = FALSE,
  Levels = unique(y),
  threads = 5
)
str(fit_cv_pair1)

# Cross-validated fit for the second parameter pair: k = 9 with the
# "epanechnikov" weights.
fit_cv_pair2 <- KernelKnnCV(
  X,
  y,
  k = 9,
  folds = 5,
  method = "canberra",
  weights_function = "epanechnikov",
  regression = FALSE,
  Levels = unique(y),
  threads = 5
)
str(fit_cv_pair2)

# Each cross-validated object returns a list of length 2. The code below reads
# its `preds` element (one prediction matrix per fold) and its `folds` element
# (used here to index `y` for the matching fold).

# Per-fold accuracy for the first parameter pair, then its mean.
acc_pair1 <- vapply(
  seq_along(fit_cv_pair1$preds),
  function(i) acc(y[fit_cv_pair1$folds[[i]]], fit_cv_pair1$preds[[i]]),
  numeric(1)
)
acc_pair1
# The message typo ("accurcay") is corrected and the line ends with a newline
# so that the two reports do not run together.
cat("accuracy for params_pair1 is :", mean(acc_pair1), "\n")

# Per-fold accuracy for the second parameter pair, then its mean.
acc_pair2 <- vapply(
  seq_along(fit_cv_pair2$preds),
  function(i) acc(y[fit_cv_pair2$folds[[i]]], fit_cv_pair2$preds[[i]]),
  numeric(1)
)
acc_pair2
cat("accuracy for params_pair2 is :", mean(acc_pair2), "\n")

Related entries:

Tags: