Library Imports
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.1.3
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.1.3
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
Exploratory Data Analysis (EDA)
# Total number of missing cells across every column of resale_2023.
resale_2023 %>%
  is.na() %>%
  sum()
## [1] 0
# Per-column summary: length/class for character columns, quantiles and
# mean for the numeric ones.
resale_2023 %>%
  summary()
## month town flat_type block
## Length:4410 Length:4410 Length:4410 Length:4410
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## street_name storey_range floor_area_sqm flat_model
## Length:4410 Length:4410 Min. : 37.00 Length:4410
## Class :character Class :character 1st Qu.: 73.00 Class :character
## Mode :character Mode :character Median : 93.00 Mode :character
## Mean : 94.82
## 3rd Qu.:111.75
## Max. :192.00
## resale_price remaining_lease
## Min. : 230000 Min. :43.00
## 1st Qu.: 430000 1st Qu.:61.17
## Median : 535000 Median :73.42
## Mean : 555608 Mean :73.79
## 3rd Qu.: 648000 3rd Qu.:90.58
## Max. :1340000 Max. :95.50
# Compact structure listing: dimensions, column types and leading values.
resale_2023 %>%
  str()
## 'data.frame': 4410 obs. of 10 variables:
## $ month : chr "2023-01" "2023-01" "2023-01" "2023-01" ...
## $ town : chr "ANG MO KIO" "ANG MO KIO" "ANG MO KIO" "ANG MO KIO" ...
## $ flat_type : chr "2 ROOM" "2 ROOM" "2 ROOM" "2 ROOM" ...
## $ block : chr "406" "323" "314" "314" ...
## $ street_name : chr "ANG MO KIO AVE 10" "ANG MO KIO AVE 3" "ANG MO KIO AVE 3" "ANG MO KIO AVE 3" ...
## $ storey_range : chr "01 TO 03" "04 TO 06" "04 TO 06" "07 TO 09" ...
## $ floor_area_sqm : num 44 49 44 44 45 67 70 67 73 73 ...
## $ flat_model : chr "Improved" "Improved" "Improved" "Improved" ...
## $ resale_price : num 267000 300000 280000 282000 289800 ...
## $ remaining_lease: num 55.4 53.5 54.1 54.1 62.1 ...
# Bar chart of the number of rows per month value.
table(resale_2023$month) %>%
  barplot()
data:image/s3,"s3://crabby-images/a71be/a71be6a4e3b2876aa0a6174cab04f7c372ebdcdd" alt=""
# Bar chart of the number of rows per town.
table(resale_2023$town) %>%
  barplot()
data:image/s3,"s3://crabby-images/baccb/baccb29083937a01a24ee42175b2c7f8559c9f71" alt=""
# Bar chart of the number of rows per flat type.
table(resale_2023$flat_type) %>%
  barplot()
data:image/s3,"s3://crabby-images/975d6/975d66ad29a33ba8a4ee60943e7900365e8a1814" alt=""
# Number of distinct block identifiers in the data.
resale_2023$block %>%
  unique() %>%
  length()
## [1] 1650
# Bar chart of the number of rows per street name.
table(resale_2023$street_name) %>%
  barplot()
data:image/s3,"s3://crabby-images/098f5/098f5c474e4e13d51f820e0e5a79bbc838a604c5" alt=""
# Bar chart of the number of rows per storey range.
table(resale_2023$storey_range) %>%
  barplot()
data:image/s3,"s3://crabby-images/ba119/ba119c917223f93530fcf2a3c13fe2656d5d2b94" alt=""
# floor_area_sqm is numeric, so show its distribution with a histogram.
# A bar chart of table() would treat every distinct area as its own
# category and space the bars evenly regardless of the gaps between values.
hist(
  resale_2023$floor_area_sqm,
  main = "Distribution of floor_area_sqm",
  xlab = "floor_area_sqm"
)
data:image/s3,"s3://crabby-images/15b73/15b73a0dd715a6fddb91579d5b670cb124309887" alt=""
# Bar chart of the number of rows per flat model.
table(resale_2023$flat_model) %>%
  barplot()
data:image/s3,"s3://crabby-images/48ee7/48ee7cc1ac42d0630929ce526da25da91fd95e20" alt=""
# remaining_lease is a continuous numeric column, so bin it with a
# histogram. A bar chart of table() would draw one evenly spaced bar per
# distinct value and hide the real shape of the distribution.
hist(
  resale_2023$remaining_lease,
  main = "Distribution of remaining_lease",
  xlab = "remaining_lease"
)
data:image/s3,"s3://crabby-images/ddcc3/ddcc3ce70240ef9caaf612b87847a54380cff556" alt=""
# resale_price is a continuous numeric column, so bin it with a histogram.
# A bar chart of table() would draw one evenly spaced bar per distinct
# price, which is unreadable and ignores the distance between prices.
hist(
  resale_2023$resale_price,
  main = "Distribution of resale_price",
  xlab = "resale_price"
)
data:image/s3,"s3://crabby-images/2584e/2584edb52775c1ebd59b1ff9198b4b1db4eba850" alt=""
# Box plots of resale price, one box per month value.
ggplot(resale_2023, aes(x = month, y = resale_price)) +
  geom_boxplot()
data:image/s3,"s3://crabby-images/ea44f/ea44fa52c9b7bcd8bf60ecb84e648defba5b724f" alt=""
# Box plots of resale price per town; x-axis labels are rotated 90 degrees.
ggplot(resale_2023, aes(x = town, y = resale_price)) +
  geom_boxplot() +
  labs(
    title = "Resale Price Distribution by Town",
    x = "Town",
    y = "Resale Price"
  ) +
  theme(axis.text.x = element_text(angle = 90))
data:image/s3,"s3://crabby-images/77626/7762696cd35b4a90033301fe79cd95cd5289f4b0" alt=""
# Box plots of resale price per flat type.
ggplot(resale_2023, aes(x = flat_type, y = resale_price)) +
  geom_boxplot() +
  labs(
    title = "Resale Price Distribution by Flat Type",
    x = "Flat Type",
    y = "Resale Price"
  ) +
  theme_minimal()
data:image/s3,"s3://crabby-images/efbe5/efbe5b6b9ba8f7d5228c8f72897e40a066b9395c" alt=""
# Resale price by flat type, one panel per flat model. Each panel keeps
# its own x scale, so only the flat types present in it are shown.
ggplot(resale_2023, aes(x = flat_type, y = resale_price)) +
  geom_boxplot() +
  facet_wrap(~flat_model, scales = "free_x")
data:image/s3,"s3://crabby-images/d6002/d6002243c889ff72598fdb22a51dbb01560e679c" alt=""
# Resale price by flat model, one panel per flat type, with both axes
# free per panel and x-axis labels tilted 45 degrees.
ggplot(resale_2023, aes(x = flat_model, y = resale_price)) +
  geom_boxplot() +
  facet_wrap(~flat_type, scales = "free") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45))
data:image/s3,"s3://crabby-images/a2a52/a2a528b366dc6fe058021d9eedb8f7eac9a9de85" alt=""
# Box plots of resale price per storey range; x-axis labels tilted 30 degrees.
ggplot(resale_2023, aes(x = storey_range, y = resale_price)) +
  geom_boxplot() +
  labs(
    title = "Resale Price Distribution by HDB Storey Range",
    x = "HDB Storey Range",
    y = "Resale Price"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30))
data:image/s3,"s3://crabby-images/a3e58/a3e5824325a50edbbd945c1b724cc0954013e7b8" alt=""
# Scatter plot of resale price against floor area.
ggplot(resale_2023, aes(x = floor_area_sqm, y = resale_price)) +
  geom_point() +
  labs(
    title = "Resale Price Distribution by Flat Floor Area (in m^2)",
    x = "Flat Floor Area (in m^2)",
    y = "Resale Price"
  ) +
  theme_minimal()
data:image/s3,"s3://crabby-images/ae934/ae9346bb65842f8f148d7d6fcb9c835da3bf1a02" alt=""
# Scatter plot of resale price against floor area, with points coloured
# by storey range.
ggplot(
  resale_2023,
  aes(x = floor_area_sqm, y = resale_price, color = storey_range)
) +
  geom_point() +
  labs(
    title = "Resale Price Distribution by Flat Floor Area (in m^2)",
    x = "Flat Floor Area (in m^2)",
    y = "Resale Price"
  ) +
  theme_minimal()
data:image/s3,"s3://crabby-images/4e969/4e969ba3095780c75f3cbd815f78743b9a5b8b87" alt=""
# Box plots of resale price per flat model.
ggplot(resale_2023, aes(x = flat_model, y = resale_price)) +
  geom_boxplot()
data:image/s3,"s3://crabby-images/11acc/11acc1312977e8e933e1f59c8e98d218fc73f6d2" alt=""
# Scatter plot of resale price against remaining lease.
ggplot(resale_2023, aes(x = remaining_lease, y = resale_price)) +
  geom_point()
data:image/s3,"s3://crabby-images/1cea4/1cea4b571f6a46a63deea8ec3c805dbb5f09762f" alt=""
# Histogram of resale price.
# bins is set explicitly to the value ggplot2 would otherwise fall back to,
# so the plot is unchanged but the "`stat_bin()` using `bins = 30`" message
# is no longer emitted. Tune bins/binwidth here if a different resolution
# is wanted.
resale_2023 %>%
  ggplot() +
  geom_histogram(aes(x = resale_price), bins = 30) +
  labs(title = "Resale Price Distribution") +
  ylab("Count") +
  xlab("Resale Price") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
data:image/s3,"s3://crabby-images/ad80b/ad80bea40f8a6b3bc04e6617123528d5d918da46" alt=""
# Q-Q plot of resale price sample quantiles against theoretical quantiles,
# with a reference line.
ggplot(resale_2023, aes(sample = resale_price)) +
  stat_qq() +
  stat_qq_line() +
  labs(
    title = "Resale Price QQ Plot",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles"
  ) +
  theme_minimal()
data:image/s3,"s3://crabby-images/87a57/87a577b7cdd1f0fc3a2704583eb024bd2dd84f1c" alt=""
# Histogram of remaining lease.
# bins is spelled out (same value as the ggplot2 fallback) so the plot is
# unchanged and the "`stat_bin()` using `bins = 30`" message is not emitted.
resale_2023 %>%
  ggplot() +
  geom_histogram(aes(x = remaining_lease), bins = 30)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
data:image/s3,"s3://crabby-images/ee225/ee2253c7ef22fa1477fd89a62b16dfa11c1a30c1" alt=""
# Histogram of floor area.
# bins is spelled out (same value as the ggplot2 fallback) so the plot is
# unchanged and the "`stat_bin()` using `bins = 30`" message is not emitted.
resale_2023 %>%
  ggplot() +
  geom_histogram(aes(x = floor_area_sqm), bins = 30)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
data:image/s3,"s3://crabby-images/a5756/a5756d6e51a4bd19a583df27828e7a3012f32964" alt=""
# Pairwise correlations between the three numeric columns, rounded to
# two decimals.
cormat <- resale_2023[c("floor_area_sqm", "remaining_lease", "resale_price")] %>%
  cor() %>%
  round(digits = 2)
head(cormat)
## floor_area_sqm remaining_lease resale_price
## floor_area_sqm 1.00 0.07 0.69
## remaining_lease 0.07 1.00 0.37
## resale_price 0.69 0.37 1.00
# Reshape the correlation matrix into long form (one row per pair of
# variables, columns Var1 / Var2 / value) for plotting as tiles.
melted_cormat <- reshape2::melt(cormat)
head(melted_cormat)
## Var1 Var2 value
## 1 floor_area_sqm floor_area_sqm 1.00
## 2 remaining_lease floor_area_sqm 0.07
## 3 resale_price floor_area_sqm 0.69
## 4 floor_area_sqm remaining_lease 0.07
## 5 remaining_lease remaining_lease 1.00
## 6 resale_price remaining_lease 0.37
# Correlation heatmap: one tile per variable pair, filled on a diverging
# blue-white-red scale centred at 0 and fixed to [-1, 1], with the
# correlation value printed on each tile.
ggplot(melted_cormat, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    # `limits` is the real argument name; `limit` only worked through
    # partial argument matching.
    midpoint = 0, limits = c(-1, 1), space = "Lab",
    name = "Pearson\nCorrelation"
  ) +
  # Labels inherit x/y from the plot so each value is drawn on its own
  # tile. The original swapped x and y, which was only correct because
  # the matrix is symmetric.
  geom_text(aes(label = value), color = "black", size = 4)
data:image/s3,"s3://crabby-images/9bf56/9bf56a6ca0944e8db065c560edef5d25376fba61" alt=""