Data Manipulation and Visualization with R and the nycflights13 Dataset
Classified in Computers
Written in English, with a size of 3.64 KB.
library(nycflights13)
library(tidyverse)
Data Manipulation with dplyr
Ordering Rows with arrange()
# Order the rows by year, then month, then day.
arrange(
  flights,
  year, month, day
)

# Order the rows by arr_delay, wrapped in desc() for descending order.
arrange(
  flights,
  desc(arr_delay)
)
Handling NAs
# One-column tibble containing a missing value, used to see how arrange()
# treats NA.
df <- tibble(
  x = c(5, 2, NA)
)

# Sort ascending, then descending; compare where the NA row lands in each.
arrange(df, x)
arrange(df, desc(x))
Selecting Columns with select()
# Keep three columns, named one by one.
select(
  flights,
  year, month, day
)

# Keep every column positioned from year through day.
select(flights, year:day)

# Drop that same range and keep the rest.
select(flights, -(year:day))

# Rename the month column to mes.
rename(
  flights,
  mes = month
)

# Put time_hour and air_time first; everything() appends the other columns.
select(
  flights,
  time_hour, air_time,
  everything()
)
Creating New Variables with mutate()
# Narrower working table: the columns from year through day, every column
# whose name ends with "delay", plus distance and air_time.
flights_sml <- select(
  flights,
  year:day,
  ends_with("delay"),
  distance,
  air_time
)

# Add two derived columns:
#   gain  - arr_delay minus dep_delay
#   speed - distance per unit of air_time, scaled by 60
#           (presumably air_time is in minutes, giving a per-hour rate;
#           TODO confirm against the dataset documentation)
mutate(
  flights_sml,
  gain = arr_delay - dep_delay,
  speed = distance / air_time * 60
)
Useful Vectorized Functions for Creating Variables
# Split dep_time with integer division (%/%) and remainder (%%) by 100.
# NOTE(review): this assumes dep_time is stored as an HHMM-style number;
# confirm against the dataset documentation.
transmute(
  flights,
  dep_time,
  hour = dep_time %/% 100,
  minute = dep_time %% 100
)

# Cumulative aggregates over the integers 1 to 10: show the input, then its
# running sum and running mean.
x <- 1:10
x
cumsum(x)
cummean(x)

# Ranking a vector that contains a tie and a missing value.
y <- c(1, 2, 2, NA, 3, 4)
rank(y)
Summarizing Data with summarize()
# Collapse the whole table to one row: the mean of dep_delay, skipping NAs.
summarize(
  flights,
  delay = mean(dep_delay, na.rm = TRUE)
)

# Group the rows by year, month and day, then inspect the grouped object's
# class.
by_day <- group_by(
  flights,
  year, month, day
)
class(by_day)

# Same mean as above, now computed once per group.
summarize(
  by_day,
  delay = mean(dep_delay, na.rm = TRUE)
)
Using the Pipe Operator %>%
# Feed a vector through two functions in sequence, one pipe step per line:
# the mean of x is computed first and then passed on to exp().
x <- 1:10
x %>%
  mean() %>%
  exp()
# Per-destination summary: row count, mean distance and mean arrival delay
# (NAs skipped). Only destinations with more than 20 rows are kept, and
# "HNL" is excluded.
delays <- flights %>%
  group_by(dest) %>%
  summarize(
    count = n(),
    dist = mean(distance, na.rm = TRUE),
    delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  filter(count > 20 & dest != "HNL")

# Mean delay against mean distance: point size shows the row count, points
# are semi-transparent, and a smoother is drawn without its confidence band.
ggplot(delays, aes(x = dist, y = delay)) +
  geom_point(aes(size = count), alpha = 1/3) +
  geom_smooth(se = FALSE)
# Vector with a run of repeated values (5, 5, 5).
xx <- c(1:4, 5, 5, 5, 8, 9, 10)
xx

# Flag positions whose value differs from the previous element.
# lag() is namespace-qualified on purpose: stats::lag() has the same name
# but returns the vector unshifted, so an unqualified call gives all FALSE
# whenever dplyr is not the attached winner. The first element has no
# predecessor, so its comparison is NA.
xx != dplyr::lag(xx)

# Frequency table of the dest column.
table(flights$dest)
Counts
# Keep only the rows where both dep_delay and arr_delay are present.
not_cancelled <- flights %>%
  filter(
    !is.na(dep_delay),
    !is.na(arr_delay)
  )

# Mean arrival delay for each tailnum.
delays <- not_cancelled %>%
  group_by(tailnum) %>%
  summarize(
    delay = mean(arr_delay)
  )

# Frequency polygon of those per-tailnum means, with bins of width 10.
ggplot(delays, aes(x = delay)) +
  geom_freqpoly(binwidth = 10)
# Per-tailnum mean arrival delay, this time with the number of rows (n)
# behind each mean.
delays <- not_cancelled %>%
  group_by(tailnum) %>%
  summarize(
    delay = mean(arr_delay, na.rm = TRUE),
    n = n()
  )

# Mean delay against the number of rows per tailnum; the low alpha keeps
# overlapping points readable.
ggplot(delays, aes(x = n, y = delay)) +
  geom_point(alpha = 1/10)