# Print a compact summary of mpg: its classes, dimensions, and the type and
# first few values of every column (echoed output follows as `##` lines).
str(mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame': 234 obs. of 11 variables:
## $ manufacturer: chr "audi" "audi" "audi" "audi" ...
## $ model : chr "a4" "a4" "a4" "a4" ...
## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr "f" "f" "f" "f" ...
## $ cty : int 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr "p" "p" "p" "p" ...
## $ class : chr "compact" "compact" "compact" "compact" ...
# List the column names of mpg.
colnames(mpg)
## [1] "manufacturer" "model" "displ" "year"
## [5] "cyl" "trans" "drv" "cty"
## [9] "hwy" "fl" "class"
# Basic scatter plot: engine displacement against highway mileage.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  labs(x = "Engine volume", y = "Highway miles per gallon")

# Same scatter plot, with points coloured by vehicle class.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(colour = class)) +
  labs(x = "Engine volume", y = "Highway miles per gallon")

# Colour by number of cylinders, treated as a discrete factor.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, colour = factor(cyl))) +
  labs(x = "Engine volume", y = "Highway miles per gallon")

# Jittered, semi-transparent points sized by number of cylinders.
ggplot(mpg, aes(x = displ, y = hwy, size = cyl)) +
  geom_jitter(alpha = 0.4) +
  labs(x = "Engine volume", y = "Highway miles per gallon")
# One scatter panel per vehicle class.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  labs(x = "Engine volume", y = "Highway miles per gallon") +
  facet_wrap(~ class)

# Separate linear fits per cylinder count: the colour mapping lives in the
# plot-level aesthetics, so both the points and the smoother are split by it.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, colour = factor(cyl))
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Engine volume", y = "Highway miles per gallon")

# A single loess smoother over all points.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(method = "loess") +
  labs(x = "Engine volume", y = "Highway miles per gallon")
?loess
Description
Fit a polynomial surface determined by one or more numerical predictors,
using local fitting.
Usage
loess(formula, data, weights, subset, na.action, model = FALSE,
span = 0.75, enp.target, degree = 2,
parametric = FALSE, drop.square = FALSE, normalize = TRUE,
family = c("gaussian", "symmetric"),
method = c("loess", "model.frame"),
control = loess.control(...), ...)
# Overlay a loess and a linear fit. Mapping colour to a constant string inside
# aes() gives each smoother its own legend entry under the "Method" title.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(mapping = aes(colour = "loess"), method = "loess", se = FALSE) +
  geom_smooth(mapping = aes(colour = "lm"), method = "lm", se = FALSE) +
  labs(
    colour = "Method",
    x = "Engine volume",
    y = "Highway miles per gallon"
  )

# Loess fit shown separately for each model year.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(method = "loess") +
  facet_wrap(~ year) +
  labs(x = "Engine volume", y = "Highway miles per gallon")
ggplot(data = <DATA>) +
<GEOM_FUNCTION>(
mapping = aes(<MAPPINGS>),
position = <POSITION>
) +
<COORDINATE_FUNCTION> +
<FACET_FUNCTION> +
<VISUAL THEMING FUNCTION>
Using the built-in “economics” dataset, we can visualize the unemployment level:
# Time series of the unemployment level from the economics dataset.
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line() +
  labs(x = "Timeline", y = "Unemployment level, thousands")
Information about the housing market in Texas provided by the TAMU real estate center,
https://recenter.tamu.edu/.
A data frame with 8602 observations and 9 variables:
city
Name of MLS area
year,month,date
Date
sales
Number of sales
volume
Total value of sales
median
Median sale price
listings
Total active listings
inventory
"Months inventory": amount of time it would take to sell all current listings at current pace of sales.
# One semi-transparent line per city showing sales over time.
ggplot(data = txhousing, mapping = aes(x = date, y = sales, group = city)) +
  geom_line(alpha = 0.5)
Problems: seasonal trend; small vs big cities.
# Same per-city lines on a log scale, so small and large cities are comparable.
ggplot(
  data = txhousing,
  mapping = aes(x = date, y = log(sales), group = city)
) +
  geom_line(alpha = 0.5)
To remove the seasonal (monthly) trend, we fit a linear model with month as a categorical predictor and keep its residuals.
# Work with a single city first so the seasonal pattern is easy to see.
abilene <- txhousing %>%
  filter(city == "Abilene")

# Raw log-sales series for Abilene.
ggplot(abilene, aes(date, log(sales))) +
  geom_line()

# Model log(sales) with month as a categorical predictor; the residuals are
# what remains once the average monthly effect has been removed.
# na.exclude makes resid() return NA for rows with missing sales, so the
# residual vector always has one entry per row of `abilene` and the column
# assignment below cannot fail on a length mismatch.
mod <- lm(log(sales) ~ factor(month), data = abilene, na.action = na.exclude)
abilene$rel_sales <- resid(mod)

# Deseasonalised (relative) sales for Abilene.
ggplot(abilene, aes(date, rel_sales)) +
  geom_line()
# Repeat the monthly-effect model for every city: within each city group, fit
# log(sales) ~ month and store the residuals as `rel_sales`.
# na.exclude keeps the residual vector aligned with the group's rows by
# returning NA where sales is missing.
# ungroup() drops the city grouping afterwards so later operations on
# `txhousing` are not silently performed per city.
txhousing <- txhousing %>%
  group_by(city) %>%
  mutate(
    rel_sales = resid(
      lm(log(sales) ~ factor(month), na.action = na.exclude)
    )
  ) %>%
  ungroup()
# Deseasonalised sales for every city (faint lines) with the cross-city mean
# at each date overlaid in red.
# `fun` replaces the `fun.y` argument of the summary stat, which is deprecated
# since ggplot2 3.3.0.
ggplot(txhousing, aes(date, rel_sales)) +
  geom_line(aes(group = city), alpha = 1 / 5) +
  geom_line(stat = "summary", fun = "mean", colour = "red")
# Read the speed dating data from a CSV file; the path is relative, so the
# file is looked up in the current working directory.
sdating <- read.csv("Speed Dating Data.csv")
This dataset was compiled by Columbia Business School professors Ray Fisman and Sheena Iyengar for their paper "Gender Differences in Mate Selection: Evidence From a Speed Dating Experiment". Data was gathered from about 8400 participants in experimental speed dating events from 2002-2004. During the events, the attendees would have a four-minute “first date” with every other participant of the opposite sex. At the end of their four minutes, participants were asked if they would like to see their date again. They were also asked to rate their date on six attributes: Attractiveness, Sincerity, Intelligence, Fun, Ambition, and Shared Interests.
# State outlines used as the map background.
us <- map_data("state")

# Plot the rows of `mymap` by longitude/latitude on top of the state outlines,
# sized by age, with one panel per race.
# NOTE(review): `mymap` is not defined in this excerpt; presumably it is
# derived from the speed dating data - confirm.
# The viewing window is set through coord_quickmap() rather than xlim()/ylim():
# scale limits discard out-of-range values, which removes polygon vertices and
# distorts the state outlines, whereas coordinate limits only zoom the view.
ggplot(mymap, aes(longitude, latitude)) +
  geom_polygon(
    data = us,
    aes(x = long, y = lat, group = group),
    color = "black", fill = NA, alpha = .5
  ) +
  geom_point(aes(size = age), alpha = .7, color = "blue") +
  facet_wrap(~ race) +
  coord_quickmap(xlim = c(-100, -60), ylim = c(25, 50))
# State outlines used as the map background.
us <- map_data("state")

# Same map, restricted to rows of `mymap` with age above 30.
# NOTE(review): `mymap` is not defined in this excerpt; presumably it is
# derived from the speed dating data - confirm.
# The viewing window is set through coord_quickmap() rather than xlim()/ylim():
# scale limits discard out-of-range values, which removes polygon vertices and
# distorts the state outlines, whereas coordinate limits only zoom the view.
ggplot(filter(mymap, age > 30), aes(longitude, latitude)) +
  geom_polygon(
    data = us,
    aes(x = long, y = lat, group = group),
    color = "black", fill = NA, alpha = .5
  ) +
  geom_point(aes(size = age), alpha = .7, color = "blue") +
  facet_wrap(~ race) +
  coord_quickmap(xlim = c(-100, -60), ylim = c(25, 50))