R - ggplot2 contour plot - r

I am trying to replicate the code from Andrew Ng's Machine Learning course on Coursera in R (as the course is in Octave).
Basically I have to plot a non linear decision boundary (at p = 0.5) for a polynomial regularized logistic regression.
I can easily replicate the plot with the base library:
contour(u, v, z, levels = 0)
points(x = data$Test1, y = data$Test2)
where:
u <- v <- seq(-1, 1.5, length.out = 100)
and z is a matrix 100x100 with the values of z for every point of the grid.
Dimension of data is 118x3.
I cannot do it in ggplot2. Does somebody know how to replicate the same in ggplot2? I tried with:
z = as.vector(t(z))
ggplot(data, aes(x = Test1, y = Test2) + geom_contour(aes(x = u, y =
v, z = z))
But I get the error: Aesthetics must be either length 1 or the same as the data (118): colour, x, y, shape
Thanks.
EDIT (Adding plot created from code of missuse):

What you need is to convert the coordinates into long format. Here is an example using volcano data set:
data(volcano)
in base R:
contour(volcano)
with ggplot2:
library(tidyverse)
as.data.frame(volcano) %>% #convert the matrix to data frame
rownames_to_column() %>% #get row coordinates
gather(key, value, -rowname) %>% #convert to long format
mutate(key = as.numeric(gsub("V", "", key)), #convert the column names to numbers
rowname = as.numeric(rowname)) %>%
ggplot() +
geom_contour(aes(x = rowname, y = key, z = value))
if you would like to label it directly as in base R plot you can use library directlabels:
First map the color/fill to a variable:
as.data.frame(volcano) %>%
rownames_to_column() %>%
gather(key, value, -rowname) %>%
mutate(key = as.numeric(gsub("V", "", key)),
rowname = as.numeric(rowname)) %>%
ggplot() +
geom_contour(aes(x = rowname,
y = key,
z = value,
colour = ..level..)) -> some_plot
and then
library(directlabels)
direct.label(some_plot, list("far.from.others.borders", "calc.boxes", "enlarge.box",
box.color = NA, fill = "transparent", "draw.rects"))
to add markers at specific coordinates you just need to add another layer with appropriate data:
the previous plot
as.data.frame(volcano) %>%
rownames_to_column() %>%
gather(key, value, -rowname) %>%
mutate(key = as.numeric(gsub("V", "", key)),
rowname = as.numeric(rowname)) %>%
ggplot() +
geom_contour(aes(x = rowname, y = key, z = value)) -> plot_cont
add layer with points for instance:
plot_cont +
geom_point(data = data.frame(x = c(35, 47, 61),
y = c(22, 37, 15)),
aes(x = x, y = y), color = "red")
you can add any type of layer this way: geom_line, geom_text to name a few.
EDIT2: to change the scale of the axis there are several options, one is to assign appropriate rownames and colnames to the matrix:
I will assign a sequence from 0 - 2 for the x axis and 0 - 5 to the y axis:
# Use the matrix dimnames as axis coordinates: rows get an evenly spaced
# sequence from 0 to 2, with one value per row.
rownames(volcano) <- seq(from = 0,
to = 2,
length.out = nrow(volcano)) #or some vector like u
# Columns get an evenly spaced sequence from 0 to 5, with one value per column.
colnames(volcano) <- seq(from = 0,
to = 5,
length.out = ncol(volcano)) #or some vector like v
as.data.frame(volcano) %>%
rownames_to_column() %>%
gather(key, value, -rowname) %>%
mutate(key = as.numeric(key),
rowname = as.numeric(rowname)) %>%
ggplot() +
geom_contour(aes(x = rowname, y = key, z = value))

ggplot2 works most efficiently with data in long format. Here's an example with fake data:
library(tidyverse)
u <- v <- seq(-1, 1.5, length.out = 100)
# Generate fake data
z = outer(u, v, function(a, b) sin(2*a^3)*cos(5*b^2))
rownames(z) = u
colnames(z) = v
# Convert data to long format and plot
as.data.frame(z) %>%
rownames_to_column(var="row") %>%
gather(col, value, -row) %>%
mutate(row=as.numeric(row),
col=as.numeric(col)) %>%
ggplot(aes(col, row, z=value)) +
geom_contour(bins=20) +
theme_classic()

Related

Adding a power curve to scatterplot

I want to add a power curve with confidence intervals to my diameter-weight relationship, which clearly follows a y = a*x^b regression. So far I have used the geom_smooth "loess" version, but the fit is not quite right. Any suggestions on how to add a power regression line would be much appreciated. Below is the code I used:
p2<-ggplot(Data,aes(x=Diameter,y=Wet_weight,colour=Site))+
geom_point(size=3.5,alpha=0.3)+
geom_smooth(aes(group=Species),method=loess,colour="black")+
labs(x="\nUmbrella diamter (mm)",y="Wet weight (mg)\n")+theme_classic()+
scale_colour_manual(values=c("black","dark blue","blue","dark green","green"))+
theme(axis.title.x=element_text(size=20),
axis.text.x=element_text(size=18,colour="black"),
axis.title.y=element_text(size=20),
axis.text.y=element_text(size=18,colour="black"),
axis.ticks=element_line(colour="black",size=1),
axis.line=element_line(colour="black",size=1,linetype="solid"),
legend.position=c(0.18,0.75),
legend.text=element_text(colour="black",size=17),
legend.title=element_text(colour="black",size=18))
p2
Thank you!
I used this to get many equations, R2, and plots.
df= #change your data frame so it fits the current code
variables=c("group","year") #if you have multiple groups/seasons/years/elements add them here
df$y= #which variable will be your y
df$x= #which variable will be your x
#No changes get the equations
text=df %>%
group_by(across(all_of(variables))) %>% #your grouping variables
do(broom::tidy(lm(log(y) ~ log(x), data = .))) %>%
ungroup() %>%
mutate(y = round(ifelse(term=='(Intercept)',exp(estimate),estimate),digits = 2)) %>% #your equation values rounded to 2
select(-estimate,-std.error,-statistic ,-p.value) %>%
pivot_wider(names_from = term,values_from = y) %>%
rename(.,a=`(Intercept)`,b=`log(x)`)
#CHANGE before running!! add your grouping variables
rsq=df %>%
split(list(.$group,.$year)) %>% #---- HERE add the names after $
map(~lm(log(y) ~ log(x), data = .)) %>%
map(summary) %>%
map_dbl("r.squared") %>%
data.frame()
#Join the R2 and y results for the plot in a single data frame and write the equations
labels.df=mutate(rsq,groups=row.names(rsq)) %>%
separate(col = groups,into = c(variables),sep = "[.]",
convert = TRUE, remove = T, fill = "right") %>%
rename("R"='.') %>%
left_join(text,.) %>%
mutate(R=round(R,digits = 4), #round your R2 digits
eq= paste('y==',a,"~x^(",b,")", sep = ""),
rsql=paste("R^2==",R),
full= paste('y==',a,"~x^(",b,")","~~R^2==",R, sep = ""))
# plot
ggplot(df,aes(x = x,y = y)) +
geom_point(size=4,mapping = aes(
colour=factor(ifelse(is.na(get(variables[2])),"",(get(variables[2])))), #points colour
shape=get(variables[1]))) + # different shapes
facet_wrap(get(variables[1])~ifelse(is.na(get(variables[2])),"",get(variables[2])),
scales = "free",labeller = labeller(.multi_line = F))+ #for multiple groups; join text in one line
stat_smooth(mapping=aes(colour=get(variables[1])), #colours for our trend
method = 'nls', formula = 'y~a*x^b',
method.args = list(start=c(a=1,b=1)),se=FALSE) +
geom_text(labels.df,x = Inf, y = Inf,size=5, mapping = aes(label = (eq)), parse = T,vjust=1, hjust=1)+
geom_text(labels.df,x = Inf,y = Inf,size=5, mapping = aes(label = (rsql)), parse = T,vjust=2.5, hjust=1)+
#scale_y_log10() + #add this to avoid problems with big y values
labs(x="Your x label",y="your y label")+
theme_bw(base_size = 16) +
theme(legend.position = "none",
strip.background = element_rect(fill="#b2d6e2"))

How to add a gradient fill to a geom_density chart

I have a dataset where I'd like to plot the density of one column and add a gradient fill that is associated with another column.
For example, this code creates the following plot
library(datasets)
library(tidyverse)
df <- airquality
df %>%
group_by(Temp) %>%
mutate(count = n(),
avgWind = mean(Wind)) %>%
ggplot(aes(x = Temp, fill = avgWind)) +
geom_density()
What I'd like is for the plot to have a gradient fill that indicates what the average wind (avgWind) was at each temperature along the x-axis.
I've seen some examples that allow me to create a gradient fill that is associated with the values on the x-axis itself (in this case, Temp) or by percentile/quantiles, but I'd like the gradient fill to be associated with an additional variable.
It's sort of like this, but instead of a bar plot, I'd like to keep it as a smoothed density chart:
df %>%
group_by(Temp) %>%
mutate(count = n(),
avgWind = mean(Wind)) %>%
ggplot(aes(x = (Temp), fill = avgWind, group = Temp)) +
geom_bar(aes(y = (..count..)/sum(..count..)))
You can't do gradient fills in geom_polygon so the usual solution is to draw lots of line segments. For example you could do something like this:
library("datasets")
library("tidyverse")
library("viridis")
df <- airquality
df <- df %>%
group_by(Temp) %>%
mutate(count = n(), avgWind = mean(Wind))
## Since we (presumably) want continuous fill, we need to interpolate to
## get avgWind at each Temp value.
## The edges are grey because KDE is estimating density
## Where we don't know the relationship between temp and avgWind
d2fun <- approxfun(df$Temp, df$avgWind)
#> Warning in regularize.values(x, y, ties, missing(ties)): collapsing to unique
#> 'x' values
dens <- density(df$Temp)
dens_df <- data.frame(x = dens$x, y = dens$y, fill = d2fun(dens[["x"]]))
ggplot(dens_df) +
geom_segment(aes(x = x, xend = x, y = 0, yend = y, color = fill)) +
scale_color_viridis()

How to plot a(n unknown) number of data series as geom_line in same chart

My first Q here, so please go lightly if I'm out of step anywhere.
I'm trying to code R to produce a single chart to contain a number of data series lines. The number of data series may vary but will be provided in the data frame. I have tried to rearrange another thread's content to print the geom_line , but not successfully.
The logic is:
#desire to replace loop of 1:5 with ncol(df)
print(ggplot(df,aes(x=time))
for (i in 1:5) {
print (+ geom_line(aes(y=df[,i]))
}
#functioning geom point loops ggplot production:
for (i in 1:5) {
print(ggplot(df,aes(x=time,y=df[,i]))+geom_point())
}
#functioning multi-line ggplot where n is explicit:
ggplot(data=df, aes(x=time), group=1) +
geom_line(aes(y=df$`3`))+
geom_line(aes(y=df$`4`))
The functioning example code produces n number of point charts, 5 in this case. I would like just one chart to contain n line series.
This may be similar to How to plot n dimensional matrix? for which there are currently no relevant answers
Any contributions much appreciated, thanks
You can use gather from tidyverse "world" to do that.
As you didn't supply sample data, I used mtcars.
I created two data.frames one with 3 columns one with 9. In each one of them I plotted all of the variables against the variable mpg.
library(tidyverse)
df3Columns <- mtcars[, 1:4]
df9Columns <- mtcars[, 1:10]
df3Columns %>%
gather(var, value, -mpg) %>%
ggplot(aes(mpg, value, group = var, color = var)) +
geom_line()
df9Columns %>%
gather(var, value, -mpg) %>%
ggplot(aes(mpg, value, group = var, color = var)) +
geom_line()
Edit - using the sample data in comments.
library(tidyverse)
df %>%
rownames_to_column("time") %>%
gather(var, value, -time) %>%
ggplot(aes(time, value, group = var, color = var)) +
geom_line()
Sample data:
df <- structure(list("39083" = c(96, 100, 100), "39090" = c(99, 100, 100), "39097" = c(99, 100, 100)), row.names = 3:5, class = "data.frame")
To strictly answer your question, you can simply store your ggplot in a variable and add the geom_line one by one:
df <- structure(list("39083" = c(96, 100, 100), "39090" = c(99, 100, 100), "39097" = c(99, 100, 100)), row.names = 3:5, class = "data.frame")
g <- ggplot(df, aes(x = 1:nrow(df)))
for (i in colnames(df))
{
g <- g + geom_line(y = df[,i])
}
g <- g + scale_y_continuous(limits = c(min(df), max(df)))
print(g)
However, this is not a very convenient solution. I would highly recommend to refactor your data frame to be more ggplot style.
# Reshape df to long format: one row per (time, series) observation.
# Build one piece per column and bind everything in a single rbind() call;
# growing a data frame with rbind() inside a loop copies it on every pass.
df.ultimate <- do.call(
  rbind,
  c(
    # Empty prototype keeps the result a data frame even if df has no columns.
    list(data.frame(time = numeric(), value = numeric(), group = character())),
    lapply(colnames(df), function(i) {
      # seq_len() stays empty for a zero-row df, unlike 1:nrow(df).
      data.frame(time = seq_len(nrow(df)), value = df[, i], group = i)
    })
  )
)
g <- ggplot(df.ultimate, aes(x = time, y = value, color = group))
g <- g + geom_line()
print(g)
A one-line solution:
ggplot(data.frame(time = rep(1:nrow(df), ncol(df)),
value = as.vector(as.matrix(df)),
group = rep(colnames(df), each = nrow(df))),
aes(x = time, y = value, color = group)) + geom_line()

fill area between two lines with different x-values

I have a data frame with two columns x's and y's. Each row represents a line and in each cell is a list with 51 consecutive observations (so 2 lists in each row for x and y).
I want to fill the space between the lines in the data frame.
The problem is that there's a randomness in x and y, so I can't just take the ymin and ymax for each data point on x.
This code would create sample data (with only 2 lines of 11 observations each) that is similar to my actual dataset:
library(data.table)
# Build a reproducible two-row data.table of sample lines.
#
# Each row holds one line: `x` and `y` are list columns, each containing an
# 11-point grid from 0 to 1 whose points 2..9 are shifted by normal noise.
# The RNG seed is reset to 42 on every call.
genData <- function() {
  set.seed(42)

  # Grid from 0 to 1 in steps of 0.1 with points 2..9 perturbed by
  # normal noise (mean `center`, sd 0.1).
  jitter_grid <- function(center) {
    grid <- seq(0, 1, by = 0.1)
    inner <- 2:9
    grid[inner] <- grid[inner] + rnorm(length(inner), center, 0.1)
    grid
  }

  # One line as a single-row data.table; x noise is drawn before y noise.
  make_line <- function(m_x, m_y) {
    xs <- jitter_grid(m_x)
    ys <- jitter_grid(m_y)
    data.table(x = list(xs), y = list(ys))
  }

  # Generate the lines in the same order as they are stacked.
  lower <- make_line(-0.1, -0.1)
  upper <- make_line(0.1, 0.1)
  rbind(lower, upper)
}
See if this is what you have in mind?
library(dplyr)
library(ggplot2)
library(data.table)
df <- genData()
df %>%
  # add identifier variable to each line
  mutate(id = seq(1, n())) %>%
  # make each element of the list its own row; the list columns are named
  # explicitly because calling unnest() without `cols` is deprecated
  tidyr::unnest(cols = c(x, y)) %>%
  # add sequence identifier, from start of line 1 to its end, then
  # from the end of line 2 to its start
  group_by(id) %>%
  mutate(order = ifelse(id == 1, seq(1, n()), n() + seq(n(), 1))) %>%
  ungroup() %>%
  # sort data frame so the points trace the polygon outline in order
  arrange(order) %>%
  # plot
  ggplot(aes(x = x, y = y)) +
  geom_polygon(aes(group = 1), fill = "grey20", alpha = 0.5) +
  geom_path(aes(color = factor(id)), size = 2)

Normalizing series for plotting in ggplot2

I have a dataframe that I would like to plot, generated by the following code.
df_rn1 = as.data.frame(cbind(rnorm(40, 1, 1), rep("rn1", 40)))
df_rn2 = as.data.frame(cbind(rnorm(40, 10, 1), rep("rn2", 40)))
df_rn3 = as.data.frame(cbind(rnorm(40, 100, 1), rep("rn3", 40)))
df_test = rbind(df_rn1, df_rn2, df_rn3)
colnames(df_test) <- c("value", "type")
I would like to plot the dataframe normalized by the respective first observation s.t. they are scaled properly. However, I am not getting further than this:
ggplot(aes(x = rep(1:40, 3), y=as.numeric(as.character(value)), color = type), data = df_test) +
geom_line()
Is it possible to do the normalization by types directly in the ggplot code?
Thx
How about this?
library(tidyverse);
df_test %>%
group_by(type) %>%
mutate(
value = as.numeric(as.character(value)),
value.scaled = (value - mean(value)) / sd(value),
idx = 1:n()) %>%
ggplot(aes(idx, value.scaled, colour = type)) + geom_line()
Note that values are scaled within type; not sure what you're after, for global scaling, see @ManishSaraswat's answer.
You can use scale function to normalize the values.
df_test %>%
  # `value` was built with cbind() alongside a character column, so it is
  # stored as character (or factor); scale() fails on non-numeric input.
  # Convert first, then drop the one-column matrix that scale() returns.
  mutate(value = as.numeric(scale(as.numeric(as.character(value))))) %>%
  ggplot(aes(x = rep(1:40, 3), y = value, color = type)) +
  geom_line()

Resources