How to show integers when using ggplot2::geom_smooth() - r

In the example below, how can I round the x labels to even numbers? I can't convert them to factors first, because then geom_smooth does not work.
library(ggplot2)

# Reproducible toy data: `a` is a normal draw truncated to integer, `b` is `a`
# plus standard-normal noise.
set.seed(32)
df <- data.frame(a = as.integer(rnorm(n = 250, mean = 2, sd = 0.1)))
df$b <- df$a + rnorm(n = 250)
df$id <- 1

# Second copy of the same rows under a different facet id.
df_2 <- transform(df, id = 2)
df_tot <- rbind(df, df_2)

# Smoothed trend of b against a, one panel per id.
ggplot(data = df_tot, mapping = aes(x = a, y = b)) +
  facet_wrap(~id) +
  geom_smooth()

If we want even numbers, an option is to add labels as a function in scale_x_continuous
library(ggplot2)

# Whole-number tick positions inside the axis limits.
#
# Relabelling the default breaks with an unrelated sequence such as
# seq(2, length.out = length(x)) prints numbers that do not correspond to the
# tick positions, so the ticks themselves are placed on whole numbers and then
# labelled with their true value.
#
# limits: numeric vector of length 2 (lower, upper) supplied by the scale.
# step:   spacing between ticks; use step = 2 to keep only even numbers.
# Returns a numeric vector of break positions (possibly empty).
integer_breaks <- function(limits, step = 1) {
  first <- ceiling(limits[1] / step) * step
  last <- floor(limits[2] / step) * step
  if (!is.finite(first) || !is.finite(last) || first > last) {
    return(numeric(0))
  }
  seq(first, last, by = step)
}

ggplot(df_tot, aes(x = a, y = b)) +
  geom_smooth() +
  facet_wrap(~id) +
  scale_x_continuous(
    breaks = integer_breaks,
    labels = function(x) as.character(x)
  )

Related

Labelling points on a geom_point() based on the equation of geom_abline()

I have a fun problem for you today. Any help would be amazing.
I have a geom_point() plot:
# Random integer scatter data.
set.seed(1)
list1 <- sample(10, 100, replace = TRUE)
list2 <- sample(7, 100, replace = TRUE)
df <- data.frame(list1, list2)
df
ggplot(data = df, aes(x = list1, y = list2)) +
  geom_point()

# Reference line through (10, 1) and (6, 7).
x3 <- c(10, 6)
y3 <- c(1, 7)
slope3 <- diff(y3) / diff(x3)
# The slope variable is slope3; `slope` is never defined, so referring to it
# stops with "object 'slope' not found".
intercept3 <- y3[1] - slope3 * x3[1]

ggplot(data = df, aes(x = list1, y = list2)) +
  geom_point() +
  geom_abline(data = NULL, intercept = intercept3, slope = slope3, colour = "red")
What I would ideally like to do is label all the points that lie on, or to the right of, the geom_abline() I have plotted. I wanted to use the geom_label_repel() function to make this look tidy, but when I tried this it just labelled every point! If possible, I would also like to further subset the data points on or to the right of the plotted geom_abline() in the future based on other criteria.
Thank you in advance!
You can try this:
# Random integer scatter data.
set.seed(1)
list1 <- sample(10, 100, replace = TRUE)
list2 <- sample(7, 100, replace = TRUE)
df <- data.frame(list1, list2)
df
ggplot(data = df, aes(x = list1, y = list2)) +
  geom_point()

# Reference line through (10, 1) and (6, 7).
x3 <- c(10, 6)
y3 <- c(1, 7)
slope3 <- diff(y3) / diff(x3)
intercept3 <- y3[1] - slope3 * x3[1]

# Mutate
# Height of the line at each point's x position.
df$prod <- intercept3 + slope3 * df$list1
# The line slopes downwards, so "on or to the right of the line" is the same
# as "on or above it". `>=` (rather than `>`) keeps points lying exactly on
# the line, such as (10, 1), (8, 4) and (6, 7). Unlabelled points get NA.
df$label <- ifelse(df$list2 >= df$prod, "text", NA)

ggplot(data = df, aes(x = list1, y = list2, label = label)) +
  geom_point() +
  geom_abline(data = NULL, intercept = intercept3, slope = slope3, colour = "red") +
  geom_text(vjust = -0.5)

ggplot2: How to get geom_text() to play nice with facet_grid()?

So I'm trying to plot a couple of curves using ggplot(), and I would like to have each curve sitting in its own plot in a facet_grid. All of this works fine.
The problem is that I'd also like to annotate the curve with the x value corresponding to the peak y value. I tried using geom_text(), and I tried implementing it as shown below, but it doesn't seem to quite work. It's clearly printing something onto the plot, but not the way I hoped it would; i.e., each plot has its corresponding x value printed on it at the location (x, max(y)).
I suspect I've not implemented the ifelse() correctly, but I'm not experienced enough with R to figure out what exactly the problem is.
Any suggestions on where I'm going wrong?
Output:
Data + code:
library("ggplot2")

# Two normal densities evaluated on the same grid: sd = 1 is tagged "z",
# sd = 2 is tagged "z1".
x_grid <- seq(5, 15, length.out = 1000)
x <- c(x_grid, x_grid)
y <- c(dnorm(x_grid, mean = 10, sd = 1), dnorm(x_grid, mean = 10, sd = 2))
z <- rep(c("z", "z1"), each = 1000)
df <- data.frame(x, y, z)

# One panel per curve; the text layer prints x wherever y equals max(y).
ggplot(data = df, aes(x, y)) +
  geom_line() +
  facet_grid(. ~ z) +
  geom_text(
    data = df,
    aes(x, y, label = ifelse(y == max(y), as.numeric(x), "")),
    inherit.aes = FALSE,
    hjust = 0,
    vjust = 0
  )
Edit: the output I'm expecting is something like this:
You need to fix two things.
(1) calculate max per z
(2) avoid duplicate y_values
The following code should fix both:
library(dplyr)

# Flag exactly one row per curve: the first row that reaches that curve's
# maximum. Every row is kept, so geom_line() still draws the complete curve;
# de-duplicating on y would also discard ordinary points that happen to share
# a y value with another row (the density is symmetric around its mean).
df2 <- df %>%
  group_by(z) %>%
  mutate(
    y_label = ifelse(row_number() == which.max(y), as.character(x), "")
  ) %>%
  ungroup()
as.data.frame(df2)

ggplot(data = df2, aes(x, y)) +
  geom_line() +
  facet_grid(. ~ z) +
  geom_text(aes(label = y_label), hjust = 0, vjust = 0)
You need to provide geom_text a data.frame with data for z and z1.
x y z
z 9.994995 0.3989373 z
z1 9.994995 0.1994705 z1
How to get that? Well, here's one way.
# One data frame per value of z.
df.split <- split(df, f = df$z)
# From each piece keep the single row where y is largest, then stack the rows.
df.max <- do.call(
  rbind,
  lapply(df.split, function(piece) piece[which.max(piece$y), ])
)
which you can then plot
# Curves come from the full data; the text layer uses the one-row-per-facet
# peak table. The label shows the x position of the peak (what the question
# asks for) rather than the peak height.
ggplot(data = df, aes(x, y)) +
  geom_line() +
  geom_text(data = df.max, aes(x = x, y = y, label = round(x, 2))) +
  facet_grid(. ~ z)
Get the means and maxes for each z:
# Per-curve summaries: the largest y and the mean x.
Ys <- summarise(group_by(df, z), maxY = max(y))
Xs <- summarise(group_by(df, z), meanX = mean(x))
Plot with the geom_text
# Combine the two summaries into one label table (joined on the shared column).
peak_labels <- left_join(Xs, Ys)

ggplot(data = df, aes(x, y)) +
  geom_line() +
  geom_text(data = peak_labels, aes(meanX, maxY, label = meanX)) +
  facet_grid(. ~ z)
Or more succinctly
# Both summaries computed in a single pass per curve.
curve_summary <- df %>%
  group_by(z) %>%
  summarise(maxY = max(y), meanX = mean(x))

ggplot(data = df, aes(x, y)) +
  geom_line() +
  geom_text(data = curve_summary, aes(meanX, maxY, label = meanX)) +
  facet_grid(. ~ z)

Format lubridate duration column for axis labelling

I want to plot data containing numbers and durations. For the transformation of the character vector I chose the lubridate package. Unfortunately, the duration is always printed in seconds on the x axis:
set.seed(20161027)
a <- c("00:30:45", "00:59:07", "01:08:30", "02:10:09", "02:20:53")
b <- rnorm(5)

# This is what I want
# (the clock strings are plotted as they are)
example <- data.frame(a = a, b = b)
ggplot(example, aes(x = a, y = b)) +
  geom_point()

# Same data with `a` parsed to a lubridate duration.
library(lubridate)
a <- as.duration(hms(a))
example <- data.frame(a = a, b = b)
ggplot(example, aes(x = a, y = b)) +
  geom_point()
This is how I want it to look.
This is how it currently looks.
Is there a lubridate way to format the time to a prettier format? Or do I need to preserve the character vector for axis labels?
Not sure why you'd want to do this with lubridate. By using as.POSIXct:
# Parse the clock strings as date-times so ggplot2 uses a time axis.
clock_strings <- c("00:30:45", "00:59:07", "01:08:30", "02:10:09", "02:20:53")
a <- as.POSIXct(clock_strings, format = "%H:%M:%S")
b <- rnorm(5)
example <- data.frame(a = a, b = b)

ggplot(example, aes(x = a, y = b)) +
  geom_point()
I think @HubertL's solution is good enough, but if you insist on using lubridate, you can try
library(lubridate)
set.seed(20161027)
a <- c("00:30:45", "00:59:07", "01:08:30", "02:10:09", "02:20:53")
b <- rnorm(n = 5)
example <- data.frame(a = hms(a), b = b)

# Tick positions in decimal hours and the clock-style text shown at each one.
hour_breaks <- c(0.5, 1, 1.5, 2)
hour_labels <- c("00:30", "01:00", "01:30", "02:00")

# x is the period expressed in decimal hours.
ggplot(
  data = example,
  aes(x = a$hour + a$minute / 60 + a$second / 60^2, y = b)
) +
  geom_point() +
  scale_x_continuous(name = "a", breaks = hour_breaks, labels = hour_labels)

Regression line in ggplot2

I am trying to add a regression line to the below plot using ggplot, but it keeps giving me vague errors. I am a newbie, and none of the other questions regarding this subject solved my problem, so please don't get pissed off about similar questions already answered.
# library() attaches one package per call. In library(UsingR, ggplot2) the
# second argument is matched to `help`, so ggplot2 is not attached by it.
library(UsingR)
library(ggplot2)
data(galton)

y <- galton$child
x <- galton$parent

# Frequency of every (child, parent) height combination.
freqData <- as.data.frame(table(galton$child, galton$parent))
names(freqData) <- c("child", "parent", "freq")
regression <- coef(lm(y ~ x))
freqData <- freqData[freqData$freq > 0, ]

g <- ggplot(data = freqData, aes(x = parent, y = child))
g <- g + scale_size(range = c(2, 20), guide = "none")
# show.legend is a layer argument, not an aesthetic, so it belongs outside
# aes(); inside aes() it is only reported as an unknown aesthetic.
g <- g + geom_point(aes(size = freq + 20), colour = "grey50", show.legend = FALSE)
g <- g + geom_point(aes(colour = freq, size = freq))
g <- g + scale_colour_gradient(low = "lightblue", high = "darkblue")
I have tried the below commands:
# Attempt 1: least-squares line without a confidence band.
lm_layer <- geom_smooth(method = "lm", se = FALSE)
g <- g + lm_layer
(it yields this error: geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?)
and
# Attempt 2: draw the line from a fixed intercept and slope.
abline_layer <- geom_abline(
  intercept = 28.942,
  slope = 0.646,
  colour = "red",
  size = 3
)
g <- g + abline_layer
(but nothing appears on my plot...)
Here is a data.table solution (write-up prompted by @MikeWise, to showcase the cool plot you designed)
# library() attaches one package per call. In library(UsingR, ggplot2) the
# second argument is matched to `help`, so ggplot2 is not attached by it.
library(UsingR)
library(ggplot2)
library(data.table)
data(galton)

# making data.table object
# as.data.table() returns a converted copy, so `galton` itself is left as a
# plain data frame (setDT() on an alias converts by reference).
dat <- as.data.table(galton)

# getting frequencies
# Grouping the raw numeric columns keeps child and parent numeric, which is
# what geom_smooth() needs to fit a line.
freqData <- dat[, .(freq = .N), by = .(child, parent)]

g <- ggplot(data = freqData, aes(x = parent, y = child))
g <- g + scale_size(range = c(2, 20), guide = "none")
# show.legend is a layer argument, not an aesthetic, so it belongs outside
# aes().
g <- g + geom_point(aes(size = freq + 20), colour = "grey50", show.legend = FALSE)
g <- g + geom_point(aes(colour = freq, size = freq))
g <- g + scale_colour_gradient(low = "lightblue", high = "darkblue")
g <- g + geom_smooth(method = "lm", se = FALSE)
g
First option
Keep using the function table. We use type.convert to convert the variables parent and child to their appropriate types before plotting the chart.
# library() attaches one package per call. In library(UsingR, ggplot2) the
# second argument is matched to `help`, so ggplot2 is not attached by it.
library(UsingR)
library(ggplot2)
data(galton)

# Create data frame
freqData <- data.frame(table(galton$child, galton$parent))
names(freqData) <- c("child", "parent", "freq")
freqData <- freqData[freqData$freq > 0, ]

# Convert factors to numeric
# Go through character so the factor labels (not the level codes) are parsed.
# as.is = TRUE is stated explicitly: recent R versions warn when it is left
# out, and it keeps any non-numeric column as character.
freqData[] <- lapply(
  freqData,
  function(column) type.convert(as.character(column), as.is = TRUE)
)
Second option
Using the function aggregate, to prevent type conversion.
# Count rows per (parent, child) pair.
# Only one column is aggregated, so the result has exactly the columns
# parent, child and freq. Aggregating the whole data frame produces one count
# column per original column, which leaves a second column named "parent"
# after renaming just the third one.
freqData <- aggregate(
  data.frame(freq = galton$child),
  by = list(parent = galton$parent, child = galton$child),
  FUN = length
)
Third option
Using dplyr to avoid type conversion.
library(dplyr)

# One row per (parent, child) pair with its frequency. ungroup() removes the
# grouping that summarise() leaves on `parent`, so later verbs on freqData are
# not silently applied per group.
freqData <- galton %>%
  group_by(parent, child) %>%
  summarise(freq = n()) %>%
  ungroup()
Plotting the data frame created previously by one of the three options.
# Plot data
# Bubble plot of the frequency table with a least-squares line on top.
g <- ggplot(data = freqData, aes(x = parent, y = child)) +
  scale_size(range = c(2, 20), guide = "none") +
  # show.legend is a layer argument, not an aesthetic, so it belongs outside
  # aes().
  geom_point(aes(size = freq + 20), colour = "grey50", show.legend = FALSE) +
  geom_point(aes(colour = freq, size = freq)) +
  scale_colour_gradient(low = "lightblue", high = "darkblue") +
  geom_smooth(method = lm, se = FALSE)
g

How to create faceted linear regression plot using GGPLOT

I have a data frame created the following way.
library(ggplot2)

# Two groups of ten named observations, each with two absolute-normal values.
x <- data.frame(
  name = letters[1:10],
  val1 = abs(rnorm(10)),
  val2 = abs(rnorm(10)),
  type = "x"
)
y <- data.frame(
  name = letters[1:10],
  val1 = abs(rnorm(10)),
  val2 = abs(rnorm(10)),
  type = "y"
)
# in reality the number of row could be larger than 10 for each x and y
all <- rbind(x, y)
What I want to do is to create a faceted ggplot that looks roughly like this:
Hence each facet above is the correlation plot of the following:
# Top left facet
all$val1[all$type == "x"]
all$val1[all$type == "y"]
# Top right facet
all$val1[all$type == "x"]
all$val2[all$type == "y"]
# ...etc..
But I'm stuck with the following code:
# Attempted faceted scatter plot with a linear fit and a per-facet
# correlation label.
# NOTE(review): this snippet does not parse as written; `type ~ ` below has no
# right-hand side in either place it appears.
p <- ggplot(all, aes(val1, val2))+ geom_smooth(method = "lm") + geom_point() +
facet_grid(type ~ )
# Calculate correlation for each group
# NOTE(review): ddply() is called without a visible library() call in this
# snippet - presumably plyr is meant; confirm.
cors <- ddply(all, c(type ~ ), summarise, cor = round(cor(val1, val2), 2))
# Print "r=<cor>" at the fixed position (0.5, 0.5) using the `cors` table.
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=0.5, y=0.5)
What's the right way to do it?
Some of your code was incorrect. This works for me:
# ddply() and .() come from plyr, so it has to be attached for this snippet
# to run on its own.
library(plyr)

# Scatter plot with a linear fit, one panel per type.
p <- ggplot(all, aes(val1, val2)) +
  geom_smooth(method = "lm") +
  geom_point() +
  facet_grid(~type)

# Calculate correlation for each group
cors <- ddply(all, .(type), summarise, cor = round(cor(val1, val2), 2))

# Print "r=<cor>" in each panel at the fixed position (1, -0.25).
p + geom_text(data = cors, aes(label = paste0("r=", cor)), x = 1, y = -0.25)
Edit: Following OP's comment and edit. The idea is to re-create the data with all four combinations and then facet.
# ddply() and .() come from plyr, so it has to be attached for this snippet
# to run on its own.
library(plyr)

# I consider the type in your previous data to be xx and yy
# Rows are selected by type instead of by position, and the group size is
# taken from the data rather than hard-coded as 10, so the layout also holds
# for larger inputs. The pairing below needs the same number of rows per type.
is_x <- all$type == "x"
is_y <- all$type == "y"
n_per_type <- sum(is_x)
stopifnot(n_per_type == sum(is_y))

# Four stacked pieces, one per (grp1, grp2) combination: val1 comes from the
# grp1 type and val2 from the grp2 type.
dat <- data.frame(
  val1 = c(rep(all$val1[is_x], 2), rep(all$val1[is_y], 2)),
  val2 = rep(c(all$val2[is_x], all$val2[is_y]), 2),
  grp1 = rep(c("x", "x", "y", "y"), each = n_per_type),
  grp2 = rep(c("x", "y", "x", "y"), each = n_per_type)
)

p <- ggplot(dat, aes(val1, val2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(grp1 ~ grp2)

# Correlation per panel, printed as "r=<cor>" at the fixed position (1, -0.25).
cors <- ddply(dat, .(grp1, grp2), summarise, cor = round(cor(val1, val2), 2))
p + geom_text(data = cors, aes(label = paste0("r=", cor)), x = 1, y = -0.25)
Since your data is not in the appropriate format, some reshaping is necessary before it can be plotted.
Firstly, reshape the data to the long format:
library(reshape2)

# Drop the first column (name) and stack val1/val2 into long format, keeping
# `type` as the identifier.
without_name <- all[-1]
allM <- melt(without_name, id.vars = "type")
Split the values along type and val1 vs. val2:
# One numeric vector per type/variable combination.
allList <- with(allM, split(value, interaction(type, variable)))
Create a list of all combinations:
# Index pairs into allList: each of elements 1 and 3 paired with each of
# elements 2 and 4, in the order (1,2), (1,4), (3,2), (3,4).
index_pairs <- list(c(1, 2), c(1, 4), c(3, 2), c(3, 4))

# Bind the two vectors of every pair side by side.
allComb <- lapply(
  index_pairs,
  function(pair) do.call(cbind, allList[pair])
)
Create a new dataset:
# Turn one two-column combination into a data frame with generic column names
# x and y, remembering the original column names in yval and xval.
label_pair <- function(pair) {
  out <- as.data.frame(pair)
  source_names <- names(out)[1:2]
  out$yval <- source_names[2]
  out$xval <- source_names[1]
  names(out)[1:2] <- c("x", "y")
  out
}

# Stack all combinations into one long data frame.
allNew <- do.call(rbind, lapply(allComb, label_pair))
Plot:
library(ggplot2)
library(plyr)

# Linear fit and points, with rows of panels by yval and columns by xval.
p <- ggplot(allNew, aes(x = x, y = y)) +
  geom_smooth(method = "lm") +
  geom_point() +
  facet_grid(yval ~ xval)

# Calculate correlation for each group
cors <- ddply(
  allNew,
  .(yval, xval),
  summarise,
  cor = round(cor(x, y), 2)
)

# Print "r=<cor>" in every panel at the fixed position (0.5, 0.5).
p + geom_text(data = cors, aes(label = paste0("r=", cor)), x = 0.5, y = 0.5)
There is an additional package ggpubr available now addressing exactly this issue with the stat_cor() function.
library(tidyverse)
library(ggpubr)

# stat_cor() adds the correlation annotation to each panel.
base_plot <- ggplot(data = all, mapping = aes(x = val1, y = val2)) +
  facet_grid(~type)

base_plot +
  geom_smooth(method = "lm") +
  geom_point() +
  stat_cor()

Resources