Colour lines by mean of value pairs in ggplot2 - r

When mapping colour to lines in ggplot2, e.g.:
x = data.frame(x = 1:6, y = c(0,0,10,10,0,0))
ggplot(x, aes(x, y, col=y)) + geom_line(size=5)
.. the lines colours are mapped to the first data point of each line segment. Is there any easy way to get ggplot to calculate the mean value of both points instead (ie. so the sloping lines are both scaled to the colour for 5)?

Similar idea as #Richard, but use the zoo package.
library(zoo)
x = data.frame(x = 1:6, y = c(0,0,10,10,0,0))
ggplot(x, aes(x, y, col=rollmean(y, 2, fill = 0))) + geom_line(size=5)

Does this do what you want?
x = data.frame(x = 1:6, y = c(0,0,10,10,0,0))
x$c <- rowMeans(cbind(x$y, c(x$y[-1], NA)))
ggplot(x, aes(x, y, col=c)) + geom_line(size=5)

Related

Which is the equivalen to seaborn hue in ggplot?

I'm starting to program in R and I'm getting stuck in this plot.
This is the plot I'm traying to make:
I'm able to do it with this code:
x <- seq(0, 10,1 )
y = x**2
z= x**3
plot(x, y, type="o", col="blue",xlab='x',ylab="y = x2")
lines(x,z,col="green")
I need to do it ussing ggplot, since I have to add futher formating, but I'm not finding the way to do it, I'm loking for the equivalen of the "hue" function on seaborn.
sns.catplot(x="sex", y="survived", hue="class", kind="point", data=titanic);
To use ggplot2, it is better to prepare a data frame with all the values. Furthermore, it is recommended to work with "long-format" data frame. We can then map the color to class, which is y and z in your example.
x <- seq(0, 10,1 )
y <- x**2
z <- x**3
# Load the tidyverse package, which contains ggplot2 and tidyr
library(tidyverse)
# Create example data frame
dat <- data.frame(x, y, z)
# Conver to long format
dat2 <- dat %>% gather(class, value, -x)
# Plot the data
ggplot(dat2,
# Map x to x, y to value, and color to class
aes(x = x, y = value, color = class)) +
# Add point and line
geom_point() +
geom_line() +
# Map the color as y is blue and z is green
scale_color_manual(values = c("y" = "blue", "z" = "green")) +
# Adjust the format to mimic the base R plot
theme_classic() +
theme(panel.grid = element_blank())
One way would be creating two dataframes separately
library(ggplot2)
df1 <- data.frame(x, y)
df2 <- data.frame(x, z)
ggplot(df1, aes(x, y)) +
geom_line(color = "blue") +
geom_point(color = "blue", shape = 1, size = 2) +
geom_line(data = df2, aes(x, z), color = "green") +
ylab("y = x2")

Adjusting axis in ggtern ternary plots

I am trying to generate a ternary plot using ggtern.
My data ranges from 0 - 1000 for x, y,and z variables. I wondered if it is possible to extend the axis length above 100 to represent my data.
#Nevrome is on the right path, your points will still be plotted as 'compositions', ie, concentrations sum to unity, but you can change the labels of the axes, to indicate a range from 0 to 1000.
library(ggtern)
set.seed(1)
df = data.frame(x = runif(10)*1000,
y = runif(10)*1000,
z = runif(10)*1000)
breaks = seq(0,1,by=0.2)
ggtern(data = df, aes(x, y, z)) +
geom_point() +
limit_tern(breaks=breaks,labels=1000*breaks)
I think there is no direct solution to do this with ggtern. But an easy workaround could look like this:
library(ggtern)
df = data.frame(x = runif(50)*1000,
y = runif(50)*1000,
z = runif(50)*1000,
Group = as.factor(round(runif(50,1,2))))
ggtern() +
geom_point(data = df, aes(x/10, y/10, z/10, color = Group)) +
labs(x="X", y="Y", z="Z", title="Title") +
scale_T_continuous(breaks = seq(0,1,0.2), labels = 1000*seq(0,1,0.2)) +
scale_L_continuous(breaks = seq(0,1,0.2), labels = 1000*seq(0,1,0.2)) +
scale_R_continuous(breaks = seq(0,1,0.2), labels = 1000*seq(0,1,0.2))

ggplot: Plotting the bins on x-axis and the average on y-axis

Suppose that I have a dataframe that looks like this:
data <- data.frame(y = rnorm(10,0,1), x = runif(10,0,1))
What I would like to do is to cut the x values into bins, such as:
data$bins <- cut(data$x,breaks = 4)
Then, I would like to plot (using ggplot) the result in a way that the x-axis is the bins, and the y axis is the mean of data$y data points that fall into the corresponding bin.
Thank you in advance
You can use the stat_summary() function.
library(ggplot2)
data <- data.frame(y = rnorm(10,0,1), x = runif(10,0,1))
data$bins <- cut(data$x,breaks = 4)
# Points:
ggplot(data, aes(x = bins, y = y)) +
stat_summary(fun.y = "mean", geom = "point")
# Histogram bars:
ggplot(data, aes(x = bins, y = y)) +
stat_summary(fun.y = "mean", geom = "histogram")
Here is the picture of the points:
This thread is a bit old but here you go, use stat_summary_bin (it might be in the newer versions).
ggplot(data, mapping=aes(x, y)) +
stat_summary_bin(fun.y = "mean", geom="bar", bins=4 - 1) +
ylab("mean")
Since the mean of your y values can be smaller than 0, I recommend a dot plot instead of a bar chart. The dots represent the means. You can use either qplot or the regular ggplot function. The latter is more customizable. In this example, both produce the same output.
library(ggplot2)
set.seed(7)
data <- data.frame(y = rnorm(10,0,1), x = runif(10,0,1))
data$bins <- cut(data$x,breaks = 4, dig.lab = 2)
qplot(bins, y, data = data, stat="summary", fun.y = "mean")
ggplot(data, aes(x = factor(bins), y = y)) +
stat_summary(fun.y = mean, geom = "point")
You can also add error bars. In this case, they show the mean +/- 1.96 times the group standard deviation. The group mean and SD can be obtained using tapply.
m <- tapply(data$y, data$bins, mean)
sd <- tapply(data$y, data$bins, sd)
df <- data.frame(mean.y = m, sd = sd, bin = names(m))
ggplot(df, aes(x = bin, y = mean.y,
ymin = mean.y - 1.96*sd,
ymax = mean.y + 1.96*sd)) +
geom_errorbar() + geom_point(size = 3)

Conditional colouring of a geom_smooth

I'm analyzing a series that varies around zero. And to see where there are parts of the series with a tendency to be mostly positive or mostly negative I'm plotting a geom_smooth. I was wondering if it is possible to have the color of the smooth line be dependent on whether or not it is above or below 0. Below is some code that produces a graph much like what I am trying to create.
set.seed(5)
r <- runif(22, max = 5, min = -5)
t <- rep(-5:5,2)
df <- data.frame(r+t,1:22)
colnames(df) <- c("x1","x2")
ggplot(df, aes(x = x2, y = x1)) + geom_hline() + geom_line() + geom_smooth()
I considered calculating the smoothed values, adding them to the df and then using a scale_color_gradient, but I was wondering if there is a way to achieve this in ggplot directly.
You may use the n argument in geom_smooth to increase "number of points to evaluate smoother at" in order to create some more y values close to zero. Then use ggplot_build to grab the smoothed values from the ggplot object. These values are used in a geom_line, which is added on top of the original plot. Last we overplot the y = 0 values with the geom_hline.
# basic plot with a larger number of smoothed values
p <- ggplot(df, aes(x = x2, y = x1)) +
geom_line() +
geom_smooth(linetype = "blank", n = 10000)
# grab smoothed values
df2 <- ggplot_build(p)[[1]][[2]][ , c("x", "y")]
# add smoothed values with conditional color
p +
geom_line(data = df2, aes(x = x, y = y, color = y > 0)) +
geom_hline(yintercept = 0)
Something like this:
# loess data
res <- loess.smooth(df$x2, df$x1)
res <- data.frame(do.call(cbind, res))
res$posY <- ifelse(res$y >= 0, res$y, NA)
res$negY <- ifelse(res$y < 0, res$y, NA)
# plot
ggplot(df, aes(x = x2, y = x1)) +
geom_hline() +
geom_line() +
geom_line(data=res, aes(x = x, y = posY, col = "green")) +
geom_line(data=res, aes(x = x, y = negY, col = "red")) +
scale_color_identity()

Using ggplot2 how can I plot points with an aes() after plotting lines?

I'm using ggplot2 to show lines and points on a plot. What I am trying to do is to have the lines all the same color, and then to show the points colored by an attribute. My code is as follows:
# Data frame
dfDemo <- structure(list(Y = c(0.906231077471568, 0.569073561538186,
0.0783433165521566, 0.724580209473378, 0.359136092118470, 0.871301974471722,
0.400628333618918, 1.41778205350433, 0.932081770977729, 0.198188442350644
), X = c(0.208755495088456, 0.147750173706688, 0.0205864576474412,
0.162635017485883, 0.118877260137735, 0.186538613831806, 0.137831912094464,
0.293293029083812, 0.219247919537514, 0.0323148791663826), Z = c(11112951L,
11713300L, 14331476L, 11539301L, 12233602L, 15764099L, 10191778L,
12070774L, 11836422L, 15148685L)), .Names = c("Y", "X", "Z"
), row.names = c(NA, 10L), class = "data.frame")
# Variables
X = array(0.1,100)
Y = seq(length=100, from=0, by=0.01)
# make data frame
dfAll <- data.frame()
# make data frames using loop
for (x in c(1:10)){
# spacemate calc
Floors = array(x,100)
# include label
Label = paste(' ', toString(x), sep="")
df1 <- data.frame(X = X * x, Y = Y, Label)
# merge df1 to cumulative df, dfAll
dfAll <- rbind(dfAll, df1)
}
# plot
pl <- ggplot(dfAll, aes(x = X, y = Y, group = Label, colour = 'Measures')) + geom_line()
# add points to plot
pl + geom_point(data=dfDemo, aes(x = X, y = Y)) + opts(legend.position = "none")
This almost works, but I am unable to color the points by Z when I do this. I can plot the points separately, colored by Z using the following code:
ggplot(dfDemo, aes(x = X, y = Y, colour = Z)) + geom_point()
However, if I use the similar code after plotting the lines:
pl + geom_point(data=dfDemo, aes(x = X, y = Y, colour = Z)) + opts(legend.position = "none")
I get the following error:
Error: Continuous variable () supplied to discrete scale_hue.
I don't understand how to add the points to the chart so that I can colour them by a value. I appreciate any suggestion how to solve this.
The issue is that they are colliding the two colour scales, one from the ggplot call and the other from geom_point. If you want the lines of one colour and the points of different colours then you need to erase the colour setting from ggplot call and put it inside the geom_line outside the aes call so it isn't mapped. Use I() to define the colour otherwise it will think is just a variable.
pl <- ggplot(dfAll, aes(x = X, y = Y, group = Label)) +
geom_line(colour = I("red"))
pl + geom_point(data=dfDemo, aes(x = X, y = Y, colour = Z)) +
opts(legend.position = "none")
HTH

Resources