separate colours for ggplot geom_point and geom_seg - r

I am trying to use ggplot() to plot a betadispers object mod1 so that I can better control the colours.
I extracted the centroids from mod1 and I am using geom_point() for plotting the yearly replicates for each dune , geom_seg()to plot the lines for each dune-star, and a second geom_point() statement to plot the centroids.
When I plot this using
scale_colour_manual(values=cols, guide= FALSE)
it only changes the colour of the first geom_points and the geom_seg but not the centroids.
How do I control the colour of each component separately such that the dune points are coloured by cols, the segments are coloured by cols1 and the centroids use cols2?
I'd also like to change the colour of the black outline for each centroid to cols1.
library(vegan)
library(ggplot2)
cols = c("blue","red")
cols1 = c("green","dark orange")
cols2 = c("purple","yellow")
data(dune)
sites = data.frame(year = rep(c(1:5), times= 4), dune = rep(c(1:4),each=5), dune.type = rep(c("A","B"),each=10))
distances <- vegdist(dune, method = "bray")
#create Betadispersion model on betad (effectively a PCoA)
mod1 <- with(sites, betadisper(distances, dune, type = "centroid"))
s = scores(mod1)
# Get points
pnt_sites = as.data.frame(s$sites)
pnt_sites = cbind(pnt_sites, sites)
# Get centroids
pnt_centroids = as.data.frame(s$centroids)
pnt_centroids$dune = rownames(pnt_centroids)
pnt_centroids$dune.type = rep(c("A","B"),each=2)
# Calculate segments
seg = pnt_sites[, c("PCoA1", "PCoA2", "dune")]
tmp = rename(pnt_centroids, c("PCoA1" = "PCoA1_ctr", "PCoA2" = "PCoA2_ctr"))
seg = join(seg, tmp, "dune")
# Plot
ggplot() +
geom_point(
data = pnt_sites,
aes(x = PCoA1, y = PCoA2, colour = dune.type, shape = dune.type),
size = 2
) +
geom_segment(
data = seg,
aes(x = PCoA1, y = PCoA2, xend = PCoA1_ctr, yend = PCoA2_ctr, colour =
dune.type)
) +
geom_point(
data = pnt_centroids,
aes(x = PCoA1, y = PCoA2, fill = dune.type),
size = 3, shape = 21
) +
scale_colour_manual(values=cols, guide= FALSE) +
coord_equal() +
theme_bw()

You can only specify scale_colour_manual() once per plot, not multiple times for each geom_point call, so you need to combine your centroids and sites into one dataframe (adding variables for centroid/site, and centroid A/centroid B/site A/site B), then plot as a single geom_point() layer
#combine centroids and points into one dataframe
pnt_centroids$year = NA
pnt_centroids$data.type = "centroid"
pnt_sites$data.type = "site"
sites_centroids <- rbind(pnt_centroids, pnt_sites)
sites_centroids$type <- paste(sites_centroids$data.type, sites_centroids$dune.type)
When you define vectors of colors to use for scale_fill_manual and scale_colour_manual, each will have 6 levels, to match the number of variables you have (4 point types plus 2 segment types). Your site points and segments do not have a fill attribute, so fill will be ignored when plotting those points and segments, but you still need to define 6 colors in scale_fill_manual so that your filled points for centroids plot properly.
#change the cols vector definitions at the beginning of code to this
cols.fill <- c("purple", "yellow", "purple", "yellow", "purple", "yellow")
cols.colour <- c("green", "dark orange", "green", "dark orange", "blue", "red")
Specify the new colour, fill, and shape scales in the plot code like this:
# Plot
ggplot() +
geom_segment( #segment must go before point so points are in front of lines
data = seg,
aes(x = PCoA1, y = PCoA2, xend = PCoA1_ctr, yend = PCoA2_ctr, colour = dune.type)) +
geom_point(
data = sites_centroids,
aes(x = PCoA1, y = PCoA2, colour = type, fill = type, shape = type), size = 2) +
scale_colour_manual(values = cols.colour) +
scale_fill_manual(values = cols.fill, guide = FALSE) +
scale_shape_manual(values = c(21, 21, 16, 17)) +
coord_equal() +
theme_bw()
Here is the result. The legend gets a bit busy, it may be better to delete it and use text annotation to label the dune types.

Related

Is there an equivalent to points() on ggplot2

I'm working with stock prices and trying to plot the price difference.
I created one using autoplot.zoo(), my question is, how can I manage to change the point shapes to triangles when they are above the upper threshold and to circles when they are below the lower threshold. I understand that when using the basic plot() function you can do these by calling the points() function, wondering how I can do this but with ggplot2.
Here is the code for the plot:
p<-autoplot.zoo(data, geom = "line")+
geom_hline(yintercept = threshold, color="red")+
geom_hline(yintercept = -threshold, color="red")+
ggtitle("AAPL vs. SPY out of sample")
p+geom_point()
We can't fully replicate without your data, but here's an attempt with some sample generated data that should be similar enough that you can adapt for your purposes.
# Sample data
data = data.frame(date = c(2001:2020),
spread = runif(20, -10,10))
# Upper and lower threshold
thresh <- 4
You can create an additional variable that determines the shape, based on the relationship in the data itself, and pass that as an argument into ggplot.
# Create conditional data
data$outlier[data$spread > thresh] <- "Above"
data$outlier[data$spread < -thresh] <- "Below"
data$outlier[is.na(data$outlier)] <- "In Range"
library(ggplot2)
ggplot(data, aes(x = date, y = spread, shape = outlier, group = 1)) +
geom_line() +
geom_point() +
geom_hline(yintercept = c(thresh, -thresh), color = "red") +
scale_shape_manual(values = c(17,16,15))
# If you want points just above and below# Sample data
data = data.frame(date = c(2001:2020),
spread = runif(20, -10,10))
thresh <- 4
data$outlier[data$spread > thresh] <- "Above"
data$outlier[data$spread < -thresh] <- "Below"
ggplot(data, aes(x = date, y = spread, shape = outlier, group = 1)) +
geom_line() +
geom_point() +
geom_hline(yintercept = c(thresh, -thresh), color = "red") +
scale_shape_manual(values = c(17,16))
Alternatively, you can just add the points above and below the threshold as individual layers with manually specified shapes, like this. The pch argument points to shape type.
# Another way of doing this
data = data.frame(date = c(2001:2020),
spread = runif(20, -10,10))
# Upper and lower threshold
thresh <- 4
ggplot(data, aes(x = date, y = spread, group = 1)) +
geom_line() +
geom_point(data = data[data$spread>thresh,], pch = 17) +
geom_point(data = data[data$spread< (-thresh),], pch = 16) +
geom_hline(yintercept = c(thresh, -thresh), color = "red") +
scale_shape_manual(values = c(17,16))

Add legend using geom_point and geom_smooth from different dataset

I really struggle to set the correct legend for a geom_point plot with loess regression, while there is 2 data set used
I got a data set, who is summarizing activity over a day, and then I plot on the same graph, all the activity per hours and per days recorded, plus a regression curve smoothed with a loess function, plus the mean of each hours for all the days.
To be more precise, here is an example of the first code, and the graph returned, without legend, which is exactly what I expected:
# first graph, which is given what I expected but with no legend
p <- ggplot(dat1, aes(x = Hour, y = value)) +
geom_point(color = "darkgray", size = 1) +
geom_point(data = dat2, mapping = aes(x = Hour, y = mean),
color = 20, size = 3) +
geom_smooth(method = "loess", span = 0.2, color = "red", fill = "blue")
and the graph (in grey there is all the data, per hours, per days. the red curve is the loess regression. The blue dots are the means for each hours):
When I tried to set the legend I failed to plot one with the explanation for both kind of dots (data in grey, mean in blue), and the loess curve (in red). See below some example of what I tried.
# second graph, which is given what I expected + the legend for the loess that
# I wanted but with not the dot legend
p <- ggplot(dat1, aes(x = Hour, y = value)) +
geom_point(color = "darkgray", size = 1) +
geom_point(data = dat2, mapping = aes(x = Hour, y = mean),
color = "blue", size = 3) +
geom_smooth(method = "loess", span = 0.2, aes(color = "red"), fill = "blue") +
scale_color_identity(name = "legend model", guide = "legend",
labels = "loess regression \n with confidence interval")
I obtained the good legend for the curve only
and another trial :
# I tried to combine both date set into a single one as following but it did not
# work at all and I really do not understand how the legends works in ggplot2
# compared to the normal plots
A <- rbind(dat1, dat2)
p <- ggplot(A, aes(x = Heure, y = value, color = variable)) +
geom_point(data = subset(A, variable == "data"), size = 1) +
geom_point(data = subset(A, variable == "Moy"), size = 3) +
geom_smooth(method = "loess", span = 0.2, aes(color = "red"), fill = "blue") +
scale_color_manual(name = "légende",
labels = c("Data", "Moy", "loess regression \n with confidence interval"),
values = c("darkgray", "royalblue", "red"))
It appears that all the legend settings are mixed together in a "weird" way, the is a grey dot covering by a grey line, and then the same in blue and in red (for the 3 labels). all got a background filled in blue:
If you need to label the mean, might need to be a bit creative, because it's not so easy to add legend manually in ggplot.
I simulate something that looks like your data below.
dat1 = data.frame(
Hour = rep(1:24,each=10),
value = c(rnorm(60,0,1),rnorm(60,2,1),rnorm(60,1,1),rnorm(60,-1,1))
)
# classify this as raw data
dat1$Data = "Raw"
# calculate mean like you did
dat2 <- dat1 %>% group_by(Hour) %>% summarise(value=mean(value))
# classify this as mean
dat2$Data = "Mean"
# combine the data frames
plotdat <- rbind(dat1,dat2)
# add a dummy variable, we'll use it later
plotdat$line = "Loess-Smooth"
We make the basic dot plot first:
ggplot(plotdat, aes(x = Hour, y = value,col=Data,size=Data)) +
geom_point() +
scale_color_manual(values=c("blue","darkgray"))+
scale_size_manual(values=c(3,1),guide=FALSE)
Note with the size, we set guide to FALSE so it will not appear. Now we add the loess smooth, one way to introduce the legend is to introduce a linetype, and since there's only one group, you will have just one variable:
ggplot(plotdat, aes(x = Hour, y = value,col=Data,size=Data)) +
geom_point() +
scale_color_manual(values=c("blue","darkgray"))+
scale_size_manual(values=c(3,1),guide=FALSE)+
geom_smooth(data=subset(plotdat,Data="Raw"),
aes(linetype=line),size=1,alpha=0.3,
method = "loess", span = 0.2, color = "red", fill = "blue")

Plot different parts of a vector with different colors on the same graph

As from the title suppose this vector and plot:
plot(rnorm(200,5,2),type="l")
This returns this plot
What i would like to know is whether there is a way to make the first half of it to be in blue col="blue" and the rest of it to be in red "col="red".
Similar question BUT in Matlab not R: Here
You could simply use lines for the second half:
dat <- rnorm(200, 5, 2)
plot(1:100, dat[1:100], col = "blue", type = "l", xlim = c(0, 200), ylim = c(min(dat), max(dat)))
lines(101:200, dat[101:200], col = "red")
Not a base R solution, but I think this is how to plot it using ggplot2. It is necessary to prepare a data frame to plot the data.
set.seed(1234)
vec <- rnorm(200,5,2)
dat <- data.frame(Value = vec)
dat$Group <- as.character(rep(c(1, 2), each = 100))
dat$Index <- 1:200
library(ggplot2)
ggplot(dat, aes(x = Index, y = Value)) +
geom_line(aes(color = Group)) +
scale_color_manual(values = c("blue", "red")) +
theme_classic()
We can also use the lattice package with the same data frame.
library(lattice)
xyplot(Value ~ Index, data = dat, type = 'l', groups = Group, col = c("blue", "red"))
Notice that the blue line and red line are disconnected. Not sure if this is important, but if you want to plot a continuous line, here is a workaround in ggplot2. The idea is to subset the data frame for the second half, plot the entire data frame with color as blue, and then plot the second data frame with color as red.
dat2 <- dat[dat$Index %in% 101:200, ]
ggplot(dat, aes(x = Index, y = Value)) +
geom_line(color = "blue") +
geom_line(data = dat2, aes(x = Index, y = Value), color = "red") +
theme_classic()

how to customize names of legend labels in ggplot

I have the following code:
p <- ggplot() + coord_fixed() + xlab("") + ylab("")
base_world <- p + geom_polygon(data=world_map, aes(x=long, y=lat, group=group),
colour="green", fill="whitesmoke") +
geom_point(data = as.data.frame(coordinates(busxy)), size = 1,
mapping = aes(x = busxy#coords[,1], y = busxy#coords[,2],
color = busxy$color)) +
labs(title = "Cities\n", color = "States\n") +
scale_color_manual(labels = col2state$s, values = col2state$c)
It prints this:
The problem is the colors on map doesn't correspond with those in legend.
When I delete scale_color_manual(labels = col2state$s, values = col2state$c) from the plot it's all right but 'States' have names of colors from the data.
My question is: How to leave labels names like in the image but also assign proper colors to those labels as in the map?
In col2state$c are 29 color names (like #29A934)
In col2state$s are 29 state labels like in legend.
Data frame busxy contains 144k records with 29 unique values of states.
Data is from:
library(maps)
world_map <- map_data("world")
busxy <- data.frame(x=bus[[1]]$latitude, y=bus[[1]]$longitude, city=bus[[1]]$city, state=bus[[1]]$state)
bus <- llply(as.list(jfile5), function(x) jsonlite::stream_in(file(x), pagesize = 10000))
and jfile5 is the path to json file contains all data.
scale_color_manual can do without the labels parameter. A way to solve this is scale_color_manual(values = my_colors) where my_colors is the mapping of countries to their color, organized in a named character vector, e.g. c(AZ = "blue", NV = "red", ...)
An example:
df <- data.frame(x=1:3, y = 2:4, f = as.factor(1:3))
my_colors <- c('1'= "blue", '2' = "red", '3' = "yellow")
ggplot(df) + geom_point(aes(x = x, y = y, color = f)) + scale_color_manual(values = my_colors)
Instead of calling the columns, save vectors of the unique values for color and for state labels. Ensure the vectors are ordered to match.
vec_c <- unique(col2state$c)
vec_s <- unique(col2state$s) #may need to re-order, or opt to manually create vector
scale_color_manual(labels = vec_c, values = vec_s)

row column heatmap plot with overlayed circle (fill and size) in r

Here is a graph I am trying to develop:
I have row and column coordinate variables, also three quatitative variables (rectheat = to fill the rectangle heatmap,circlesize = size of circles, circlefill = fill color heatmap). NA should be missing represented by a different color (for example gray color).
The following is data:
set.seed (1234)
rectheat = sample(c(rnorm (10, 5,1), NA, NA), 7*14, replace = T)
dataf <- data.frame (rowv = rep (1:7, 14), columnv = rep(1:14, each = 7),
rectheat, circlesize = rectheat*1.5,
circlefill = rectheat*10 )
dataf
Here is code that I worked on:
require(ggplot2)
ggplot(dataf, aes(y = factor(rowv),x = factor(columnv))) +
geom_rect(aes(colour = rectheat)) +
geom_point(aes(colour = circlefill, size =circlesize)) + theme_bw()
I am not sure if geom_rect is appropriate and other part is fine as I could not get any results except errors.
Here it is better to use geom_tile (heatmap).
require(ggplot2)
ggplot(dataf, aes(y = factor(rowv),
x = factor(columnv))) + ## global aes
geom_tile(aes(fill = rectheat)) + ## to get the rect filled
geom_point(aes(colour = circlefill,
size =circlesize)) + ## geom_point for circle illusion
scale_color_gradient(low = "yellow",
high = "red")+ ## color of the corresponding aes
scale_size(range = c(1, 20))+ ## to tune the size of circles
theme_bw()

Resources