I'm trying to invert the stacking order of the fill levels in only one bar in ggplot2. Reordering the rows of the data, without defining the column as a factor, used to work, but it no longer does in the newest versions.
Example:
I want to invert the factors in the last column (green up, red down).
library(ggplot2)
# Build every combination of five `a` labels and two `b` labels (10 rows).
dados <- expand.grid(a = letters[1:5], b = letters[1:2])
# Store both label columns as plain character vectors rather than factors.
dados[c("a", "b")] <- lapply(dados[c("a", "b")], as.character)
# One random bar height per row, drawn from a normal with mean 5 and sd 1.
dados$val <- rnorm(10, mean = 5, sd = 1)
# Baseline: one stacked bar per `a`, with segments filled by `b`.
ggplot(data = dados, mapping = aes(x = a, y = val, fill = b)) +
  geom_bar(stat = "identity")
# Rotate the rows: everything except the first row, then the first row last.
dados2 <- rbind(dados[-1, ], dados[1, ])
# Same plot on the re-ordered rows; row order used to affect stacking order.
ggplot(data = dados2, mapping = aes(x = a, y = val, fill = b)) +
  geom_bar(stat = "identity")
I have assigned two additional levels, c and d, to the rows of column b where a is "e" (see below):
a b val
2 b a 4.504735
3 c a 5.396658
4 d a 6.796288
5 e c 5.900308
6 a b 3.900510
7 b b 4.454316
8 c b 5.411198
9 d b 6.389902
10 e d 4.458425
1 a a 4.986175
Then, with scale_fill_manual, I swap the two colours for those new levels:
# Stacked bars filled by `b`, with an explicit colour for each fill level.
# Levels "c" and "d" reuse the colours of "b" and "a" respectively.
ggplot(data = dados2, mapping = aes(x = a, y = val, fill = b)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(
    values = c(a = "red", b = "green", c = "green", d = "red")
  )
Related
Say I have the following data frame:
# dplyr supplies the pipe (%>%) and mutate() used below.
library(dplyr)

# Set seed for RNG
set.seed(33550336)
# Create toy data frame
# Named lookup tables: x/y coordinate per location, multiplier per variable.
loc_x <- c(a = 1, b = 2, c = 3)
loc_y <- c(a = 3, b = 2, c = 1)
scaling <- c(temp = 100, sal = 10, chl = 1)
df <- expand.grid(
  loc_name = letters[1:3],
  variables = c("temp", "sal", "chl"),
  season = c("spring", "autumn")
) %>%
  mutate(
    # expand.grid() returns factor columns by default, and `[` treats a factor
    # index as its integer codes. Converting to character makes each lookup go
    # by name, so it no longer depends on the level order happening to match
    # the order of the lookup vector.
    loc_x = loc_x[as.character(loc_name)],
    loc_y = loc_y[as.character(loc_name)],
    # One uniform draw per row, then scaled by the row's variable.
    value = runif(nrow(.)),
    value = value * scaling[as.character(variables)]
  )
which looks like,
# > head(df)
# loc_name variables season loc_x loc_y value
# 1 a temp spring 1 3 86.364697
# 2 b temp spring 2 2 35.222573
# 3 c temp spring 3 1 52.574082
# 4 a sal spring 1 3 0.667227
# 5 b sal spring 2 2 3.751383
# 6 c sal spring 3 1 9.197086
I want to plot these data in a facet grid using variables and season to define panels, like this:
# Points of value by location, faceted into rows by variable and columns by
# season, using the default (shared) axis scales.
g <- ggplot(df) +
  geom_point(aes(x = loc_name, y = value), size = 5) +
  facet_grid(variables ~ season)
g
As you can see, different variables have very different scales. So, I use scales = "free" to account for this.
# Same faceted point plot, but with scales = "free" so the axes are allowed
# to differ between panels.
g <- ggplot(df) +
  geom_point(aes(x = loc_name, y = value), size = 5) +
  facet_grid(variables ~ season, scales = "free")
g
Mucho convenient. Now, say I want to do this, but plot the points by loc_x and loc_y and have value represented by colour instead of y position:
# Points placed at (loc_x, loc_y) and coloured by value, faceted by variable
# and season. A single diverging colour scale, centred on 50, is shared by
# every panel.
g <- ggplot(df) +
  geom_point(aes(x = loc_x, y = loc_y, colour = value), size = 5) +
  facet_grid(variables ~ season, scales = "free") +
  scale_colour_gradient2(
    low = "#3366CC",
    mid = "white",
    high = "#FF3300",
    midpoint = 50
  )
g
Notice that the colour scales are not free and, like the first figure, values for sal and chl cannot be read easily.
My question: is it possible to do an equivalent of scales = "free" but for colour, so that each row (in this case) has a separate colour bar? Or, do I have to plot each variable (i.e., row in the figure) and patch them together using something like cowplot?
Using the development version of dplyr:
library(dplyr)
library(purrr)
library(ggplot2)
library(cowplot)
# Build one independent plot per (variables, season) group so that every
# panel carries its own colour scale, then arrange the plots in a grid.
df %>%
  group_split(variables, season) %>%
  map(function(panel_data) {
    ggplot(panel_data, aes(loc_x, loc_y, color = value)) +
      geom_point(size = 5) +
      scale_colour_gradient2(
        low = "#3366CC",
        mid = "white",
        high = "#FF3300",
        # Centre the diverging scale on this group's own median value.
        midpoint = median(panel_data$value)
      ) +
      # A single-panel facet is used only to draw a strip label that names
      # the variable and season on one line.
      facet_grid(
        ~ variables + season,
        labeller = function(labels) label_value(labels, multi_line = FALSE)
      )
  }) %>%
  plot_grid(plotlist = ., align = "hv", ncol = 2)
I used the code below to create my plot above. Is there a way to adapt my code so that I do not have the long red line joining the two periods of non-peak hours?
# Keep only the rows whose Day.No is "Day 2".
Day_2 <- non_cumul[non_cumul$Day.No == "Day 2", ]
# Flag each reading by whether its time of day falls in the
# 09:00:00 to 17:00:00 window.
Day_2$time_test <- between(
  as.ITime(Day_2$date_time),
  as.ITime("09:00:00"),
  as.ITime("17:00:00")
)
# Points and connecting lines of the measurement over time, coloured by the
# peak-hours flag computed in `time_test`.
Day2plot <- ggplot(
  Day_2,
  aes(date_time, non_cumul_measurement, color = time_test)
) +
  geom_point() +
  geom_line() +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = "Water Meter Averages (Thurs 4th Of Jan 2018)",
    subtitle = "Generally greater water usage between peak hours compared to non peak hours",
    x = "Date_Times",
    y = "Measurement in Cubic Feet"
  ) +
  scale_color_discrete(name = "Peak Hours?")
# Draw the plot with a bold x-axis title and vertical x-axis tick labels.
Day2plot +
  theme(
    axis.title.x = element_text(face = "bold", colour = "black", size = 10),
    axis.text.x = element_text(angle = 90, vjust = 0.5, size = 10)
  )
From the sound of it, your plot comprises one observation for each position on the x-axis, and you want consecutive observations of the same color to be joined together in a line.
Here's a simple example that reproduces this:
# Reproducible toy data: 20 consecutive x positions with random y values,
# labelled "A" for the first 5 rows, "B" for the next 9 and "A" for the last 6.
set.seed(5)
df <- data.frame(
  x = seq_len(20),
  y = rnorm(20),
  color = rep(c("A", "B", "A"), times = c(5, 9, 6))
)
# Line and points coloured by the `color` column, with no explicit grouping.
ggplot(data = df, mapping = aes(x = x, y = y, color = color)) +
  geom_line() +
  geom_point()
The following code creates a new column "group", which takes on a different value for each collection of consecutive points with the same color. "prev.color" and "change.color" are intermediary columns, included here for clarity:
library(dplyr)
# Number each run of consecutive rows (ordered by x) that share a color.
df2 <- df %>%
  arrange(x) %>%
  mutate(
    # Color of the previous row (NA for the first row).
    prev.color = lag(color),
    # TRUE on the first row and wherever the color differs from the row before.
    change.color = is.na(prev.color) | color != prev.color,
    # Running count of changes, so each run gets its own id.
    group = cumsum(change.color)
  )
> head(df2, 10)
x y color prev.color change.color group
1 1 -0.84085548 A <NA> TRUE 1
2 2 1.38435934 A A FALSE 1
3 3 -1.25549186 A A FALSE 1
4 4 0.07014277 A A FALSE 1
5 5 1.71144087 A A FALSE 1
6 6 -0.60290798 B A TRUE 2
7 7 -0.47216639 B B FALSE 2
8 8 -0.63537131 B B FALSE 2
9 9 -0.28577363 B B FALSE 2
10 10 0.13810822 B B FALSE 2
# Colour by the `color` column (df2 has no `colour` column) and draw one line
# per `group`, so lines only join consecutive points of the same color.
ggplot(df2,
       aes(x = x, y = y, color = color, group = group)) +
  geom_line() +
  geom_point()
Say I have the following data frame:
# Dummy data frame: x runs 1..5 twice, y is uniform random, and z labels the
# first five rows "A" and the last five "B".
df <- data.frame(
  x = rep(1:5, times = 2),
  y = runif(10),
  z = rep(c("A", "B"), each = 5)
)
# x y z
# 1 1 0.92024937 A
# 2 2 0.37246007 A
# 3 3 0.76632809 A
# 4 4 0.03418754 A
# 5 5 0.33770400 A
# 6 1 0.15367174 B
# 7 2 0.78498276 B
# 8 3 0.03341913 B
# 9 4 0.77484244 B
# 10 5 0.13309999 B
I'd like to plot cases where z == "A" as points and cases where z == "B" as lines. Simple enough.
library(ggplot2)
# Plot data: rows with z == "A" as points, rows with z == "B" as a line.
# Each layer receives its own filtered copy of df and its own aesthetics.
g <- ggplot() +
  geom_point(data = df %>% filter(z == "A"), aes(x = x, y = y)) +
  geom_line(data = df %>% filter(z == "B"), aes(x = x, y = y))
g
My data frame and aesthetic for the points and lines are identical, so this seems a bit verbose – especially if I want to do this lots of times (e.g., z == "A" through z == "Z"). Is there a way that I could state ggplot(df, aes(x = x, y = y)) and then subsequently state my filtering or subsetting criteria within the appropriate geoms?
I find the example in the question itself the most readable, although verbose. The second part of the question, about dealing with more cases, just requires a more sophisticated test in filter, using for example %in% (or grep, grepl, etc.) when dealing with multiple cases. Taking advantage of the possibility of accessing the default plot data within a layer and, as mentioned by @MrFlick, moving the mapping of aesthetics out of the individual layers results in more concise code. All earlier answers get the plot done, so in this respect my answer is not better than any of them...
library(ggplot2)
library(dplyr)
# Four series of five points each, labelled "A", "B", "C" and "Z".
df <- data.frame(
  x = rep(1:5, 4),
  y = runif(20),
  z = rep(c("A", "B", "C", "Z"), each = 5)
)
# Data and aesthetics are declared once on the plot. Each layer is given a
# function as `data`, which is applied to the plot data to pick its rows.
g <- ggplot(data = df, aes(x = x, y = y)) +
  geom_point(data = function(d) filter(d, z %in% c("A", "B", "C"))) +
  geom_line(data = function(d) filter(d, z == "Z"))
g
Another option would be to spread the data and then just supply the y aesthetic.
library(tidyverse)
# Reshape to wide form, one column of y values per level of z, then map each
# layer's y aesthetic to the column it should draw.
# pivot_wider() is the current tidyr replacement for the superseded spread().
df %>%
  pivot_wider(names_from = z, values_from = y) %>%
  ggplot(aes(x = x)) +
  geom_point(aes(y = A)) +
  geom_line(aes(y = B))
You can plot lines and points for all z records, but remove the unwanted lines and points by passing NA to scale_linetype_manual and scale_shape_manual:
library(ggplot2)
# Both geoms are drawn for every z level; an NA scale value then suppresses
# the unwanted one. Values are matched to the levels in order ("A", "B"):
#   linetype: "A" -> NA (no line),   "B" -> 1 (solid line)
#   shape:    "A" -> 16 (solid dot), "B" -> NA (no point)
# so "A" is shown as points and "B" as a line.
ggplot(df, aes(x, y, linetype = z, shape = z)) +
  geom_line() +
  geom_point() +
  scale_linetype_manual(values = c(NA, 1)) +
  scale_shape_manual(values = c(16, NA))
This is what my data frame looks like:
CatA CatB CatC
1 Y A
1 N B
1 Y C
2 Y A
3 N B
2 N C
3 Y A
4 Y B
4 N C
5 N A
5 Y B
I want CatA on the x-axis and its count on the y-axis. That graph comes out fine. However, I also want to group by CatB and stack by CatC, keeping the count on the y-axis. This is what I have tried, and this is how it looks:
I want it to look like this:
My code:
# Stacked bars of catB over catA, filled and faceted by catC.
# The `+` has to end each line: a line that starts with `+` is parsed as a
# new statement, so the layers would never be added to the plot.
# Columns are referenced by bare name instead of `data$...`, so they are
# looked up in `data` by aes() and facet_grid().
# NOTE(review): the printed table shows CatA/CatB/CatC; confirm the actual
# capitalisation of the column names.
ggplot(data, aes(factor(catA), catB, fill = catC)) +
  geom_bar(stat = "identity", position = "stack") +
  theme_bw() +
  facet_grid(~ catC)
PS: I am sorry for providing links to images because I am not able to upload it, it gives me error occurred at imgur, every time I upload.
You could use facets:
# Random example data: 30 rows with A in 1..5, B in Y/N and C cycling A/B/C.
# `replace = TRUE` is spelled out; the shorthand `T` is an ordinary variable
# that can be reassigned.
df <- data.frame(
  A = sample(1:5, 30, replace = TRUE),
  B = sample(c("Y", "N"), 30, replace = TRUE),
  C = rep(LETTERS[1:3], 10)
)
# Count bars of B stacked by C, with one facet per value of A placed side by
# side in a single row and no spacing between the panels.
ggplot(df) +
  geom_bar(aes(B, fill = C), position = "stack", width = 0.9) +
  facet_wrap(~A, nrow = 1) +
  theme(panel.spacing = unit(0, "lines"))
I am having trouble plotting paired data with ggplot2.
So, I have a database with paired (idpair) individuals (id) and their respective sequences, such as
idpair id 1 2 3 4 5 6 7 8 9 10
1 1 1 d b d a c a d d a b
2 1 2 e d a c c d a b a c
3 2 3 e a a a a c d b c e
4 2 4 d d b c d e a a a b
...
What I would like is to plot all the sequences in a way that lets us visually distinguish each pair.
I thought of using the grid such as: facet_grid(idpair~.). My issue looks like this:
How could I plot the two sequences side by side removing the "vacuum" in between caused by the other idpair ?
Any suggestions of alternative plotting of paired data are very welcome.
My code
library(ggplot2)
library(dplyr)
library(reshape2)
# Reshape to long form: one row per (idpair, id, sequence position), sorted
# by pair, individual and position.
dtmelt <- dt %>%
  melt(id.vars = c("idpair", "id")) %>%
  arrange(idpair, id, variable)
# One tile per sequence element, filled by its value, with one facet row per
# pair and the legend hidden.
ggplot(dtmelt, aes(x = variable, y = id, fill = value)) +
  geom_tile() +
  scale_fill_brewer(palette = "Set3") +
  facet_grid(idpair ~ .) +
  theme(legend.position = "none")
generate the data
# Example data: 10 pairs of 2 individuals (ids 1..20), each with a random
# sequence of 10 letters drawn from "a".."e".
# cbind() mixes integer and character input, so every column, including the
# two id columns, ends up holding text.
dt <- as.data.frame(
  cbind(
    rep(1:10, each = 2),
    1:20,
    replicate(10, sample(letters[1:5], 20, replace = TRUE))
  )
)
colnames(dt) <- c("idpair", "id", 1:10)
You can remove the unused levels in the facet by setting scales = "free_y". This will vary the y-axis limits for each facet.
# Same tile plot, but with scales = "free_y" so each pair's facet only shows
# the ids that occur in that pair.
ggplot(dtmelt, aes(x = variable, y = id, fill = value)) +
  geom_tile() +
  scale_fill_brewer(palette = "Set3") +
  facet_grid(idpair ~ ., scales = "free_y") +
  theme(legend.position = "none")