I have the following code
library(ggplot2)
library(dplyr)
# create data
time <- as.numeric(rep(seq(1,7),each=7)) # x Axis
value <- runif(49, 10, 100) # y Axis
group <- rep(LETTERS[1:7],times=7) # group, one shape per group
data <- data.frame(time, value, group)
# stacked area chart
ggplot(data, aes(x=time, y=value, fill=group)) +
geom_area()+
geom_text(data = data %>% filter(time == last(time)), aes(label = group,
x = time + 0.5,
y = value,
color = group)) +
guides(color = FALSE) + theme_bw() +
scale_x_continuous(breaks = scales::pretty_breaks(10))
Where i get
But i am aiming for link
Is there any solution for stacked area plot?
The question code is plotting the text labels in the value's of the last time, when in fact the areas are cumulative. And in reverse order.
Also, the following graph plots data created with the same code but with
set.seed(1234)
Then the data creation code is the same as in the question.
# stacked area chart
ggplot(data, aes(x=time, y=value, fill=group)) +
geom_area()+
geom_text(data = data %>%
filter(time == last(time)) %>%
mutate(value = cumsum(rev(value))),
aes(label = rev(group),
x = time + 0.5,
y = value,
color = rev(group))) +
guides(color = FALSE) + theme_bw() +
scale_x_continuous(breaks = scales::pretty_breaks(10))
Edit.
Following the discussion in the comments to this answer, I have decided to post code based on the comment by user Jake Kaupp.
ggplot(data, aes(x = time, y = value, fill = group)) +
geom_area()+
geom_text(data = data %>% filter(time == last(time)),
aes(x = time + 0.5, y = value,
label = rev(group), color = rev(group)),
position = position_stack(vjust = 0.5)) +
guides(color = FALSE) +
theme_bw() +
scale_x_continuous(breaks = scales::pretty_breaks(10))
You can use the text function to put text wherever you want. For example:
text(7.2, 350, "B", col="brown")
Here we go
time <- as.numeric(rep(seq(1,7),each=8)) # x Axis
value <- runif(56, 10, 100) # y Axis
group <- rep(LETTERS[1:8],times=7) # group, one shape per group
data <- data.frame(time, value, group)
round_df <- function(x, digits) {
# round all numeric variables
# x: data frame
# digits: number of digits to round
numeric_columns <- sapply(x, mode) == 'numeric'
x[numeric_columns] <- round(x[numeric_columns], digits)
x
}
data$value<- round_df(data$value, 2)
# stacked area chart
ggplot(data, aes(x=time, y=value, fill=group)) +
geom_area()+
geom_text(aes(x = time + 0.5, y = value, label=ifelse(time == max(time), group, NA)),position = position_stack(vjust = 0.5),check_overlap = TRUE)+
guides(color = FALSE) + theme_bw()+
scale_x_continuous(breaks = scales::pretty_breaks(10)) +
geom_text(aes(label=ifelse(time != min(time) & time != max(time),value, NA)),position = position_stack(vjust = 0.5),check_overlap = TRUE)+
geom_text(aes(x = time + 0.18,label=ifelse(time == min(time),value, NA)),position = position_stack(vjust = 0.5),check_overlap = TRUE)+
geom_text(aes(x = time - 0.18,label=ifelse(time == max(time),value, NA)),position = position_stack(vjust = 0.5),check_overlap = TRUE)
And get
Factor levels but why not letters? That is the next step :)
UPDATED
just converted factor to char data$group <- as.character(data$group)
Related
I have the following graph and code:
Graph
ggplot(long2, aes(x = DATA, y = value, fill = variable)) + geom_area(position="fill", alpha=0.75) +
scale_y_continuous(labels = scales::comma,n.breaks = 5,breaks = waiver()) +
scale_fill_viridis_d() +
scale_x_date(date_labels = "%b/%Y",date_breaks = "6 months") +
ggtitle("Proporcions de les visites, només 9T i 9C") +
xlab("Data") + ylab("% visites") +
theme_minimal() + theme(legend.position="bottom") + guides(fill=guide_legend(title=NULL)) +
annotate("rect", fill = "white", alpha = 0.3,
xmin = as.Date.character("2020-03-16"), xmax = as.Date.character("2020-06-22"),
ymin = 0, ymax = 1)
But it has some sawtooth, how am I supposed to smooth it out?
I believe your situation is roughly analogous to the following, wherein we have missing x-positions for one group, but not the other at the same position. This causes spikes if you set position = "fill".
library(ggplot2)
x <- seq_len(100)
df <- data.frame(
x = c(x[-c(25, 75)], x[-50]),
y = c(cos(x[-c(25, 75)]), sin(x[-50])) + 5,
group = rep(c("A", "B"), c(98, 99))
)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
To smooth out these spikes, it has been suggested to linearly interpolate the data at the missing positions.
# Find all used x-positions
ux <- unique(df$x)
# Split data by group, interpolate data groupwise
df <- lapply(split(df, df$group), function(xy) {
approxed <- approx(xy$x, xy$y, xout = ux)
data.frame(x = ux, y = approxed$y, group = xy$group[1])
})
# Recombine data
df <- do.call(rbind, df)
# Now without spikes :)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
Created on 2022-06-17 by the reprex package (v2.0.1)
P.S. I would also have expected a red spike at x=50, but for some reason this didn't happen.
here is my data and my area chart:
df<- data.frame(age=15:40,h1= 25:50,h2=35:60,h3=45:70)
data1<- df %>% gather(timeuse, minute, -age)
ggplot(data1, aes(x = age, y = minute, fill = timeuse)) +
geom_area() +
scale_fill_brewer(palette = "Gray")+
scale_x_continuous(breaks = seq(15, 90, by = 5))+
scale_y_continuous(breaks = seq(0, 1500, by = 100))+
theme_classic()
I want to put legend inside the area chart like this picture:
In general that could be easily achieved using geom_text with position = position_stack(vjust = 0.5). Depending on your real data the tricky part would be to select the x positions where you want to place the labels. In my code below I use dplyr::case_when to set different positions for each category of timeuse. Additionally depending on your real data it might be worthwhile to have a look at ggrepel::geom_text_repel.
library(ggplot2)
library(dplyr)
data1 <- data1 %>%
mutate(label = case_when(
timeuse == "h3" & age == 20 ~ timeuse,
timeuse == "h2" & age == 27 ~ timeuse,
timeuse == "h1" & age == 35 ~ timeuse,
TRUE ~ ""
))
p <- ggplot(data1, aes(x = age, y = minute, fill = timeuse)) +
geom_area() +
scale_fill_brewer(palette = "Greys")+
scale_x_continuous(breaks = seq(15, 90, by = 5))+
scale_y_continuous(breaks = seq(0, 1500, by = 100))+
theme_classic()
p +
geom_text(aes(label = label), position = position_stack(vjust = 0.5)) +
guides(fill = "none")
You can do it manually with annotate
annotate("text", x=50, y=2, label= "market work")
or more automated, something like this (play with the selection of rows where you want to place them):
geom_text(data = df%>% group_by(timeuse) %>% sample_n(1),
aes(x = Age, y = minute,
label = rev(timeuse), color = rev(timeuse)),
position = position_stack(vjust = 0.5))
How do I color outliers that are above a specific value using ggplot2 in R?.
(Sorry for the seemingly easy question, I am a beginner. the reason why is that these are frequencies of a value of 0, I am then transforming this column of data by taking the -log10(). So anything that has a frequency of 0 would then be transformed into Inf. Attached is a screenshot of my plot, essentially I want to make all the outlier points above 10 on the y axis to be a different color.
boxplots <- function(df){
df$'frequency'[is.na(df$'frequency')] <- 0.00
df$'-log10(frequency)' <- -log10(df$'frequency')
x <- data.frame(group = 'x', value = df$'-log10(frequency)'[df$'Type'=='x'])
y <- data.frame(group = 'y', value = df$'-log10(frequency)'[df$'Type'=='y'])
z <- data.frame(group = 'z', value = df$'-log10(frequency)'[df$'Type'=='c=z'])
plot.data <<- rbind(x, y, z)
labels <- c("z", "y", "z")
t<-plot.data %>%
ggplot(aes(x = group, y = value, fill = group))+
geom_boxplot()+
scale_fill_viridis(discrete = TRUE, alpha = 0.6)+
geom_jitter(color="black", size=0.4, alpha=0.9) +
theme_ipsum() +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
ggtitle("Distribution of -log10(frequency) by Type") +
xlab("Type")+
ylab("-log10(frequency)")+
scale_x_discrete(labels=labels)+
scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 2))
print(t)
s<<-t
ggsave("frequency_by_type.png", plot = t)
}
you could just create a new column indicating wheather it is an outlier or not and map this to the geom_jitter color. I resumed the answer in a smaller example but you should be able to fit this accordingly:
library(ggplot2)
library(viridis)
plot.data <- data.frame(group = c("1","1","1","1","1","2","2","2","2","2"),
value = c(1,5,10,6,3,1,5,10,6,3))
t<-plot.data %>%
mutate(outlier = ifelse(value >9, "YES", "NO")) %>%
ggplot(aes(x = group, y = value, fill = group))+
geom_boxplot()+
geom_jitter(aes(group, value, color = outlier) , size=2, alpha=0.9)+
scale_fill_viridis(discrete = TRUE, alpha = 0.6)
t
library(ggplot2)
# Basic box plot
p <- ggplot(ToothGrowth, aes(x=dose, y=len)) +
geom_boxplot()
p
# Rotate the box plot
p + coord_flip()
# Notched box plot
ggplot(ToothGrowth, aes(x=dose, y=len)) +
geom_boxplot(notch=TRUE)
# Change outlier, color, shape and size
ggplot(ToothGrowth, aes(x=dose, y=len)) +
geom_boxplot(outlier.colour="red", outlier.shape=8,
outlier.size=4)
I'm using ggplot2 to plot the annual occurrence of events in states. I want the state labels to be in the same order as shown in the data table "AZ CT NH NM DE..." but ggplot automatically reorganizes the state labels in alphabetical order "AZ CT DE NH...". I created groups so I could display ranges in "num" values (ex. NM and TN). Please ignore the group numbering--I took out some data points to make the table smaller.
ggplot(guidelines, aes(x = state, y = num, group = grp)) +
geom_point() + geom_line(linetype = "dotted") +
labs(x = "State", y = "Number") +
labs(title = "A") +
scale_y_continuous(breaks = seq(0, 11, 1),
limits=c(0,11))
I have tried the suggestions of previous posts to use factor and levels like so:
guidelines$state <- factor(guidelines$state, levels = unique(guidelines$state)
But it does not work because I am using groups and repeating state names. Any ideas on how to get around this?
We can use ordered
library(dplyr)
library(ggplot2)
guidelines %>%
mutate(state =ordered(state, levels = unique(state))) %>%
ggplot(aes(x = state, y = num, group = grp)) +
geom_point() +
geom_line(linetype = "dotted") +
labs(x = "State", y = "Number") +
labs(title = "A") +
scale_y_continuous(breaks = seq(0, 11, 1),
limits=c(0,11))
-output
Try this. You were close in that you must use unique(). Adding ordered=T inside the factor() will keep the desired order. Here the code (Please next time share your data using dput() as sometimes it can be complex to use data from screenshots in they are really big):
library(ggplot2)
#Data
guidelines <- data.frame(state=c('AZ','CT','NH','NM','NM','DE','NJ','TN','TN'),
num=c(10,10,10,5,10,5,5,2,5),
grp=c(3,4,17,19,19,5,18,25,25),stringsAsFactors = F)
#Format factor
guidelines$state <- factor(guidelines$state,levels = unique(guidelines$state),ordered = T)
#Plot
ggplot(guidelines, aes(x = state, y = num, group = grp)) +
geom_point() + geom_line(linetype = "dotted") +
labs(x = "State", y = "Number") +
labs(title = "A") +
scale_y_continuous(breaks = seq(0, 11, 1),
limits=c(0,11))
Output:
Or as mentioned in comments by #TTS you can use this the scale_x_discrete() with limits option:
#Data
guidelines <- data.frame(state=c('AZ','CT','NH','NM','NM','DE','NJ','TN','TN'),
num=c(10,10,10,5,10,5,5,2,5),
grp=c(3,4,17,19,19,5,18,25,25),stringsAsFactors = F)
#Plot 2
ggplot(guidelines, aes(x = state, y = num, group = grp)) +
geom_point() + geom_line(linetype = "dotted") +
labs(x = "State", y = "Number") +
labs(title = "A") +
scale_y_continuous(breaks = seq(0, 11, 1),
limits=c(0,11))+
scale_x_discrete(limits=unique(guidelines$state))
Output:
I am using facet_grid() to plot multiple plot divided per groups of data. For each plot, I want to add in the corner the highest value of the Y axis. I've tried several hacks but it never gives me the expected results. This answer partially helps me but the value I want to add will constantly be changing, therefore I don't see how I can apply it.
Here is a minimal example, I'd like to add the red numbers on the graph below:
library(ggplot2)
data <- data.frame('group'=rep(c('A','B'),each=4),'hour'=rep(c(1,2,3,4),2),'value'=c(5,4,2,3,6,7,4,5))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)
Thanks for your help!
library(dplyr)
data2 <- data %>% group_by(group) %>% summarise(Max = max(value))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
geom_text(aes(label = Max), x = Inf, y = Inf, data2,
hjust = 2, vjust = 2, col = 'red') +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)
This does the trick. If you always have fixed ranges you can position the text manually.
library(ggplot2)
data <- data.frame('group'=rep(c('A','B'),each=4),'hour'=rep(c(1,2,3,4),2),'value'=c(5,4,2,3,6,7,4,5))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
geom_text(
aes(x, y, label=lab),
data = data.frame(
x=Inf,
y=Inf,
lab=tapply(data$value, data$group, max),
group=unique(data$group)
),
vjust="inward",
hjust = "inward"
) +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)