My goal is to create a separate 4-panel plot (right y-axis) for each of the 12 months (top x-axis) for the given years (top x-axis) in the data set. I also want to create a legend with the names of the points (V5 and V6) overlaid in 2 of the panels (V2 and V3). So instead of 1 large plot, there should be 12 plots.
After the reproducible example is my current plot.
Can you offer assistance on how to reach my goal?
Thank you.
library(ggplot2)
library(data.table)
startdate <- as.POSIXct("2008-09-12 10:00:00")
enddate <- as.POSIXct("2011-04-26 23:45:00")
interval <- 1296000
Time <- seq(from = startdate, by = interval/2, to = enddate)
set.seed(1)
timeframe <- data.frame(Time, V1 = abs(rnorm(length(Time))), V2 =
abs(rnorm(length(Time))), V3 = abs(rnorm(length(Time))), V4 =
abs(rnorm(length(Time))), V5 = abs(rnorm(length(Time))), V6 =
abs(rnorm(length(Time))))
timeframe <- setDT(timeframe)
The following Month and Year functions are derived from the waterYear function in smwrBase.
Month <- function (x, numeric = FALSE)
{
x <- as.POSIXlt(x)
yr <- x$year + 1900L
mn <- x$mon + 1L
if (numeric)
return(mn)
ordered(mn)
}
Year <- function (x, numeric = FALSE)
{
x <- as.POSIXlt(x)
yr <- x$year + 1900L
mn <- x$mon + 1L
if (numeric)
return(yr)
ordered(yr)
}
# month
mn <- Month(timeframe$Time, numeric = TRUE)
# year
yr <- Year(timeframe$Time, numeric = TRUE)
The following plot method is derived from Add a geom layer for a single panel in a faceted plot
timeframe <- data.table(timeframe, mn, yr)
setnames(timeframe, 8:9, c("Month", "Year"))
setkey(timeframe, Time)
df1 <- setDF(timeframe[, list(Time, V1, Month, Year)])
df2 <- setDF(timeframe[, list(Time, V2, Month, Year)])
df3 <- setDF(timeframe[, list(Time, V3, Month, Year)])
df4 <- setDF(timeframe[, list(Time, V4, Month, Year)])
df5 <- setDF(timeframe[, list(Time, V5, Month, Year)])
df6 <- setDF(timeframe[, list(Time, V6, Month, Year)])
names(df1) <- c("Time", "value", "Month", "Year")
names(df2) <- c("Time", "value", "Month", "Year")
names(df3) <- c("Time", "value", "Month", "Year")
names(df4) <- c("Time", "value", "Month", "Year")
names(df5) <- c("Time", "value", "Month", "Year")
names(df6) <- c("Time", "value", "Month", "Year")
df1$panel <- "V1"
df2$panel <- "V2"
df3$panel <- "V3"
df4$panel <- "V4"
df5$panel <- "V2"
df6$panel <- "V3"
dff <- rbind(df1, df2, df3, df4)
p <- ggplot(data = dff, mapping = aes(x = Time, y = value))
p <- p + facet_grid(panel ~ Month + Year, scale = "free")
p <- p + layer(data = df1, geom = "line")
p <- p + layer(data = df2, geom = "line")
p <- p + layer(data = df5, geom = "point", colour = "green")
p <- p + layer(data = df3, geom = "line")
p <- p + layer(data = df6, geom = "point", colour = "red")
p <- p + layer(data = df4, geom = "line") +
scale_fill_manual(values=c("green", "red"), name="Legend",
labels=c("v5", "v6")) # this last part is my attempt at creating the legend
p
Assistance with the facet_grid came from http://docs.ggplot2.org/current/facet_grid.html
ggplot is generally much happier with the data in a long format. Thus, start by reshaping your data. Then it's rather straightforward to use one data set for the lines and one for the points, and map a variable to the color aesthetics for the points.
# melt data from wide to long format
library(reshape2)
df <- melt(timeframe, id.vars = "Time")
# create year and month variables
df$year <- format(df$Time, "%Y")
df$month <- format(df$Time, "%m")
# select data for lines
d1 <- df[!df$variable %in% c("V5", "V6"), ]
# select data for points
d2 <- df[df$variable %in% c("V5", "V6"), ]
# rename V5 and V6 to place them in correct panels
d2$variable[d2$variable == "V5"] <- "V2"
d2$variable[d2$variable == "V6"] <- "V3"
# plot
ggplot() +
geom_line(data = d1, aes(x = Time, y = value)) +
geom_point(data = d2, aes(x = Time, y = value, color = variable)) +
facet_grid(variable ~ month + year, scale = "free") +
scale_color_manual(values = c("green", "red"), name = "Legend",
labels = c("V5", "V6"))
This is the complete answer to my question above, most of it relying on Henrik's answer. Thank you Henrik.
library(ggplot2)
library(reshape2)
startdate <- as.POSIXct("2008-09-12 10:00:00")
enddate <- as.POSIXct("2011-04-26 23:45:00")
interval <- 1296000
Time <- seq(from = startdate, by = interval/2, to = enddate)
set.seed(1)
timeframe <- data.frame(Time, V1 = abs(rnorm(length(Time))), V2 =
abs(rnorm(length(Time))), V3 = abs(rnorm(length(Time))), V4 =
abs(rnorm(length(Time))), V5 = abs(rnorm(length(Time))), V6 =
abs(rnorm(length(Time))))
df <- melt(timeframe, id.vars = "Time")
# create year and month variables
df$year <- format(df$Time, "%Y")
df$month <- format(df$Time, "%b")
# select data for lines
d1 <- df[!df$variable %in% c("V5", "V6"), ]
# select data for points
d2 <- df[df$variable %in% c("V5", "V6"), ]
# rename V5 and V6 to place them in correct panels
d2$variable[d2$variable == "V5"] <- "V2"
d2$variable[d2$variable == "V6"] <- "V3"
Source for the code below:
Selecting and plotting months in ggplot2
# separate plot for each month
for (u in unique(df$month)) {
p <- ggplot() + geom_line(data = d1[format(d1$Time,"%b")==u, ], aes(x =
Time, y = value)) + geom_point(data = d2[format(d2$Time,"%b")==u, ], aes(x= Time,
y = value, color = variable)) + facet_grid(variable ~ month + year, scale = "free")
+ scale_color_manual(values = c("green", "red"), name = "Legend",
labels = c("V5", "V6"))
print(p)
}
Related
Here is the codes and the present outplot
df <- data.frame(state = c('0','1'),
male = c(26287942,9134784),
female = c(16234000,4406645))
#output
> df
state male female
1 0 26287942 16234000
2 1 9134784 4406645
library(ggplot2)
library(tidyr)
df_long <- pivot_longer(df, cols = c("female","male"))
names(df_long) <- c('state','sex','observations')
ggplot(data = df_long) +
geom_col(aes(x = sex, y =observations, fill = state)) +
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill='lightgrey') )
I want to adjust the plots like this. (I marked what I want to change.)
Simplify the scientific records in y-axis.
Count the ratio (the number of state 1)/(the number of state 0 + state 1) and plot like this.
It may be a little complicated, and I don't know which functions to use. If possible, can anyone tell me some related functions or examples?
You can set options(scipen = 99) to disable scientific notation on y-axis. We can create a separate dataset for label data.
library(tidyverse)
options(scipen = 99)
long_data <- df %>%
pivot_longer(cols = c(male, female),
names_to = "sex",
values_to = "observations")
label_data <- long_data %>%
group_by(sex) %>%
summarise(perc = observations[match(1, state)]/sum(observations),
total = sum(observations), .groups = "drop")
ggplot(long_data) +
geom_col(aes(x = sex, y = observations, fill = state)) +
geom_text(data = label_data,
aes(label = round(perc, 2), x = sex, y = total),
vjust = -0.5) +
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill='lightgrey'))
By searching the Internet for about two days, I have finished the work!
sex <- c('M','F')
y0 <- c(26287942,16234000)
y1 <- c(9134784, 4406645)
y0 <- y0*10^{-7}
y1 <- y1*10^{-7}
ratio <- y1/(y0+y1)
ratio <- round(ratio,2)
m <- t(matrix(c(y0,y1),ncol=2))
colnames(m) <- c(as.character(sex))
df <- as.data.frame(m)
df <- cbind(c('0','1'),df)
colnames(df)[1] <- 'observations'
df
df_long <- pivot_longer(df, cols = as.character(sex))
names(df_long) <- c('state','sex','observations')
df_r <- as.data.frame(df_long)
df_r <- data.frame(df_r,ratio=rep(ratio,2))
ggplot(data = df_r) +
geom_col(aes(x =sex, y = observations, fill = state))+
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill=NULL) )+
geom_line(aes(x=sex,y=ratio*10),group=1)+
geom_point(aes(x=sex,y=ratio*10))+
geom_text(aes(x=sex,y=ratio*10+0.35),label=rep(ratio,2))+
scale_y_continuous(name =expression(paste('observations(','\u00D7', 10^7,')')),
sec.axis = sec_axis(~./10,name='ratio'))
The output:
Here an example of my data:
mydf <- tibble(
ID = c(1, 2, 9,4,5,15),
M1 = c(60,50,40,20,30,45),
M2 = c(90,80, 30, 33,70,40)
)
I want to insert only IDs in the plot in which M1<M2.
Here are the codes that I have used:
library(ggrepel)
res1<- mydf %>%
filter(M1< M2)
p <- ggplot(data = mydf, aes(x = M1, y = M2,label = ID))+
geom_text_repel() +
geom_point(data=res1,
aes(x=M1,y= M2),
color='red',
size=3)
I want to remove IDs 9,15 as M1>M2.
Does this give you the desired plot?
mydf <- mydf %>%
mutate(M1lower = M1 < M2)
res1<- mydf %>%
filter(M1lower == T)
ggplot(data = mydf, aes(x = M1, y =M2,label = ID, color = M1lower))+
geom_point(size=3) +
geom_text_repel(data=res1,
aes(x=M1,y= M2))
Recently I discovered the function geom_table(), from ggpmisc package, which allows you to put a table inside a plot. But I don't know how to put different tables into a grid plot.
I have this df and plot:
library(lubridate)
library(ggplot2)
library(ggpmisc)
Date <- c("2010-01-28", "2010-02-28", "2010-03-28",
"2010-04-28", "2010-05-28", "2010-06-28",
"2010-07-28", "2010-08-28", "2010-09-28",
"2010-10-28")
Date <- as_date(Date)
Country <- rep("Japan", 10)
A <- runif(10, min=30, max=90)
B <- runif(10, min = 1, max = 15)
df <- data.frame(Date, Country, A, B)
df %>% pivot_longer(-c(Date, Country)) %>%
ggplot(aes(x=Date,y=value,group=1,color=Country))+
geom_line(size = 0.9) +
facet_grid(name~Country, scales = "free", switch = "y")
I also have these two tables, tableA and tableB:
Time <- c("Today", "Yesterday", "One week ago")
Value_A <- 10:12
Value_B <- 1:3
tableA <- data.frame(Time, Value_A)
tableB <- data.frame(Time, Value_B)
How I put tableA in the top graph and tableB in the bottom graph?
I appreciate it if someone can help :)
You need to create a little data frame that hosts your tableA and tableB in a list column:
d <- tibble(x = c(0.95, 0.95), y = c(0.95, 0.95),
name = c("A", "B"), tb = list(tableA, tableB))
df %>% pivot_longer(-c(Date, Country)) %>%
ggplot(aes(x=Date,y=value,group=1,color=Country))+
geom_line(size = 0.9) +
geom_table_npc(data = d, aes(npcx = x, npcy = y, label = tb)) +
facet_grid(name~Country, scales = "free", switch = "y")
I know I can add the same curve to each plot pane as a layer using lattice and latticeExtra R packages (see blow).
But suppose we wanted to add different curves to each plot pane.
For example, in the below example, I wonder how to add v1 only to the top plot, v2 to the bottom-left plot, and v3 to the bottom-right plot?
library(lattice)
library(latticeExtra)
set.seed(24)
v1 <- density(rnorm(1e3, 3.5))
v2 <- density(rnorm(1e3, 3))
v3 <- density(rnorm(1e3, 2.75))
foo <- xyplot((1:32*.01)~wt|gear , data = mtcars)
foo + layer(panel.polygon(v1, col = 2, alpha = 0.3))
Here is one option
foo +
layer(if(panel.number() == 3) {
panel.polygon(v1, col = 2, alpha = 0.3)
} else if(panel.number() == 1) {
panel.polygon(v2, col = 2, alpha = 0.3)
} else{
panel.polygon(v3, col = 2, alpha = 0.3)
})
data
library(lattice)
library(latticeExtra)
set.seed(24)
v1 <- density(rnorm(1e3, 3.5))
v2 <- density(rnorm(1e3, 3))
v3 <- density(rnorm(1e3, 2.75))
foo <- xyplot((1:32*.01)~wt|gear , data = mtcars)
You can do it with ggplot2 and facet_grid:
library(ggplot2)
library(dplyr)
# user data
v1 <- (rnorm(1e3, 3.5))
v2 <- (rnorm(1e3, 3))
v3 <- (rnorm(1e3, 2.75))
# Make df1 from user data and associate with add gear value in `mtcars`
df1 <- data.frame(wt = c(v1, v2, v3),
gear = as.factor(c(rep(3, 1000),
rep(4, 1000),
rep(5, 1000))))
# select data from mtcars and add user defined values (`val`)
df2 <- mtcars %>%
mutate(val = 1:32 * 0.01) %>%
remove_rownames() %>%
mutate(gear = as.factor(gear)) %>%
select(c(val, gear, wt))
ggplot(df2, aes(x = wt, y = val, #set up mapping with df2
fill = gear)) +
geom_density(data = df1, #make density plots of df1
aes(x = wt,
fill = gear),
inherit.aes = FALSE) + #next add points from df1
geom_point(data = df2, aes(x = wt, y = val), inherit.aes = FALSE) +
facet_grid(cols = vars(gear))
You can make the fill color more transparent by adding an alpha value to geom_density.
This question already has answers here:
Different breaks per facet in ggplot2 histogram
(4 answers)
Closed 8 years ago.
I would like to create multiple histograms within one plot (using facet_wrap).
This could be an example code:
df <- data.frame(p1 = rnorm(100,5,2), p2 = rnorm(100,80,20), group = rep(LETTERS[1:4],25))
library(ggplot2)
library(reshape)
plotData <- melt(df, id.vars = "group", measure.vars = c("p1","p2") )
m <- ggplot(plotData, aes(x = value, color = group, fill = group, group = group))
m <- m + geom_bar(position=position_dodge())
m <- m + facet_wrap( ~ variable,scales = "free_x")
print(m)
Now, I would like to modify the plot that it creates per parameter ("p1,"p2") let's say 10 bins.
Up to now, I could not find a way to do this as binwidth/breaks calculation should be dependent on a subset of data.
Is it possible at all?
I want to share my solution (taken from the answered question linked above) extended by the possibility to overlay the histograms with density curves scaled to histogram counts:
df <- data.frame(p1 = rnorm(1000,5,2), p2 = rnorm(1000,80,20), group = rep(LETTERS[1:4],25))
library(ggplot2)
library(reshape)
library(plyr)
plotData <- melt(df, id.vars = "group", measure.vars = c("p1","p2") )
nBins <- 10
groupedData <- dlply(plotData, .(variable))
groupedBinWidth <- llply(groupedData, .fun = function(data, nBins) {
r <- range(data$value, na.rm = TRUE, finite = TRUE)
widthOfBins = (r[2] - r[1])/nBins
if (is.na(widthOfBins) || is.infinite(widthOfBins) || (widthOfBins <= 0)) widthOfBins <- NULL
widthOfBins
}, nBins = nBins)
densData <- dlply(plotData, .(variable, group), .fun = function(subData){
param <- subData$variable[1]
group <- subData$group[1]
d <- density(subData$value)
bw <- groupedBinWidth[[param]]
data.frame(x = d$x, y = d$y * nrow(subData) * bw , group = group, variable = param)
})
hls <- mapply(function(x, b) geom_bar(aes(x = value), position = position_dodge(), data = x, binwidth = b),
groupedData, groupedBinWidth)
dLay <- mapply(function(data) geom_density(data = data, aes(x = x, y = y), stat = "identity", fill = NA, size = 1),
densData)
m <- ggplot(plotData, aes(x = value, color = group, fill = group, group = group))
m <- m + hls
m <- m + dLay
m <- m + facet_wrap( ~ variable,scales = "free")
print(m)
Try this - really ugly code, but works if I understand you correctly. You might want to play with geom_density and maybe remove fill to make it more readable.
nbin<- 5
m <- ggplot(plotData, aes(x = value, color = group, fill = group, group = group))
m <- m + geom_histogram(data = subset(plotData, variable == "p1"), binwidth=diff(range(subset(plotData, variable == "p1")$value))/nbin)
m <- m + geom_histogram(data = subset(plotData, variable == "p2"), binwidth=diff(range(subset(plotData, variable == "p2")$value))/nbin)
m <- m + facet_wrap( ~ variable,scales = "free_x")
print(m)