I have a sample dataset as below:
Day<-c("1","1","1","2","2","2")
Group<-c("Blue","Red","Green","Blue","Red","Green")
UV<-c("3","4","2","5","4","6")
Rain<-c("10","11","12","15","16","17")
dmean<-data.frame(Day,Group,UV,Rain)
Day<-c("1","1","1","1","1","1","2","2","2","2","2","2")
Group<-c("Blue","Blue","Red","Red","Green","Green","Blue","Blue","Red","Red","Green","Green")
UV<-c("3","3.1","4","4.1","2","2.2","5","5.1","4","4.2","6.1","6.1")
Rain<-c("10","10.1","11","11","12","12.2","15","15.2","16","16.1","17","17.2")
dpoints<-data.frame(Day,Group,UV,Rain)
library(ggplot2)
plot.ts <- function(yvar) {
ggplot(dmean, aes(x = Day, y = .data[[yvar]], group = Group, colour = Group)) +
geom_line(size = 1)+
geom_point(data = dpoints, aes(y = .data[[yvar]]), alpha = .2) +
facet_wrap(~Group, ncol = 1)
}
lapply(names(dpoints)[3:4], plot.ts)
Is it possible to modify the function "plot.ts" so I can set y axis limits for each "yvar" column, in this case "UV" and "Rain"? I have a larger dataset where there are more columns besides "UV" and "Rain" to apply the "plot.ts" function on.
Thank you.
We can use the {facetscales} package. We need to create a list with each column name and can give each column name its own scale_ function. If you want to pass each iteration its own list, then we have to create a list of lists and use mapply or purrr::map2.
library(ggplot2)
library(facetscales)
plot.ts <- function(yvar, ylimits = NULL) {
p <- ggplot(dmean, aes(x = Day, y = .data[[yvar]], group = Group, colour = Group)) +
geom_line(size = 1)+
geom_point(data = dpoints, aes(y = .data[[yvar]]), alpha = .2)
if(!is.null(ylimits)) {
p + facet_grid_sc(rows = vars(Group), scales = list(y = ylimits))
} else
p + facet_grid(rows = vars(Group))
}
scales_y <- list(
Blue = scale_y_continuous(limits = c(0,50)),
Green = scale_y_continuous(limits = c(12,14)),
Red = scale_y_continuous(limits = c(10,25))
)
lapply(names(dpoints)[3:4], plot.ts, ylimits = scales_y)
#> [[1]]
#>
#> [[2]]
Created on 2022-05-02 by the reprex package (v0.3.0)
Below the slightly altered data:
Day<-c("1","1","1","2","2","2")
Group<-c("Blue","Red","Green","Blue","Red","Green")
UV<-c("3","4","2","5","4","6")
Rain<-c("10","11","12","15","16","17")
dmean<-data.frame(Day,Group,UV,Rain)
dmean$UV <- as.numeric(dmean$UV)
dmean$Rain <- as.numeric(dmean$Rain)
Day<-c("1","1","1","1","1","1","2","2","2","2","2","2")
Group<-c("Blue","Blue","Red","Red","Green","Green","Blue","Blue","Red","Red","Green","Green")
UV<-c("3","3.1","4","4.1","2","2.2","5","5.1","4","4.2","6.1","6.1")
Rain<-c("10","10.1","11","11","12","12.2","15","15.2","16","16.1","17","17.2")
dpoints<-data.frame(Day,Group,UV,Rain)
dpoints$UV <- as.numeric(dmean$UV)
dpoints$Rain <- as.numeric(dmean$Rain)
Related
I'm trying to write my own Central Limit Theorem demonstration using ggplot2 and am unable to get my stat_function to display a changing normal distribution.
below is my code, I want the normal distribution in stat_function to transition through different states; specifically, I'm hoping for it to change the standard deviation to correspond with each value in dataset. Any help would be greatly appreciated.
#library defs
library(gganimate)
library(ggplot2)
library(transformr)
#initialization for distribution, rolls, and vectors
k = 2
meanr = 1/k
sdr = 1/k
br = sdr/10
rolls <- 200
avg <- 1
dataset <- 1
s <- 1
#loop through to create vectors of sample statistics from 200 samples of size i
#avg is sample average, s is standard deviations of sample means, and dataset is the indexes to run the transition states
for (i in c(1:40)){
for (j in 1:rolls){
avg <- c(avg,mean(rexp(i,k)))
}
dataset <- c(dataset, rep(i,rolls))
s <- c(s,rep(sdr/sqrt(i),rolls))
}
#remove initialized vector information as it was only created to start loops
avg <- avg[-1]
rn <- rn[-1]
dataset <- dataset[-1]
s <- s[-1]
#dataframe
a <- data.frame(avgf=avg, rnf = rn,datasetf = dataset,sf = s)
#plot histogram, density function, and normal distribution
ggplot(a,aes(x=avg,y=s))+
geom_histogram(aes(y = ..density..), binwidth = br,fill='beige',col='black')+
geom_line(aes(y = ..density..,colour = 'Empirical'),lwd=2, stat = 'density') +
stat_function(fun = dnorm, aes(colour = 'Normal', y = s),lwd=2,args=list(mean=meanr,sd = mean(s)))+
scale_y_continuous(labels = scales::percent_format()) +
scale_color_discrete(name = "Densities", labels = c("Empirical", "Normal"))+
labs(x = 'Sample Average',title = 'Sample Size: {closest_state}')+
transition_states(dataset,4,4)+ view_follow(fixed_x = TRUE)
I think it's difficult to use stat_function here because the dnorm function that you are passing includes a grouped variable (mean(s)). There is no way to indicate that you wish to group s by the dataset column, and the transition_states function doesn't filter the whole data frame. You could use transition_filter to filter the whole data frame, but this would be laborious.
It's not much work to just add a dnorm to your input data frame and plot it as a line, particularly since the rest of your code can be simplified substantially. Here's a fully reproducible example:
library(gganimate)
library(ggplot2)
library(transformr)
k <- 2
meanr <- sdr <- 1/k
br <- sdr/10
rolls <- 200
a <- do.call(rbind, lapply(1:40, function(i){
data.frame(avg = replicate(rolls, mean(rexp(i, k))),
dataset = rep(i, rolls),
x = seq(0, 2, length.out = rolls),
s = dnorm(seq(0, 2, length.out = rolls),
meanr, sdr/sqrt(i))) }))
ggplot(a, aes(x = avg, group = dataset)) +
geom_histogram(aes(y = ..density..), fill = 'beige',
colour = "black", binwidth = br) +
geom_line(aes(y = ..density.., colour = 'Empirical'),
lwd = 2, stat = 'density', alpha = 0.5) +
geom_line(aes(x = x, y = s, colour = "Normal"), size = 2, alpha = 0.5) +
scale_y_continuous(labels = scales::percent_format()) +
coord_cartesian(xlim = c(0, 2)) +
scale_color_discrete(name = "Densities", labels = c("Empirical", "Normal")) +
labs(x = 'Sample Average', title = 'Sample Size: {closest_state}') +
transition_states(dataset, 4, 4) +
view_follow(fixed_x = TRUE, fixed_y = TRUE)
I am creating animated plotly graph for my assignment in r, where I am comparing several models with various number of observations. I would like to add annotation showing what is the RMSE of the current model - this means I would like to have text that changes together with slider. Is there any easy way how to do that?
Here is my dataset stored on GitHub. There already is created variable with RMSE: data
The base ggplot graphic is as follows:
library(tidyverse)
library(plotly)
p <- ggplot(values_predictions, aes(x = x)) +
geom_line(aes(y = preds_BLR, frame = n, colour = "BLR")) +
geom_line(aes(y = preds_RLS, frame = n, colour = "RLS")) +
geom_point(aes(x = x, y = target, frame = n, colour = "target"), alpha = 0.3) +
geom_line(aes(x = x, y = sin(2 * pi * x), colour = "sin(2*pi*x)"), alpha = 0.3) +
ggtitle("Comparison of performance) +
labs(y = "predictions and targets", colour = "colours")
This is converted to plotly, and I have added an animation to the Plotly graph:
plot <- ggplotly(p) %>%
animation_opts(easing = "linear",redraw = FALSE)
plot
Thanks!
You can add annotations to a ggplot graph using the annotate function: http://ggplot2.tidyverse.org/reference/annotate.html
df <- data.frame(x = rnorm(100, mean = 10), y = rnorm(100, mean = 10))
# Build model
fit <- lm(x ~ y, data = df)
# function finds RMSE
RMSE <- function(error) { sqrt(mean(error^2)) }
library(ggplot2)
ggplot(df, aes(x, y)) +
geom_point() +
annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 2,
label = paste("RMSE", RMSE(fit$residuals)) )
There seems to be a bit of a problem converting between ggplot and plotly. However this workaround here shows a workaround which can be used:
ggplotly(plot) %>%
layout(annotations = list(x = 12, y = 13, text = paste("RMSE",
RMSE(fit$residuals)), showarrow = F))
Here's an example of adding data dependent text using the built in iris dataset with correlation as text to ggplotly.
library(plotly)
library(ggplot2)
library(dplyr)
mydata = iris %>% rename(variable1=Sepal.Length, variable2= Sepal.Width)
shift_right = 0.1 # number from 0-1 where higher = more right
shift_down = 0.02 # number from 0-1 where higher = more down
p = ggplot(mydata, aes(variable1,variable2))+
annotate(geom = "text",
label = paste0("Cor = ",as.character(round(cor.test(mydata$variable1,mydata$variable2)$estimate,2))),
x = min(mydata$variable1)+abs(shift_right*(min(mydata$variable1)-max(mydata$variable1))),
y = max(mydata$variable2)-abs(shift_down*(min(mydata$variable2)-max(mydata$variable2))), size=4)+
geom_point()
ggplotly(p) %>% style(hoverinfo = "none", traces = 1) # remove hover on text
I'm trying to plot 2 sets of data points and a single line in R using ggplot.
The issue I'm having is with the legend.
As can be seen in the attached image, the legend applies the lines to all 3 data sets even though only one of them is plotted with a line.
I have melted the data into one long frame, but this still requires me to filter the data sets for each individual call to geom_line() and geom_path().
I want to graph the melted data, plotting a line based on one data set, and points on the remaining two, with a complete legend.
Here is the sample script I wrote to produce the plot:
xseq <- 1:100
x <- rnorm(n = 100, mean = 0.5, sd = 2)
x2 <- rnorm(n = 100, mean = 1, sd = 0.5)
x.lm <- lm(formula = x ~ xseq)
x.fit <- predict(x.lm, newdata = data.frame(xseq = 1:100), type = "response", se.fit = TRUE)
my_data <- data.frame(x = xseq, ypoints = x, ylines = x.fit$fit, ypoints2 = x2)
## Now try and plot it
melted_data <- melt(data = my_data, id.vars = "x")
p <- ggplot(data = melted_data, aes(x = x, y = value, color = variable, shape = variable, linetype = variable)) +
geom_point(data = filter(melted_data, variable == "ypoints")) +
geom_point(data = filter(melted_data, variable == "ypoints2")) +
geom_path(data = filter(melted_data, variable == "ylines"))
pushViewport(viewport(layout = grid.layout(1, 1))) # One on top of the other
print(p, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
You can set them manually like this:
We set linetype = "solid" for the first item and "blank" for others (no line).
Similarly for first item we set no shape (NA) and for others we will set whatever shape we need (I just put 7 and 8 there for an example). See e.g. http://www.r-bloggers.com/how-to-remember-point-shape-codes-in-r/ to help you to choose correct shapes for your needs.
If you are happy with dots then you can use my_shapes = c(NA,16,16) and scale_shape_manual(...) is not needed.
my_shapes = c(NA,7,8)
ggplot(data = melted_data, aes(x = x, y = value, color=variable, shape=variable )) +
geom_path(data = filter(melted_data, variable == "ylines") ) +
geom_point(data = filter(melted_data, variable %in% c("ypoints", "ypoints2"))) +
scale_colour_manual(values = c("red", "green", "blue"),
guide = guide_legend(override.aes = list(
linetype = c("solid", "blank","blank"),
shape = my_shapes))) +
scale_shape_manual(values = my_shapes)
But I am very curious if there is some more automated way. Hopefully someone can post better answer.
This post relied quite heavily on this answer: ggplot2: Different legend symbols for points and lines
I have a dataframe a with three columns :
GeneName, Index1, Index2
I draw a scatterplot like this
ggplot(a, aes(log10(Index1+1), Index2)) +geom_point(alpha=1/5)
Then I want to color a point whose GeneName is "G1" and add a text box near that point, what might be the easiest way to do it?
You could create a subset containing just that point and then add it to the plot:
# create the subset
g1 <- subset(a, GeneName == "G1")
# plot the data
ggplot(a, aes(log10(Index1+1), Index2)) + geom_point(alpha=1/5) + # this is the base plot
geom_point(data=g1, colour="red") + # this adds a red point
geom_text(data=g1, label="G1", vjust=1) # this adds a label for the red point
NOTE: Since everyone keeps up-voting this question, I thought I would make it easier to read.
Something like this should work. You may need to mess around with the x and y arguments to geom_text().
library(ggplot2)
highlight.gene <- "G1"
set.seed(23456)
a <- data.frame(GeneName = paste("G", 1:10, sep = ""),
Index1 = runif(10, 100, 200),
Index2 = runif(10, 100, 150))
a$highlight <- ifelse(a$GeneName == highlight.gene, "highlight", "normal")
textdf <- a[a$GeneName == highlight.gene, ]
mycolours <- c("highlight" = "red", "normal" = "grey50")
a
textdf
ggplot(data = a, aes(x = Index1, y = Index2)) +
geom_point(size = 3, aes(colour = highlight)) +
scale_color_manual("Status", values = mycolours) +
geom_text(data = textdf, aes(x = Index1 * 1.05, y = Index2, label = "my label")) +
theme(legend.position = "none") +
theme()
There were example code for E on ggplot2 library:
theme_set(theme_bw())
dat = data.frame(value = rnorm(100,sd=2.5))
dat = within(dat, {
value_scaled = scale(value, scale = sd(value))
obs_idx = 1:length(value)
})
ggplot(aes(x = obs_idx, y = value_scaled), data = dat) +
geom_ribbon(ymin = -1, ymax = 1, alpha = 0.1) +
geom_line() + geom_point()
There is a question: How I can make in ggplot2 my first 10 lines in red and the rest lines in blue based on example? I tried to use some kind of layer syntax is, but it doesn't work.
First, add another column to your data frame dat. It has value 0 for the first 10 rows and 1 for the rest.
dat$group <- factor(rep.int(c(0, 1), c(10, nrow(dat)-10)))
Generate the plot:
library(ggplot2)
ggplot(aes(x = obs_idx, y = value_scaled), data = dat) +
geom_ribbon(ymin = -1, ymax = 1, alpha = 0.1) +
geom_line(aes(colour = group), show_guide = FALSE) +
scale_colour_manual(values = c("red", "blue")) +
geom_point()
The parameter show_guide = FALSE suppresses the legend for the red and blue lines.
OK, I could manage layers, the code is (not elegant, but works):
require(ggplot2)
value=round(rnorm(50,200,50),0)
nmbrs<-length(value) ## length of vector
obrv<-1:length(value) ## list of observations
#create data frame from the values
data_lj<-data.frame(obrv,value)
data_lj20<-data.frame(data_lj[1:20,1:2])
data_lj21v<-data.frame(data_lj[20:nmbrs,1:2])
#plot with ggplot
rr<-ggplot()+
layer(mapping=aes(obrv,value),geom="line",data=data_lj20,colour="red")+
layer(mapping=aes(obrv,value),geom="line",data=data_lj21v,colour="blue")
print(rr)