I'm trying to check the correlation of a bunch of variables and wanted to create a graph(s) of all the dependent variables on my response.
Price <- c(10,11,22,15,15)
Var1 <- c(2,3,12,5,17)
Var2 <- c(3,3,12,16,7)
Var3 <- c(2,5,2,5,18)
data <- data.frame(Var1,Var2,Var3,Price)
I was thinking something like this would work ;
variables <- c('Var1', 'Var2', 'Var3')
for (i in variables){
plot <- ggplot(data=data, aes(x = i, y=Price))+
geom_point(shape=16, color="dodgerblue")+
geom_smooth(method=lm, color='Black')
print(plot)
}
But it only prints out the response for variable 3 without any values of x.
As i in your loop is a character, you need to call it with get(i) in your ggplot:
for (i in variables){
plot <- ggplot(data=data, aes(x = get(i), y=Price))+
geom_point(shape=16, color="dodgerblue")+
geom_smooth(method=lm, color='Black')
print(plot)
}
will work.
Two alternatives to have the 3 graphs together:
alternative 1
Long format, and facet_wrap:
library(tidyr)
pivot_longer(data,paste0("Var",1:3)) %>%
ggplot(aes(value,Price))+
geom_point(shape=16, color="dodgerblue")+
geom_smooth(method=lm, color='Black')+
facet_wrap(~name)
second alternative
You could try to use the wonderful {patchwork} package also:
plot_list <- lapply(variables,function(i){
ggplot(data=data, aes(x = get(i), y=Price))+
geom_point(shape=16, color="dodgerblue")+
geom_smooth(method=lm, color='Black')+
labs(x = i)
})
library(patchwork)
wrap_plots(plot_list)
par works in low-level plotting.
par(mfrow=c(1, 3))
with(data, lapply(names(data)[1:3], \(x) {
plot(data[c(x, 'Price')]); abline(lm(Price ~ get(x)))
}))
Data:
data <- structure(list(Var1 = c(2, 3, 12, 5, 17), Var2 = c(3, 3, 12,
16, 7), Var3 = c(2, 5, 2, 5, 18), Price = c(10, 11, 22, 15, 15
)), class = "data.frame", row.names = c(NA, -5L))
Related
I would like to link variables I have in a dataframe i.e. ('prop1', 'prop2', 'prop3') to specific colours and shapes in the plot. However, I also want to exclude data (using dplyr::filter) to customise the plot display WITHOUT changing the points and shapes used for a specific variable. A minimal example is given below.
library(ggplot2)
library(dplyr)
library(magrittr)
obj <- c("cmpd 1","cmpd 1","cmpd 1","cmpd 2","cmpd 2")
x <- c(1, 2, 4, 7, 3)
var <- c("prop1","prop2","prop3","prop2","prop3")
y <- c(1, 2, 3, 2.5, 4)
col <- c("#E69F00","#9E0142","#56B4E9","#9E0142","#56B4E9")
shp <- c(0,1,2,1,2)
df2 <- cbind.data.frame(obj,x,var,y,col,shp)
plot <- ggplot(data = df2 %>%
filter(obj %in% c(
"cmpd 1",
"cmpd 2"
)),
aes(x = x,
y = y,
colour = as.factor(var),
shape = as.factor(var))) +
geom_point(size=2) +
#scale_shape_manual(values=shp) +
#scale_color_manual(values=col) +
facet_grid(.~obj)
plot
However, when I redact cmpd1 (just hashing in code) the colour and shape of prop2 and prop3 for cmpd2 change (please see plot2).
To this end, I tried adding in scale_shape_manual and scale_color_manual to the code (currently hashed) and linked these to specific vars (col and shp) in the dataframe (df2), but the same problem arises that both the shape and color of these variables changes when excluding one of the conditions?
Any and all help appreciated.
Try something like this:
library(tidyverse)
obj <- c("cmpd 1","cmpd 1","cmpd 1","cmpd 2","cmpd 2")
x <- c(1, 2, 4, 7, 3)
var <- c("prop1","prop2","prop3","prop2","prop3")
y <- c(1, 2, 3, 2.5, 4)
df2 <- cbind.data.frame(obj,x,var,y)
col <- c("prop1" = "#E69F00",
"prop2" = "#9E0142",
"prop3" = "#56B4E9")
shp <- c("prop1" = 0,
"prop2" = 1,
"prop3" = 2)
plot <- ggplot(data = df2 %>%
filter(obj %in% c(
"cmpd 1",
"cmpd 2"
)),
aes(x = x,
y = y,
colour = var,
shape = var)) +
geom_point(size=2) +
scale_shape_manual(values=shp) +
scale_color_manual(values=col) +
facet_grid(.~obj)
plot
I am trying to come up with a way to consistently colour multiple tidygraph plots. Right now, the issue is, when I plot multiple plots to the screen at once, tidygraph chooses a different colour for each variable. hopefully my example below will explain the issue.
To begin, I create some data, turn them into tidygraph objects, and put them together into a list:
library(tidygraph)
library(ggraph)
library(gridExtra)
# create some data for the tbl_graph
nodes <- data.frame(name = c("x4", NA, NA),
label = c("x4", 5, 2))
nodes1 <- data.frame(name = c("x4", "x2", NA, NA, "x1", NA, NA),
label = c("x4", "x2", 2, 1, "x1", 2, 7))
edges <- data.frame(from = c(1,1), to = c(2,3))
edges1 <- data.frame(from = c(1, 2, 2, 1, 5, 5),
to = c(2, 3, 4, 5, 6, 7))
# create the tbl_graphs
tg <- tbl_graph(nodes = nodes, edges = edges)
tg_1 <- tbl_graph(nodes = nodes1, edges = edges1)
# put into list
myList <- list(tg, tg_1)
Then I have a plotting function that allows me to display all the plots at once. I do this using grid.arrange from the gridExtra package, like so:
plotFun <- function(List){
ggraph(List, "partition") +
geom_node_tile(aes(fill = name), size = 0.25) +
geom_node_label(aes(label = label, color = name)) +
scale_y_reverse() +
theme_void() +
theme(legend.position = "none")
}
# Display all plots
allPlots <- lapply(myList, plotFun)
n <- length(allPlots)
nRow <- floor(sqrt(n))
do.call("grid.arrange", c(allPlots, nrow = nRow))
This will produce something like this:
As you can see, it colours by the variable label for each individual plot. This results in the same variable label being coloured differently in each plot. For example, x4 in the first plot is red and in the second plot is blue.
I'm trying to find a way to make the colours for the variable's label consistent across all plots. Maybe using grid.arrange isn't the best solution!?
Any help is appreciated.
Since each plot doesn't know anything about the other plots, it's best to assign colors yourself. First you can extract all the node names and assign them a color
nodenames <- unique(na.omit(unlist(lapply(myList, .%>%activate(nodes) %>% pull(name) ))))
nodecolors <- setNames(scales::hue_pal(c(0,360)+15, 100, 64, 0, 1)(length(nodenames)), nodenames)
nodecolors
# x4 x2 x1
# "#F5736A" "#00B734" "#5E99FF"
We use scales::hue_pal to get the "default" ggplot colors but you could use whatever you like. Then we just need to customize the color/fill scales for the plots with these colors.
plotFun <- function(List, colors=NULL){
plot <- ggraph(List, "partition") +
geom_node_tile(aes(fill = name), size = 0.25) +
geom_node_label(aes(label = label, color = name)) +
scale_y_reverse() +
theme_void() +
theme(legend.position = "none")
if (!is.null(colors)) {
plot <- plot + scale_fill_manual(values=colors) +
scale_color_manual(values=colors, na.value="grey")
}
plot
}
allPlots <- lapply(myList, plotFun, colors=nodecolors)
n <- length(allPlots)
nRow <- floor(sqrt(n))
do.call("grid.arrange", c(allPlots, nrow = nRow))
I am plotting some data which needs to be labelled with LaTeX expressions, see this small reproducible example. I have a separate list which contains the LaTeX labels for treatment1 and treatment2, to avoid changing the underlying data:
## Required packages
library(tidyverse)
library(latex2exp)
## LaTeX labels
labs <- list(treatment1 = c(unname(TeX("$\\textit{Avo}cado$")), unname(TeX("$Ban_{ana}")) ),
treatment2 = c(unname(TeX("$\\textit{C}at$")), unname(TeX("$D_{og}$")) ) )
## Dummy data frame
df <- data.frame(treatment1 = factor(c(rep("A", 5), rep("B", 5))),
treatment2 = factor(c(rep(c("C", "D"), 5))),
var1 = c(1, 4, 5, 7, 2, 8, 9, 1, 4, 7),
var2 = c(2, 8, 11, 13, 4, 10, 11, 2, 6, 10))
To apply the LaTeX labels to treatment2, the colour variable, I use the label argument in scale_colour_manual():
## Scatter plot with colour varying by treatment2
p <- ggplot(df, aes(x = var1, y = var2, colour = treatment2)) +
geom_point() +
scale_colour_manual(values = c("Black", "Blue"),
labels = labs$treatment2)
## Add facet by treatment1
p + facet_grid(treatment1 ~ .)
I've tried using the labeller argument in facet_grid() but both of these options result in an error:
p + facet_grid(treatment1 ~ ., labeller = labs$treatment1)
p + facet_grid(treatment1 ~ ., labeller = label_value(labs$treatment1))
## > Error in cbind(labels = list(), list(`{`, if (!is.null(.rows) || !is.null(.cols)) { :
## number of rows of matrices must match (see arg 2)
While trying to use the as_labeller() function loads the plot, but with no change to the facet labels:
p + facet_grid(treatment1 ~ ., labeller = as_labeller(labs$treatment1))
I have also tried to change the labels manually (although I would prefer to refer to a separate object due to the size of my actual data frame), which has no observable effect:
p + facet_grid(treatment1 ~ .,
labeller = labeller(treatment1 = c("A" = unname(TeX("$\\textit{Avo}cado$")),
"B" = unname(TeX("$Ban_{ana}")))))
I assume I have to write a new labeller function but I don't really know where to begin. Or am I going about this all wrong?
This works:
flabels <- function(level){
labels <- c(
A = unname(TeX("$\\textit{Avo}cado$")),
B = unname(TeX("$Ban_{ana}"))
)
labels[level]
}
p <- ggplot(df, aes(x = var1, y = var2, colour = treatment2)) +
geom_point()
p + facet_grid(treatment1 ~ .,
labeller = labeller(treatment1 = as_labeller(flabels, default = label_parsed)))
Edited below with complete and functioning code:
I am trying to create a timeline similar to the one this code creates from the Timeline package, however, the options are not very flexible. For example, I would like to create space between each bar so they are not touching. Also, I am wondering if there is a way to add the "End_Status" column to the graph so that it it is obvious that the data stops there because the animal died. Any help is greatly appreciated.
Example dataset:
df <- data.frame(id = c(rep(1201, 10), rep(1202, 14), rep(1203, 6), rep(1204, 22)),
date = c(seq(1,5,1), seq(5,7,1), seq(7,8,1), seq(2,5,1), seq(7,9,1), seq(11,17,1), seq(1,8,1), seq(8,12, 1), seq(12,26,1 )),
schedule = as.factor(c(rep(1, 5,), rep(2, 3), rep(3, 6),
rep (1, 3), rep (2, 2), rep(3, 5),
rep(1,8), rep(2, 5), rep(1,3), rep(3, 12))),
status = c(rep("", 9), "Mort", rep("", 41), "Mort"))
Code to get the output table I am interested in:
library("data.table")
library(plyr)
library(dplyr)
df<-as.data.table(df)
z <- df[, unique(id)]
################
value_change_first <- function(x,a) { for(i in 1:length(x[,schedule])) {
ifelse(x[a,schedule] == x[a+1,schedule],
x <- x[-(a+1)],
a <- a+1)}
return(x)
}
value_change_second <- function(x,a) {
for(i in 1:length(x[,schedule])) {
ifelse(x[a,schedule] == x[a+1,schedule],
x <- x[-(a)],
a <- a+1)}
return(x)
}
#################
output_1 <- c()
for(i in 1:length(z)){
ids <- df[df$id==z[i],]
out<-value_change_first(ids,1)
output_1<-as.data.frame(rbind(output_1, out))}
#################
output_2 <- c()
for(i in 1:length(z)){
ids <- df[df$id==z[i],]
out<-value_change_second(ids,1)
output_2<-as.data.frame(rbind(output_2, out))}
################
output_1$End_Date <- output_2$date
output_1$End_Status <- output_2$status
names(output_1)[names(output_1)=="date"] <- "Start_Date"
output <- output_1[c(1:2, 5, 3, 6)]
From here I can use the Timeline Package to get something close to what I want:
require(timeline)
tl <- timeline(output,
label.col=names(output)[4],
text.color= NA,
group.col=names(output)[1],
start.col=names(output)[2],
end.col = names(output)[3])
tl + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
My question is how to build something similar in ggplot. Furthermore, I want to specifically add the "Mort" message at a given date by individual from the output dataframe.
I ended up getting this to work like I want:
### creating a end status column that includes mortalities and failures
output$End_Status_Date<-NA
for(i in 1:nrow(output)){
if(output$End_Status[i] == "Mort"){
output$End_Status_Date[i]=as.character(output$End_Date)[i]
}
}
for(i in 1:nrow(output)){
if(output$End_Status[i] == "Failure"){
output$End_Status_Date[i]=as.character(output$End_Date)[i]
}
}
### data structuring
output$id<-as.factor(output$id)
output$End_Status_Date<-as.numeric(output$End_Status_Date)
output$End_Status[output$End_Status == ""] <- NA
output$End_Status<-as.character(output$End_Status)
output$End_Status<-as.factor(output$End_Status)
library(ggplot2)
g2 <- ggplot() +
geom_segment(data=output, aes(x=Start_Date, xend=End_Date, y=id, yend=id, color=schedule), linetype=1, size=2) +
geom_point(data=subset(output, is.na(End_Status)==FALSE),
mapping=aes(x=End_Status_Date, y=id, shape=End_Status, fill=End_Status), size=4)+
scale_colour_manual(values=c("blue4", "chartreuse4", "darkmagenta"))+
scale_fill_manual(values=c("white", "red"))+
scale_shape_manual(values=c(21,24))+
xlab("Time")+
ylab("Individuals")+
theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
g2
You did most of the work already. The key here is just to use geom_rect and take advantage of your y-axis labels to set the ymin and ymax values.
ggplot(output, aes(xmin = Start_Date, xmax = End_Date,
ymin = as.numeric(id) - 1, ymax = as.numeric(id),
fill = schedule)) +
geom_rect() +
geom_text(aes(x = End_Date, y = id, label = End_Status))
ID <- 1:10
group <- c(1,1,1,2,2,2,3,3,3,3)
var1 <- c(6:15)
var2 <- c(7:16)
var3 <- c(6:11, NA, NA, NA, NA)
var4 <- c(4:9, NA, NA, NA, NA)
data <- data.frame(ID, group, var1, var2, var3, var4)
library(dplyr)
data %>% group_by(group) %>% boxplot(var1, var2)
The last line does not work as i wish. The idea is to get 4 boxplots in one graphic. Two for each variable. Maybe i need to use ggplot2?
You need to reorganize the data if you want to get both variables in the same plot. Here is a ggplot2 solution:
# load library
library(ggplot2)
library(tidyr)
library(ggthemes)
# reorganize data
df <- gather(data, "ID","group")
#rename columns
colnames(df) <- c("ID","group","var","value")
# plot
ggplot(data=df) +
geom_boxplot( aes(x=factor(group), y=value, fill=factor(var)), position=position_dodge(1)) +
scale_x_discrete(breaks=c(1, 2, 3), labels=c("A", "B", "C")) +
theme_minimal() +
scale_fill_grey()
Making boxplots with the same width is a whole different question (solution here), but one simple alternative would be like this:
# recode column `group` in the `data.frame`.
df <- transform(df, group = ifelse(group==1, 'A', ifelse(group==2, 'B', "C")))
# plot
ggplot(data=df) +
geom_boxplot( aes(x=factor(var), y=value, fill=factor((var))), position=position_dodge(1)) +
geom_jitter(aes(x=factor(var), y=value, color=factor((var)))) +
facet_grid(.~group, scales = "free_x") +
theme_minimal()+
scale_fill_grey() +
theme(axis.text.x=element_blank(),
axis.title.x=element_blank(),
axis.ticks=element_blank())
You might try melting the data frame (mentioned in comment by #lukeA) first and then sticking to base graphics. ggplot2 or lattice are other good options.
library(reshape2)
DF <- melt(data, id.vars = c("ID", "group"), measure.vars = c("var1", "var2"))
boxplot(value ~ group + variable, DF)
Alternate lattice code, also using DF:
bwplot(~ value | variable + group, data = DF)
Alternate ggplot2 code, also using DF:
ggplot(DF, aes(x = factor(group), y = value, fill = variable)) + geom_boxplot()
Although quite late, a found a great base-R solution here
# Create some data, e.g. from https://en.wikipedia.org/wiki/One-way_analysis_of_variance#Example
df <- as.data.frame(matrix(c(6, 8, 13, 8, 12, 9, 4, 9, 11, 5, 11, 8, 3, 6, 7, 4, 8, 12),ncol = 3, byrow = TRUE))
df <- reshape(data = df, direction = "long", idvar=1:3, varying=1:3, sep = "", timevar = "Treatment")
df$Treatment <- as.factor(df$Treatment)
rownames(df) <- NULL
par(mfrow = c(2, 1))
par(mar=c(1,4,4,2) + 0.1) # mar=c(b,l,t,r)
boxplot(V ~ Treatment, data = df, xlab = NULL, xaxt = "n",
ylab = "V", main = "One-way anova with 3 different levels of one factor")
stripchart(V ~ Treatment, # Points
data = df, # Data
method = "jitter", # Random noise
pch = 19, # Pch symbols
col = 4, # Color of the symbol
vertical = TRUE, # Vertical mode
add = TRUE) # Add it over
par(mar=c(5,4,0,2) + 0.1)
boxplot(V ~ Treatment, data = df, xlab = "Treatment",
ylab = "V", main = NULL)
stripchart(V ~ Treatment, # Points
data = df, # Data
method = "overplot", # Random noise
pch = 19, # Pch symbols
col = 4, # Color of the symbol
vertical = TRUE, # Vertical mode
add = TRUE) # Add it over
par(mfrow = c(1, 1))
Result: