Graph with two y axes in r - r

I was searching for a code to create a nice graph with two y axes. However i couldn't find a solution which helped me. (I am not using r often)
I have the time as x variable where i want to plot value1 and value2 on the right and left y achses respectively.
My data are in one dataset, so that i want to graph datasat$time on x axis and data$value1 on y left axis and data$value2 on right y axis. The way to illustrate should be a line with two different colours for each (data$value1 and data$value2). In addition there should be a legend for the lines.
Can someone help me with it?

Please, find my data w below.
Here is a script on how a dobbelt y-axis may be integrated using ggplot
ggplot() +
geom_bar(mapping = aes(x = w$WHO[w$Death==1]),
stat = "count", alpha=0.2, colour="#1C73C2",
fill="#ECF0F9") +
scale_y_continuous(name = "Number of deaths", breaks=yaks, sec.axis = sec_axis(~ . * 1 , name = "Mortality rate per 100 person-yrs", breaks=yaks, labels=c("0","5","10","15","20","25","30","35","40","45"))) +
scale_x_continuous(name="", breaks = c(1,2,3,4), labels =c("\nWHO-I\nn=37","\nWHO-II\nn=29","\nWHO-III\nn=19","\nUnknown\nn=25")) +
coord_cartesian(ylim=c(0, 18)) +
geom_point(mapping = aes(x = 1, y = 3.329993), size=5,alpha=0.7, shape=18, colour="red") +
geom_point(mapping = aes(x = 2, y = 12.424504), size=5,alpha=0.7, shape=18, colour="red") +
geom_point(mapping = aes(x = 3, y = 17.23519), size=5, alpha=0.7,shape=18, colour="red") +
geom_point(mapping = aes(x = 4, y = 4.549763), size=5, alpha=0.7, shape=18, colour="red") +
annotate("text", x = c(1,2,3,4) , y = c(3.329993+1.3, 12.424504+1.3, 17.23519+1.3,4.549763+1.3 ), label = c("8.3","31.1","43.1","11.4"), col="red", fontface=2, cex=4) +
theme(axis.text.y.right = element_text(color = "red", size = 11),
axis.title.y=element_text(color="darkgrey", size=11,face="bold", margin = margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.y = element_text(color = "#1C73C2", size = 11),
axis.title.y.right=element_text(color="darkgrey", size=11,face="bold", margin = margin(t = 0, r = 0, b = 0, l = 15)),
axis.text.x = element_text(color = "grey20", size = 11))
My data w
w <- structure(list(WHO = c(1L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 1L, 2L,
3L, 3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 4L, 4L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 4L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 2L, 1L, 2L, 2L, 4L, 4L,
4L, 2L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 3L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
3L, 4L, 3L, 4L, 3L), response = c(0L, 1L, 0L, 0L, 0L, 1L, 1L,
1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, NA, 1L), Death = c(0L, 1L, 1L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, NA, 1L)), class = "data.frame", row.names = c(NA,
-111L))

Related

How can I fix the runtime error in ecdf function in R?

When I run this code-
a<- read.delim(file.choose("data.txt"))
d<-sort(a$d)
plot(d, sort(ecdf(d)(d)),type="s", lty=2,col="red", ylab= "P(X<=x)",ylim= 0:1)
it makes me make this mistake-
Error in ecdf(d) : 'x' must have 1 or more non-missing values
help?
I ran your code and it seems to be alright. I've just changed the second line of your code, because the only column provided in your data was named as x, instead of d.
Check it out:
# load data
a = structure(list(x = c(4L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 4L, 1L, 2L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 3L, 0L, 5L, 2L, 2L, 1L, 0L, 0L, 2L, 0L, 0L,
0L, 1L, 3L, 3L, 0L, 0L, 0L, 2L, 0L, 2L, 1L, 1L, 4L, 4L,
0L, 1L, 3L, 1L, 0L, 2L, 1L, 2L, 0L, 0L, 0L, 1L, 0L, 1L,
6L, 0L, 2L, 2L, 0L, 1L, 1L, 2L, 1L, 0L, 1L, 0L, 3L, 0L,
3L, 0L, 4L, 3L, 2L, 2L, 2L, 1L, 3L, 0L, 3L, 2L, 0L, 1L,
2L, 1L)), class = "data.frame", row.names = c(NA, -100L))
# sort x column (the only column)
d = sort(a$x)
# plot
plot(d, sort(ecdf(d)(d)), type = "s", lty = 2, col = "red",
ylab = "P(X<=x)", ylim = 0:1)
Output:

How to color outline differently from fill in histogram using ggplot / R? [duplicate]

This question already has an answer here:
Manually colouring plots with `scale_fill_manual` in ggplot2 not working
(1 answer)
Closed 3 years ago.
Please find My data q below
I have produced the following plot:
By using the script:
library(tidyverse)
w %>%
as_tibble() %>%
mutate(Studie=as.character(Studie),
best.resp =as.factor(best.resp)) %>%
bind_rows(., mutate(., Studie="all")) %>%
count(Studie, best.resp) %>%
ggplot(aes(Studie, n, fill= best.resp)) +
scale_fill_manual(values = c("green", "purple", "yellow")) +
scale_colour_manual(values = c("blue", "red","orange")) +
geom_col(position = position_dodge2(preserve = "single", padding = 0))
I want the outline around each bar to have one set of colors while the fill to have another set of colors. As you can see, I tried using scale_fill_manual and scale_colour_manual, however, that does not solve my problem.
I have attached a picture illustrating what I mean by outlines having one color and the fill another color:
My data
q <- structure(list(Studie = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L), best.resp = c(0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L,
1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 2L)), .Names = c("Studie",
"best.resp"), class = "data.frame", row.names = c(NA, -106L))
You need to map a variable to the colour aesthetic (in aes):
ggplot(aes(Studie, n, fill= best.resp, colour = best.resp)
I think in your code you have a w %>% is it supposed to be q?
You need to specify color in your aes command. Right now, you just have the fill-- so the scale color manual later, doesn't apply to anything.
q %>%
as_tibble() %>%
mutate(Studie=as.character(Studie),
best.resp =as.factor(best.resp)) %>%
bind_rows(., mutate(., Studie="all")) %>%
count(Studie, best.resp) %>%
ggplot(aes(Studie, n, color = best.resp, fill= best.resp)) +
scale_fill_manual(values = c("green", "purple", "yellow")) +
scale_colour_manual(values = c("blue", "red","orange")) +
geom_col(position = position_dodge2(preserve = "single", padding = 0))

How can I add specific value on x-axis in ggsurvplot/survminer in R?

I want 56 to show on the x-axis, but I can't figure it out.
I have the following script. I have tried to add the following to the script xlim = c(seq(0,100, by=10),56) but that does not seem to work.
I have tried to google it and I have read on R-documentation. I hope you can help.
library(survival)
library(survminer)
library(ggplot2)
fit <- survfit(Surv(p$time.recur.months, p$recurrence) ~ p$simpson.grade,
conf.type="log", data=p)
j <- ggsurvplot(
fit,
data = p,
fun="cumhaz",
risk.table = TRUE,
pval = TRUE,
pval.coord = c(0, 0.25),
conf.int = F,
legend.labs=c("Simpson Grade 1" ,"Simpson Grade 2", "Simpson Grade 3",
"Simpson Grade 4"),
size=c(0.7,0.7,0.7,0.7),
xlim = c(0,100),
alpha=c(0.7),
break.time.by = 10,
xlab="Time in months",
#ylab="Survival probability",
ggtheme = theme_gray(),
risk.table.y.text.col = T,
risk.table.y.text = TRUE,
ylim=c(0,0.5),
cumevents=T,
palette="Set1"
)
# My Data
p <- structure(list(recurrence = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), time.recur.months = c(NA, NA,
NA, NA, NA, NA, 92L, NA, NA, NA, 74L, NA, NA, NA, 2L, 8L, NA,
NA, NA, NA, 58L, NA, NA, NA, NA, NA, 3L, NA, 4L, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 39L, NA, NA, NA, NA, 15L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 12L, 56L, 57L, NA, NA, 49L, 17L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 9L, NA,
89L, NA, NA, NA, 8L, 6L, 8L, 4L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 60L, NA, NA, 38L, NA, NA, NA, NA, NA, 90L,
NA, 58L, 54L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 53L, NA, NA, 124L, NA, NA,
NA, NA, NA, NA, 7L, NA), simpson.grade = c(3L, 1L, 1L, 2L, 4L,
1L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 2L, 1L, 2L, 1L, 4L, 2L, 3L, 2L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 4L, 3L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
3L, 4L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 4L, 1L, 1L, 1L, 4L, 1L, 1L,
1L, 2L, 1L, 2L, 4L, 4L, 1L, 4L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 1L, 4L,
4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 3L, 1L, 1L,
1L, 4L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 2L, 4L, 1L, 4L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L)), .Names = c("recurrence",
"time.recur.months", "simpson.grade"), class = "data.frame", row.names
= c(NA,
-176L))
j is a ggsurvplot object, which is in turn a list of other objects. You can change the ggplot object at j$plot. The following will add 56 to the x-axis labels:
j$plot <- j$plot +
scale_x_continuous(breaks = sort(c(seq(0, 100, 10), 56)))
j
Personally I don't think it's a good look, as I expect evenly spaced breaks along the x-axis to match the tables below. If you want to draw attention to the position 56, I would suggest a vertical line and/or annotated label instead:
j$plot <- j$plot +
geom_vline(xintercept = 56, linetype = "dashed") +
annotate("text", x = 56, y = 0, label = "56", hjust = -0.5)
j

Change label name in ggsurvplot

I have attached My Data below.
I wish to relabel "Cumulative number of events", which seem to be the default text. I would like it to read: "Cumulative number of recurrences". I can't seem to figure out how to change it - is it even possible to change the text?
My graph looks like this:
The graph was computed with this
library(survival)
library(survminer)
library(ggplot2)
fit <- survfit(Surv(p$time.recur.months, p$recurrence) ~ p$simpson.grade, conf.type="log", data=p)
j <- ggsurvplot(
fit,
data = p,
fun="cumhaz",
risk.table = TRUE,
pval = TRUE,
pval.coord = c(0, 0.25),
conf.int = F,
legend.labs=c("Simpson Grade 1" ,"Simpson Grade 2", "Simpson Grade 3",
"Simpson Grade 4"),
size=c(0.7,0.7,0.7,0.7),
xlim = c(0,100),
alpha=c(0.7),
break.time.by = 10,
xlab="Time in months",
#ylab="Survival probability",
ggtheme = theme_gray(),
risk.table.y.text.col = T,
risk.table.y.text = TRUE,
ylim=c(0,0.5),
cumevents=T,
palette="Set1"
)
My Data
p <- structure(list(recurrence = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), time.recur.months = c(NA, NA,
NA, NA, NA, NA, 92L, NA, NA, NA, 74L, NA, NA, NA, 2L, 8L, NA,
NA, NA, NA, 58L, NA, NA, NA, NA, NA, 3L, NA, 4L, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 39L, NA, NA, NA, NA, 15L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 12L, 56L, 57L, NA, NA, 49L, 17L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 9L, NA,
89L, NA, NA, NA, 8L, 6L, 8L, 4L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 60L, NA, NA, 38L, NA, NA, NA, NA, NA, 90L,
NA, 58L, 54L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 53L, NA, NA, 124L, NA, NA,
NA, NA, NA, NA, 7L, NA), simpson.grade = c(3L, 1L, 1L, 2L, 4L,
1L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 2L, 1L, 2L, 1L, 4L, 2L, 3L, 2L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 4L, 3L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
3L, 4L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 4L, 1L, 1L, 1L, 4L, 1L, 1L,
1L, 2L, 1L, 2L, 4L, 4L, 1L, 4L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 1L, 4L,
4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 3L, 1L, 1L,
1L, 4L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 2L, 4L, 1L, 4L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L)), .Names = c("recurrence",
"time.recur.months", "simpson.grade"), class = "data.frame", row.names
= c(NA,
-176L))
To change title for cumevents table you need to use argument cumevents.title.
ggsurvplot(fit, p,
fun = "cumhaz",
risk.table = TRUE, cumevents = TRUE,
pval = TRUE, pval.coord = c(0, 0.25), conf.int = FALSE,
legend.labs = paste("Simpson Grade", 1:4),
xlab = "Time in months",
cumevents.title = "Cumulative number of recurrences",
size = rep(0.7, 4),
xlim = c(0, 100), ylim = c(0, 0.5),
alpha = 0.7,
break.time.by = 10,
ggtheme = theme_gray(),
risk.table.y.text.col = TRUE,
risk.table.y.text = TRUE,
palette = "Set1")

Express relations between three variables using ggplot2 in R

I have a data frame like this
structure(list(cli_exp = c(1L, 1L, 2L, 1L, 1L, 0L, 2L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 2L, 2L, 0L, 1L, 0L,
1L, 1L, 2L, 0L, 1L), vcs_exp = c(0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 2L, 1L,
1L, 0L, 0L, 0L, 2L, 1L, 0L), web_exp = c(2L, 2L, 2L, 1L, 0L,
0L, 1L, 2L, 0L, 0L, 3L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 2L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 2L, 0L, 0L)), .Names = c("cli_exp", "vcs_exp",
"web_exp"), row.names = c(NA, 30L), class = "data.frame")
I want to use ggplot2 to express the relation between these three variables and tried the simple point plot
ggplot(data = data) +
geom_point(mapping = aes(x = web_exp, y = vcs_exp, color = cli_exp))
But apparently, there are many overlapping data points, which are not suitable for point display. Are there any better ways?
I would use ggpairs from GGally package
tmp_df <- structure(list(cli_exp = c(1L, 1L, 2L, 1L, 1L, 0L, 2L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 2L, 2L, 0L, 1L, 0L,
1L, 1L, 2L, 0L, 1L), vcs_exp = c(0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 2L, 1L,
1L, 0L, 0L, 0L, 2L, 1L, 0L), web_exp = c(2L, 2L, 2L, 1L, 0L,
0L, 1L, 2L, 0L, 0L, 3L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 2L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 2L, 0L, 0L)), .Names = c("cli_exp", "vcs_exp",
"web_exp"), row.names = c(NA, 30L), class = "data.frame")
library(GGally)
ggpairs(tmp_df,
upper = list(continuous = wrap("cor", size = 10)),
lower = list(continuous = "smooth"))
Edit: use pairs from base R
pairs(tmp_df)
Use pairs.panels from psych package
library(psych)
pairs.panels(tmp_df,
method = "pearson",
density = TRUE,
ellipses = TRUE
)
As you mentioned, the points overlap, so some points aren't visible when using geom_point.
ggplot(data = df, aes(x = web_exp, y = vcs_exp, color = cli_exp)) +
geom_point()
This can be solved by adding a small amount of jitter. Also, making the points slightly transparent will make any overlaps more clear.
ggplot(data = df, aes(x = web_exp, y = vcs_exp, color = cli_exp)) +
geom_jitter(width = 0.05, height = 0.05, alpha = 0.8)

Resources