Express relations between three variables using ggplot2 in R - r

I have a data frame like this
structure(list(cli_exp = c(1L, 1L, 2L, 1L, 1L, 0L, 2L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 2L, 2L, 0L, 1L, 0L,
1L, 1L, 2L, 0L, 1L), vcs_exp = c(0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 2L, 1L,
1L, 0L, 0L, 0L, 2L, 1L, 0L), web_exp = c(2L, 2L, 2L, 1L, 0L,
0L, 1L, 2L, 0L, 0L, 3L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 2L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 2L, 0L, 0L)), .Names = c("cli_exp", "vcs_exp",
"web_exp"), row.names = c(NA, 30L), class = "data.frame")
I want to use ggplot2 to express the relation between these three variables and tried the simple point plot
ggplot(data = data) +
geom_point(mapping = aes(x = web_exp, y = vcs_exp, color = cli_exp))
But apparently, there are many overlapping data points, which are not suitable for point display. Are there any better ways?

I would use ggpairs from GGally package
tmp_df <- structure(list(cli_exp = c(1L, 1L, 2L, 1L, 1L, 0L, 2L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 2L, 2L, 0L, 1L, 0L,
1L, 1L, 2L, 0L, 1L), vcs_exp = c(0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 2L, 1L,
1L, 0L, 0L, 0L, 2L, 1L, 0L), web_exp = c(2L, 2L, 2L, 1L, 0L,
0L, 1L, 2L, 0L, 0L, 3L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 2L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 2L, 0L, 0L)), .Names = c("cli_exp", "vcs_exp",
"web_exp"), row.names = c(NA, 30L), class = "data.frame")
library(GGally)
ggpairs(tmp_df,
upper = list(continuous = wrap("cor", size = 10)),
lower = list(continuous = "smooth"))
Edit: use pairs from base R
pairs(tmp_df)
Use pairs.panels from psych package
library(psych)
pairs.panels(tmp_df,
method = "pearson",
density = TRUE,
ellipses = TRUE
)

As you mentioned, the points overlap, so some points aren't visible when using geom_point.
ggplot(data = df, aes(x = web_exp, y = vcs_exp, color = cli_exp)) +
geom_point()
This can be solved by adding a small amount of jitter. Also, making the points slightly transparent will make any overlaps more clear.
ggplot(data = df, aes(x = web_exp, y = vcs_exp, color = cli_exp)) +
geom_jitter(width = 0.05, height = 0.05, alpha = 0.8)

Related

How can I fix the runtime error in ecdf function in R?

When I run this code-
a<- read.delim(file.choose("data.txt"))
d<-sort(a$d)
plot(d, sort(ecdf(d)(d)),type="s", lty=2,col="red", ylab= "P(X<=x)",ylim= 0:1)
it makes me make this mistake-
Error in ecdf(d) : 'x' must have 1 or more non-missing values
help?
I ran your code and it seems to be alright. I've just changed the second line of your code, because the only column provided in your data was named as x, instead of d.
Check it out:
# load data
a = structure(list(x = c(4L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 4L, 1L, 2L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 3L, 0L, 5L, 2L, 2L, 1L, 0L, 0L, 2L, 0L, 0L,
0L, 1L, 3L, 3L, 0L, 0L, 0L, 2L, 0L, 2L, 1L, 1L, 4L, 4L,
0L, 1L, 3L, 1L, 0L, 2L, 1L, 2L, 0L, 0L, 0L, 1L, 0L, 1L,
6L, 0L, 2L, 2L, 0L, 1L, 1L, 2L, 1L, 0L, 1L, 0L, 3L, 0L,
3L, 0L, 4L, 3L, 2L, 2L, 2L, 1L, 3L, 0L, 3L, 2L, 0L, 1L,
2L, 1L)), class = "data.frame", row.names = c(NA, -100L))
# sort x column (the only column)
d = sort(a$x)
# plot
plot(d, sort(ecdf(d)(d)), type = "s", lty = 2, col = "red",
ylab = "P(X<=x)", ylim = 0:1)
Output:

Graph with two y axes in r

I was searching for a code to create a nice graph with two y axes. However i couldn't find a solution which helped me. (I am not using r often)
I have the time as x variable where i want to plot value1 and value2 on the right and left y achses respectively.
My data are in one dataset, so that i want to graph datasat$time on x axis and data$value1 on y left axis and data$value2 on right y axis. The way to illustrate should be a line with two different colours for each (data$value1 and data$value2). In addition there should be a legend for the lines.
Can someone help me with it?
Please, find my data w below.
Here is a script on how a dobbelt y-axis may be integrated using ggplot
ggplot() +
geom_bar(mapping = aes(x = w$WHO[w$Death==1]),
stat = "count", alpha=0.2, colour="#1C73C2",
fill="#ECF0F9") +
scale_y_continuous(name = "Number of deaths", breaks=yaks, sec.axis = sec_axis(~ . * 1 , name = "Mortality rate per 100 person-yrs", breaks=yaks, labels=c("0","5","10","15","20","25","30","35","40","45"))) +
scale_x_continuous(name="", breaks = c(1,2,3,4), labels =c("\nWHO-I\nn=37","\nWHO-II\nn=29","\nWHO-III\nn=19","\nUnknown\nn=25")) +
coord_cartesian(ylim=c(0, 18)) +
geom_point(mapping = aes(x = 1, y = 3.329993), size=5,alpha=0.7, shape=18, colour="red") +
geom_point(mapping = aes(x = 2, y = 12.424504), size=5,alpha=0.7, shape=18, colour="red") +
geom_point(mapping = aes(x = 3, y = 17.23519), size=5, alpha=0.7,shape=18, colour="red") +
geom_point(mapping = aes(x = 4, y = 4.549763), size=5, alpha=0.7, shape=18, colour="red") +
annotate("text", x = c(1,2,3,4) , y = c(3.329993+1.3, 12.424504+1.3, 17.23519+1.3,4.549763+1.3 ), label = c("8.3","31.1","43.1","11.4"), col="red", fontface=2, cex=4) +
theme(axis.text.y.right = element_text(color = "red", size = 11),
axis.title.y=element_text(color="darkgrey", size=11,face="bold", margin = margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.y = element_text(color = "#1C73C2", size = 11),
axis.title.y.right=element_text(color="darkgrey", size=11,face="bold", margin = margin(t = 0, r = 0, b = 0, l = 15)),
axis.text.x = element_text(color = "grey20", size = 11))
My data w
w <- structure(list(WHO = c(1L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 1L, 2L,
3L, 3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 4L, 4L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 4L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 2L, 1L, 2L, 2L, 4L, 4L,
4L, 2L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 3L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
3L, 4L, 3L, 4L, 3L), response = c(0L, 1L, 0L, 0L, 0L, 1L, 1L,
1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, NA, 1L), Death = c(0L, 1L, 1L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, NA, 1L)), class = "data.frame", row.names = c(NA,
-111L))

SomersDelta: Error in as.table.default(x) : cannot coerce to a tabl in R

data example
sommer=structure(list(tub = c(1L, 2L, 0L, 2L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 2L, 1L, 0L, 0L, 1L, 0L, 2L, 1L, 1L, 0L, 0L, 1L, 0L, 0L,
2L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 3L, 0L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 3L,
2L, 0L, 1L, 0L, 3L, 2L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 3L, 1L, 1L,
3L, 1L), fq = c(1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 3L, 6L, 1L, 0L, 0L, 1L, 0L, 2L, 4L, 2L, 0L, 0L, 2L, 0L,
0L, 5L, 0L, 1L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 14L, 0L, 1L, 1L,
1L, 4L, 0L, 1L, 0L, 4L, 0L, 0L, 0L, 6L, 1L, 0L, 0L, 1L, 1L, 0L,
1L, 3L, 4L, 0L, 1L, 0L, 8L, 1L, 2L, 0L, 0L, 0L, 2L, 0L, 0L, 3L,
2L, 2L, 1L, 1L)), .Names = c("tub", "fq"), class = "data.frame", row.names = c(NA,
-75L))
i want calculate SomersDelta
library("DescTools")
SomersDelta(sommer, direction = c("row", "column"))
and i get the error
Error in as.table.default(x) : cannot coerce to a table
also i want get spine plot
library("coin")
spineplot(sommer)
but i get the error
Error in margin.table(tab, 1) : 'x' is not an array
are there two errors interconnection?
How to calculate SomersDelta and get spine plot like this
spine plot
It requires a matrix as input. According to ?SomersDelta
x - a numeric vector or a table. A matrix will be treated as table.
Here, the dataset is a data.frame class. We can convert it to matrix (as.matrix) and it should work fine
SomersDelta(as.matrix(sommer), direction = c("row", "column"))
#[1] -0.06137931

identifying rows in data frame that exhibit patterns

Below I have code with 3 columns: a group field, a open/close field for the store, and the rolling sum of 3 month opens for the store. I also have the desired solution output.
My dataset can be thought of as an employees availability. You can assume each row to be a different time period (hour, day,month, year, whatever). In the open/closed column I have whether or not the employee was present. The 3month rolling column is a sum of the previous rows.
What I want to identify is the non-zero values in this rolling sum column following a gap of at least 3 zero rows for that particular group. While not present in this dataset, you can assume that there might be more than one 'gap' of zeros present.
structure(list(Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("A", "B"), class = "factor"), X0_closed_1_open = c(0L,
1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L), X3month_roll_open = c(0L,
0L, 1L, 2L, 2L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 2L, 0L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L), desired_solution = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("no", "yes"), class ="factor")), .Names = c("Group", "X0_closed_1_open", "X3month_roll_open", "desired_solution"), class = "data.frame", row.names = c(NA,
-26L))
One option is:
res <- unsplit(
lapply(split(df1, df1$Group), function(x) {
rl <- with(x,rle(X3month_roll_open==0))
indx <- cumsum(c(0,diff(inverse.rle(within.list(rl,
values[values] <- lengths[values]>=3)))<0))
x$Flag <- indx!=0 & x[,3]!=0
x}),
df1$Group)
NOTE: Instead of 'yes/no', it may be better to have 'TRUE/FALSE' for easing subsetting.
identical(c('no', 'yes')[res$Flag+1L], as.character(res$desired_solution))
#[1] TRUE

plotting mean of variable versus matrix of conditions in R using ggplot2

I have a data.frame X with column X and a data.frame C with M binary values (0/1). Both data.frames have N rows (examples).
I would like to average X on each case 0/1 of each m out of M column of C.
When I plot this, I accept to get M*2 bars where x axis are the column names of each column in C and red/blue is for when catergory m (out of M) is either 0/1.
Can this be done using ggplot2?
Any other quick way to do that without for loops?
Result sketch:
*
* * *
* * * *
m1=0, m1=1, m2=0, m2=1 ,....
Thanks,
Hanan
data sample below:
aggregate(X, by = as.list(C), FUN=mean) will aggregate to any combination of C. This is not what I want. I want X aggregated for every value of each column of C INDEPENDENTLY .
X<-structure(list(V1 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)),
.Names = "V1", class = "data.frame", row.names = c(NA, -100L))
C<-structure(list(V1 = c(1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L), V2 = c(1L, 0L, 1L, 0L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L
), V3 = c(1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 1L)),
.Names = c("V1", "V2", "V3"), class = "data.frame", row.names = c(NA, -100L))
Here is a way to transform your data broken down by incremental steps
dd <- do.call(rbind,
Map(function(a,b) cbind(C=a, b), names(C),
lapply(
lapply(
lapply(C, table, X[[1]], dnn=c("CV","X")),
as.data.frame),
subset, X==1)
))
So here we use table() to get the counts of each X value for each C value. Then we turn that into a data.frame and take only the counts for X=1. Finally we add the correct name of the C column and merge all the data.frames into one large data.frame.
Then we can plot that with
ggplot(dd, aes(x=C, y=Freq, fill=CV)) +
geom_bar(position="dodge", stat="identity")
So the columns of C are listed along the x-axis and the values of C are represented by the color of the bar. The counts of X=1 in each of the groups are the heights of the bars.

Resources