How to combine multiple outputs in summarise in ddply in r? - r

For the Orange data frame why doesn't the following work?
library(plyr)
> ddply(Orange, .(Tree), summarise, circum = list(circumference))
Error: unsupported type for column 'circum' (VECSXP)
I want to combine all circumference values in 1 column.
Orange data frame:
> dput(Orange)
structure(list(Tree = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("3",
"1", "5", "2", "4"), class = c("ordered", "factor")), age = c(118,
484, 664, 1004, 1231, 1372, 1582, 118, 484, 664, 1004, 1231,
1372, 1582, 118, 484, 664, 1004, 1231, 1372, 1582, 118, 484,
664, 1004, 1231, 1372, 1582, 118, 484, 664, 1004, 1231, 1372,
1582), circumference = c(30, 58, 87, 115, 120, 142, 145, 33,
69, 111, 156, 172, 203, 203, 30, 51, 75, 108, 115, 139, 140,
32, 62, 112, 167, 179, 209, 214, 30, 49, 81, 125, 142, 174, 177
)), .Names = c("Tree", "age", "circumference"), row.names = c(NA,
35L), class = c("nfnGroupedData", "nfGroupedData", "groupedData",
"data.frame"), formula = circumference ~ age | Tree, labels = structure(list(
x = "Time since December 31, 1968", y = "Trunk circumference"), .Names = c("x",
"y")), units = structure(list(x = "(days)", y = "(mm)"), .Names = c("x",
"y")))

Related

Adding sample size to ggplot boxplot

I'm interested to see how age is related to a continuous outcome, for which I have the following data:
library(dplyr)
library(tidyverse)
library(magrittr)
library(ggplot2)
mydata <-
structure(list(ID = c(104, 157, 52, 152, 114, 221, 320, 125,
75, 171, 80, 76, 258, 82, 142, 203, 37, 92, 202, 58, 194, 38,
4, 137, 25, 87, 40, 117, 21, 255, 277, 315, 96, 134, 185, 94,
3, 153, 172, 65, 279, 209, 60, 13, 154, 160, 24, 29, 159, 213,
127, 74, 48, 126, 184, 132, 61, 141, 27, 49, 8, 39, 164, 162,
34, 205, 179, 119, 77, 135, 138, 165, 103, 253, 14, 20, 310,
84, 30, 273, 22, 105, 262, 116, 86, 83, 145, 31, 95, 51, 81,
271, 36, 50, 189, 2, 115, 7, 197, 54), age = c(67.1, 70.7, 53,
61.7, 66.1, 57.7, 54.1, 67.2, 60.9, 55.8, 40.7, 57.6, 64.1, 70.7,
47.5, 46.3, 66.7, 55, 63.3, 68.2, 61.2, 60.5, 52, 65.3, 48.9,
56.9, 62.7, 75.2, 61.4, 57.9, 53.6, 58.1, 51, 67.3, 63.9, 57,
43.2, 64.7, 62.8, 56.3, 51.7, 39.4, 45.2, 57.8, 55.7, 69.6, 61.5,
50.1, 73.7, 55.5, 65.2, 54.6, 49, 35.2, 52.9, 46.3, 55, 52.5,
54.2, 61, 57.4, 56.5, 53.6, 47.7, 64.2, 53.4, 60.9, 58.2, 60.7,
50.3, 48.3, 74.7, 52.1, 59.9, 52.4, 70.8, 61.2, 66.5, 55.4, 57.5,
59.2, 60.1, 52.3, 60.2, 54.8, 36.3, 61.5, 48.6, 56, 62, 64.8,
40.4, 68.3, 60, 69.1, 56.6, 45.3, 58.5, 52.3, 52), continuous_outcome = c(3636.6,
1128.2, 2007.5, 802.9, 332.3, 2636.1, 169.5, 67.9, 3261.8, 1920.3,
155.2, 1677.2, 198.2, 11189.7, 560.9, 633.1, 196.1, 13.9, 100.7,
7594.5, 1039.8, 83.9, 2646.8, 284.6, 306, 1135.6, 1883.1, 5681.4,
1706.2, 2241.1, 97.7, 1106.8, 1107.1, 290.8, 2123.4, 267, 115.3,
138.5, 152.7, 1338.9, 6709.8, 561.7, 1931.7, 3112.4, 1876.3,
3795.9, 5706.7, 7.4, 1324.9, 4095.4, 205.4, 1886, 177.3, 304.4,
1319.1, 415.9, 537.2, 3141.1, 740, 1976.7, 624.8, 983.1, 1163.5,
1432.6, 3730.4, 2023.4, 498.2, 652.5, 982.7, 1345.3, 138.4, 1505.1,
3528.1, 11.9, 884.5, 10661.6, 1911.4, 2800.8, 81.5, 396.4, 409.1,
417.3, 186, 1892.4, 1689.7, 0, 210.1, 210.5, 3484.5, 3196.8,
57.2, 20.2, 947, 540, 1603.1, 1571.8, 9.1, 149.2, 122, 63.2),
age_decades = structure(c(3L, 4L, 2L, 3L, 3L, 2L, 2L, 3L,
3L, 2L, 1L, 2L, 3L, 4L, 1L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 2L,
3L, 1L, 2L, 3L, 4L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 1L, 3L,
3L, 2L, 2L, 1L, 1L, 2L, 2L, 3L, 3L, 2L, 4L, 2L, 3L, 2L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 2L,
3L, 2L, 1L, 4L, 2L, 2L, 2L, 4L, 3L, 3L, 2L, 2L, 2L, 3L, 2L,
3L, 2L, 1L, 3L, 1L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 2L, 1L, 2L,
2L, 2L), .Label = c("1", "2", "3", "4"), class = "factor")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
To make a boxplot of age decades on the x axis and my continuous outcome I'm using ggplot2.
I want to make several, and automatically plot the sample size on the x-axis ticks. To do so I've computed labels in the dataset as follows:
# Build one x-axis label per age decade: a description of the decade followed,
# on a second line, by the number of observations in that decade.
mydata <-
  mydata %>%
  group_by(age_decades) %>%
  # Base R's as.character() is always available; as_character() is a deprecated
  # rlang helper and rlang is not attached by the library() calls above.
  mutate(n_decades = as.character(n())) %>%
  mutate(label_decades = case_when(age_decades == 1 ~ "Below 50",
                                   age_decades == 2 ~ "Between 50 and 60",
                                   age_decades == 3 ~ "Between 60 and 70",
                                   age_decades == 4 ~ "Above 70")) %>%
  mutate(label_decades = paste0(label_decades, '\n n = ', n_decades)) %>%
  ungroup() %>%
  relocate(age_decades, label_decades, .after = age) %>%
  # n_decades was only needed to build the label text.
  select(-n_decades) %>%
  arrange(ID)
Then I've tried to plot the boxplot using the newly created variable label_decades to label. The first thing I tried was:
ggplot(mydata, aes(x=age_decades, y=continuous_outcome)) +
geom_boxplot() +
scale_x_discrete(labels=mydata$label_decades)
But that just plots the first few labels as they occur in the dataset (so they don't correspond to the actual boxplots):
Then I tried:
ggplot(mydata, aes(x=age_decades, y=continuous_outcome)) +
geom_boxplot() +
geom_text(data=mydata, aes(age_decades, Inf, label=label_decades),
vjust = 15, size=4)
Which works better but the font is really weird and also the original x axis labels/ticks are still showing.
Anyone know how to solve this issue? Thanks!
The font looks weird because there are many labels with the same text plotted on top of each other. You can use distinct to get only one label per x tick and use the theme function to get rid of the x tick labels:
# Boxplot per decade with one text label per box; the default x tick labels
# are hidden because the text layer replaces them.
ggplot(mydata, aes(x = age_decades, y = continuous_outcome)) +
  geom_boxplot() +
  geom_text(
    # distinct() keeps a single row per decade, so every label is drawn once
    # instead of once per observation.
    data = distinct(mydata, age_decades, label_decades),
    mapping = aes(label = label_decades),
    # Fixed vertical position of the labels, in units of the y axis.
    y = 9e3
  ) +
  theme(axis.text.x = element_blank())
One way would be to turn the labels to factor as well.
library(dplyr)
library(ggplot2)
# Attach a per-decade axis label ("<description>\n n = <group size>") to every
# row and store it as a factor.
mydata <- mydata %>%
  group_by(age_decades) %>%
  mutate(
    # Group size as text, used only to build the label below.
    n_decades = as.character(n()),
    label_decades = case_when(
      age_decades == 1 ~ "Below 50",
      age_decades == 2 ~ "Between 50 and 60",
      age_decades == 3 ~ "Between 60 and 70",
      age_decades == 4 ~ "Above 70"
    ),
    label_decades = factor(paste0(label_decades, '\n n = ', n_decades))
  ) %>%
  ungroup() %>%
  relocate(age_decades, label_decades, .after = age) %>%
  select(-n_decades) %>%
  arrange(ID)
You can then use its levels in scale_x_discrete.
# Boxplot per decade; the tick labels are replaced by the levels of
# label_decades. An unnamed label vector is matched to the ticks by position,
# so the level order of label_decades has to follow that of age_decades.
ggplot(mydata, aes(age_decades, continuous_outcome)) +
  geom_boxplot() +
  scale_x_discrete(labels = levels(mydata[["label_decades"]]))

How can I create a logical argument using a value that has character objects in Studio

I've been having some difficulty writing a conditional expression in which some of the arguments are not numeric values but character vectors. I created vectors of the medications the participants take, grouped according to their function. The result I want is: if at least 2 out of 6 conditions are present then show "High", otherwise "Low". Something like this (I included a subset of the data):
structure(list(ID = c("MI-001", "MI-003", "MI-009", "MI-012",
"MI-022", "MI-023", "MI-025", "MI-037", "MI-038", "MI-043", "MI-058",
"MI-060", "MI-068", "MI-069", "MI-073", "MI-078", "MI-082", "MI-085",
"MI-086"), Medicamentos = structure(c(1L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L), .Label = c("No",
"Si"), class = "factor"), Tipo_med = c("NA", "NA", "NA", "Enalapril, Furosemida, Metoprolol, Alopurinol",
"NA", "Beclometasona, Salbutamol, Esomeprazol, Losartán, ketotifeno, Atorvastatina",
"Eutirox, Sertralina, Atorvastatina, Metformina", "NA", "NA",
"Verapamilo, Enalapril", "NA", "NA", "NA", "NA", "NA", "NA",
"Enalapril, Tiroxin, Propanolol", "NA", "Losartán, Hidroclorotiazida"
), Clasificacion_general = structure(c(1L, 1L, 1L, 1L, 2L, 3L,
1L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 3L, 3L, 2L, 2L, 1L), .Label = c("Hypertension",
"Normal", "Prehipertensión"), class = "factor"), HDL = c(38,
51, 51, 44, 47, 63, 44, 51, 39, 37, 74, 39, 51, 50, 50, 34, 45,
61, 36), TG = c(195, 76, 160, 128, 38, 80, 166, 106, 128, 175,
132, 111, 128, 112, 119, 440, 202, 80, 188), Glucosa_mg = c(86,
85, 96, 79, 77, 91, 117, 85, 98, 130, 104, 90, 85, 84, 84, 98,
101, 84, 71), HOMAI = c(3.201092, 1.31670666666667, 1.034816,
5.68620933333333, 1.76448066666667, 5.46576333333333, 5.131308,
1.12591, 5.351976, 13.4199, 2.26775466666667, 1.41414, 2.86404666666667,
1.456616, 1.527064, 13.5225626666667, 4.873048, 1.877232, 2.21543666666667
), PCR = c(1.71, 1.84, 3.52, 2.32, 0.37, 2.18, 4.03, 0.46, 1.99,
2.97, 1.51, 0.39, 0.62, 0.93, 0.5, 2.15, 2.56, 0.5, 2.45)), row.names = c(NA,
-19L), class = c("tbl_df", "tbl", "data.frame"))
meds_hiper = c("acido acetilsalicilico", "Amlodipino", "Asawin", "Aspirina", "Aspirineta", "Atorvastatina", "Carvedilol", "Coumadin", "Diovan", "Enalapril", "Espironolactona", "Furosemida", "Hidroclorotiazida", "Hidroclorotizida", "Losarta", "Losartan", "Metildigoxina", "Metoprolol", "Nimodipina", "Nimodipino", "Propanolol", "Venadol", "Verapamilo", "Warfarina", "hipertension")
meds_diab = c("Galvus met", "Glibenclamida", "Insulina", "Insulina lantus", "Insulina NPH", "Metformina")
meds_lip = c("Atorvastatina", "Ciprofibrato", "Estatina", "Gemfibrozilo", "Gemfribrozilo", "Lovastatina", "Simvastatina", "colesterol")
# TRUE for every participant whose free-text medication list (`med_text`)
# mentions at least one drug from `meds`. Matching is a case-insensitive
# substring search, so the entry "Losarta" also matches "Losartán".
takes_any <- function(med_text, meds) {
  grepl(paste(meds, collapse = "|"), med_text, ignore.case = TRUE)
}

# SCM is "High" when at least 2 of the 6 criteria below are met, else "Low".
# Every criterion is wrapped in its own as.numeric(...): `+` binds tighter than
# `|`, so without that grouping the expression is not a count of criteria.
# The medication vectors hold drug names, so they are matched against Tipo_med
# rather than compared with a number.
# NOTE(review): the Sexo coding (2 with HDL < 40, 1 with HDL < 50) is taken
# from the original expression; Sexo is not part of the data subset shown, so
# confirm it against the full data set.
multi <- multi %>%
  mutate(
    SCM = case_when(
      (
        # 1. High triglycerides or lipid-lowering medication
        as.numeric(TG > 150 | takes_any(Tipo_med, meds_lip)) +
          # 2. High glucose or diabetes medication
          as.numeric(Glucosa_mg > 100 | takes_any(Tipo_med, meds_diab)) +
          # 3. Insulin resistance index
          as.numeric(HOMAI > 3) +
          # 4. C-reactive protein
          as.numeric(PCR > 3) +
          # 5. Hypertension or antihypertensive medication
          as.numeric(Clasificacion_general == "Hypertension" |
                       takes_any(Tipo_med, meds_hiper)) +
          # 6. Low HDL for the participant's sex, or lipid-lowering medication
          as.numeric((HDL < 40 & Sexo == 2) |
                       (HDL < 50 & Sexo == 1) |
                       takes_any(Tipo_med, meds_lip))
      ) >= 2 ~ "High",
      TRUE ~ "Low"
    ),
    SCM = factor(SCM)
  )
Thank you in advance

ggplot facet_grid: plotting hourly data for different days, directly below each other

I am trying to plot count of visitors for different days. I would like to use facet_grid to have the plots on a common X-axis directly below each other. Every time I try, the second plot (day 2) ends up on the right. Does somebody know what I have done wrong? Below is the code I am using:
# Average visitor count over time with a min/max band, one facet row per day.
# `colour` (a vector of colours) and date_format() are defined outside this
# snippet.
ggplot(count_visitors, aes(x = date)) +
  geom_line(aes(y = average_count), colour = colour[1], size = 0.5) +
  # alpha is a numeric opacity in [0, 1]; passing the string "0.2" relies on
  # implicit coercion and may fail depending on the ggplot2/scales version.
  geom_line(aes(y = count_max), colour = colour[1], size = 0.5, alpha = 0.2) +
  geom_line(aes(y = count_min), colour = colour[1], size = 0.5, alpha = 0.2) +
  geom_ribbon(aes(ymin = count_min, ymax = count_max), fill = colour[1], alpha = 0.2) +
  labs(x = "Time", y = "Visitors Count") +
  scale_y_continuous(breaks = seq(0, 600, by = 100), limits = c(0, 600)) +
  scale_x_datetime(labels = date_format("%H:%M")) +
  facet_grid(day_month ~ .)
And this is what the data looks like:
$ date : POSIXct, format: "2017-12-02 07:00:00" "2017-12-02 07:15:00" "2017-12-02 07:30:00" "2017-12-02 07:45:00" ...
$ day_month : int 2 2 2 2 2 2 2 2 2 2 ...
$ average_count: num 1 2 2.5 3.5 9 11 19.5 31.5 62 90.5 ...
$ count_min : num 0 0 0 0 2 4 9 15 39 61 ...
$ count_max : num 2 4 5 7 16 18 30 48 85 120 ...
structure(list(date = structure(c(1512198000, 1512198900, 1512199800,
1512200700, 1512201600, 1512202500, 1512203400, 1512204300, 1512205200,
1512206100, 1512207000, 1512207900, 1512208800, 1512209700, 1512210600,
1512211500, 1512212400, 1512213300, 1512214200, 1512215100, 1512216000,
1512216900, 1512217800, 1512218700, 1512219600, 1512220500, 1512221400,
1512222300, 1512223200, 1512224100, 1512225000, 1512225900, 1512226800,
1512227700, 1512228600, 1512229500, 1512230400, 1512231300, 1512232200,
1512233100, 1512234000, 1512234900, 1512235800, 1512236700, 1512237600,
1512238500, 1512239400, 1512240300, 1512241200, 1512242100, 1512243000,
1512243900, 1512244800, 1512245700, 1512246600, 1512247500, 1512248400,
1512249300, 1512250200, 1512251100, 1512252000, 1512252900, 1512253800,
1512254700, 1512255600, 1512111600, 1512112500, 1512113400, 1512114300,
1512115200, 1512116100, 1512117000, 1512117900, 1512118800, 1512119700,
1512120600, 1512121500, 1512122400, 1512123300, 1512124200, 1512125100,
1512126000, 1512126900, 1512127800, 1512128700, 1512129600, 1512130500,
1512131400, 1512132300, 1512133200, 1512134100, 1512135000, 1512135900,
1512136800, 1512137700, 1512138600, 1512139500, 1512140400, 1512141300,
1512142200, 1512143100, 1512144000, 1512144900, 1512145800, 1512146700,
1512147600, 1512148500, 1512149400, 1512150300, 1512151200, 1512152100,
1512153000, 1512153900, 1512154800, 1512155700, 1512156600, 1512157500,
1512158400, 1512159300, 1512160200, 1512161100, 1512162000, 1512162900,
1512163800, 1512164700, 1512165600, 1512166500, 1512167400, 1512168300,
1512169200), class = c("POSIXct", "POSIXt"), tzone = "GMT"),
day_month = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
count_min = c(0, 0, 0, 0, 2, 4, 9, 15, 39, 61, 78, 95, 110,
121, 135, 151, 154, 173, 175, 187, 201, 227, 239, 254, 265,
275, 278, 288, 293, 290, 290, 293, 295, 299, 297, 284, 279,
278, 255, 250, 231, 224, 204, 184, 178, 170, 161, 149, 146,
148, 153, 150, 138, 127, 119, 112, 91, 79, 53, 40, 29, 15,
11, 9, 5, 1, 1, 1, 1, 1, 2, 3, 5, 14, 16, 26, 35, 58, 67,
89, 114, 141, 159, 183, 187, 198, 208, 207, 206, 209, 209,
204, 194, 180, 175, 156, 142, 145, 133, 128, 121, 104, 100,
85, 74, 75, 81, 93, 106, 104, 116, 121, 137, 151, 153, 159,
168, 165, 159, 156, 144, 119, 102, 84, 60, 35, 23, 17, 15,
10), count_max = c(2, 4, 5, 7, 16, 18, 30, 48, 85, 120, 146,
176, 207, 229, 253, 295, 312, 327, 348, 370, 392, 418, 446,
457, 489, 501, 509, 507, 514, 515, 533, 550, 564, 554, 557,
552, 552, 524, 502, 476, 447, 432, 411, 400, 380, 352, 341,
322, 314, 312, 303, 292, 288, 262, 239, 219, 202, 177, 138,
108, 81, 43, 32, 22, 12, 2, 2, 2, 2, 2, 7, 10, 21, 33, 44,
64, 89, 117, 153, 186, 222, 260, 279, 298, 323, 332, 341,
345, 349, 361, 361, 367, 364, 352, 324, 309, 291, 282, 267,
256, 240, 220, 197, 192, 185, 181, 184, 195, 203, 208, 202,
218, 245, 269, 297, 312, 320, 315, 317, 301, 284, 250, 220,
194, 166, 124, 77, 41, 30, 20), average_count = c(1, 2, 2.5,
3.5, 9, 11, 19.5, 31.5, 62, 90.5, 112, 135.5, 158.5, 175,
194, 223, 233, 250, 261.5, 278.5, 296.5, 322.5, 342.5, 355.5,
377, 388, 393.5, 397.5, 403.5, 402.5, 411.5, 421.5, 429.5,
426.5, 427, 418, 415.5, 401, 378.5, 363, 339, 328, 307.5,
292, 279, 261, 251, 235.5, 230, 230, 228, 221, 213, 194.5,
179, 165.5, 146.5, 128, 95.5, 74, 55, 29, 21.5, 15.5, 8.5,
1.5, 1.5, 1.5, 1.5, 1.5, 4.5, 6.5, 13, 23.5, 30, 45, 62,
87.5, 110, 137.5, 168, 200.5, 219, 240.5, 255, 265, 274.5,
276, 277.5, 285, 285, 285.5, 279, 266, 249.5, 232.5, 216.5,
213.5, 200, 192, 180.5, 162, 148.5, 138.5, 129.5, 128, 132.5,
144, 154.5, 156, 159, 169.5, 191, 210, 225, 235.5, 244, 240,
238, 228.5, 214, 184.5, 161, 139, 113, 79.5, 50, 29, 22.5,
15)), class = "data.frame", row.names = c(NA, -130L))
Example Image
One option is to use facet_wrap instead.
Note that I removed lots of your rather redundant code (for the question). Would recommend to have a look at how to create an MCVE
# One panel per day, stacked vertically (nrow = 2); each panel gets its own
# x range because the two days cover different timestamps.
ggplot(count_visitors, aes(x = date)) +
  geom_line(aes(y = average_count), size = 0.5) +
  # alpha is a numeric opacity in [0, 1], not a string.
  geom_ribbon(aes(ymin = count_min, ymax = count_max), alpha = 0.2) +
  facet_wrap(day_month ~ ., nrow = 2, scales = "free_x")

specific stripchart with ggplot2

I've got this dataframe
df <- structure(list(rang = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25.5, 25.5, 27.5,
27.5, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42.5,
42.5, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54.5, 54.5, 56,
57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88
), dr = c(164, 176, 260, 297, 308, 313, 327, 333, 339, 365, 396,
403, 404, 410, 413, 414, 422, 424, 440, 442, 443, 451, 477, 496,
530, 530, 546, 546, 548, 565, 567, 574, 576, 587, 590, 603, 619,
626, 629, 630, 642, 653, 653, 660, 667, 670, 677, 682, 689, 711,
716, 737, 763, 772, 772, 776, 778, 792, 794, 820, 835, 838, 842,
855, 861, 888, 890, 899, 906, 908, 969, 1011, 1046, 1058, 1069,
1072, 1074, 1100, 1153, 1348, 1368, 1432, 1468, 1516, 1612, 1712,
1714, 1731), signe = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
2L), .Label = c("negatif", "positif"), class = "factor")), .Names = c("rang",
"dr", "signe"), row.names = c(NA, -88L), class = "data.frame")
and this chart when I use the stripchart function in base R
stripchart(df[,1]~df[,3],
method="stack", vertical=FALSE, ylim=c(0.5,2.5),
group.names=levels(df[,3]),
xlab="Rang des différences dr", pch=18, cex=1.2)
Can I have the same plot with the library ggplot2?
I used geom_dotplot but I didn't get the same plot. This is an example:
ggplot(data = df, aes(y=df[,1], x=factor(df[,3]))) +
geom_dotplot(binaxis = "y", dotsize = 0.5) +
coord_cartesian(ylim=c(0, 88)) +
scale_y_continuous(breaks=seq(0, 88, 1))
Help me, please!
You have to flip coordinates, and set binwidth = 1 to get the same plot:
# Stacked dot plot of the ranks per sign, drawn horizontally like stripchart().
ggplot(data = df, aes(y = rang, x = factor(signe))) +
  # binwidth = 1 gives every rank its own bin, so dots only stack on ties.
  geom_dotplot(binaxis = "y", dotsize = 0.8, binwidth = 1) +
  scale_y_continuous(name = 'Rang des différences dr') +
  scale_x_discrete(name = '') +
  # A plot has a single coordinate system: a coord_cartesian() placed before
  # coord_flip() would be replaced by it, so the limits are set here instead.
  coord_flip(ylim = c(0, 88)) +
  theme_bw(base_size = 20)
Is this along the lines of what you were looking for:
# Map the columns by name: indexing df inside aes() bypasses the layer data
# and produces axis titles such as "df[, 1]".
ggplot(df, aes(x = rang, y = signe)) + geom_point() + theme_bw()

How to make beanplot and boxplot in the same chart?

seasons <- structure(list(values = c(204, 339, 304, 434, 334, 212, 361,
102, 298, 369, 149, 227, 278, 199, 360, 211, 219, 209, 177, 299,
262, 285, 237, 227, 216, 229, 317, 321, 327, 123, 84, 321, 442,
263, 225, 290, 259, 219, 244, 325, 257, 672, 762, 381, 698, 578,
576, 386, 834, 790, 815, 736, 517, 556, 685, 781, 703, 1071,
537, 784, 753, 790, 489, 878, 433, 742, 638, 731, 1017, 850,
804, 612, 923, 1000, 855, 750, 921, 676, 621, 781, 703, 1054,
156, 312, 267, 152, 352, 155, 215, 184, 186, 221, 352, 183, 307,
353, 507, 255, 159, 109, 343, 377, 209, 260, 193, 231, 111, 167,
233, 360, 488, 347, 208, 178, 371, 276, 263, 166, 486, 119, 153,
315, 226, 158, 142, 78, 75, 156, 53, 103, 141, 94, 94, 55, 84,
35, 82, 65, 150, 30, 201, 184, 94, 119, 150, 70, 63, 50, 74,
160, 49, 52, 135, 105, 129, 75, 83, 85, 84, 85, 77, 147, 100,
46), ind = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("spring",
"summer", "autumn", "winter"), class = "factor", scores = structure(c(3,
1, 2, 4), .Dim = 4L, .Dimnames = list(c("autumn", "spring", "summer",
"winter"))))), .Names = c("values", "ind"), row.names = c(NA,
-164L), class = "data.frame")
I made a boxplot and beanplot below.
boxplot(seasons$values~seasons$ind, ylim= c(0,1200))
beanplot(seasons$values~seasons$ind, ylim= c(0,1200),
col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6", side="second")
I want to make a chart containing these boxplot and beanplot at the same time.
This would make comparison easy. That is why I set the same ylim on both plots.
Is there any way I can do this?
With beanplot package, use add=TRUE:
# Draw the boxplot first, then overlay the half-beans on the same axes.
boxplot(seasons$values ~ seasons$ind, ylim = c(0, 1200))
# add = TRUE draws into the existing plot instead of starting a new one
# (spelled out because T can be reassigned).
beanplot(seasons$values ~ seasons$ind, ylim = c(0, 1200),
         col = c("#CAB2D6", "#33A02C", "#B2DF8A"), border = "#CAB2D6",
         side = "second", add = TRUE)
Try with ggplot:
# Boxplot per season with a semi-transparent violin drawn over it and the raw
# observations jittered horizontally on top.
ggplot(seasons, aes(x = ind, y = values)) +
  geom_boxplot() +
  geom_violin(fill = "lightblue", alpha = 0.5) +
  geom_jitter(position = position_jitter(width = .1))

Resources