# Scatter of YVal against maturity date; the marker symbol is mapped to Sym.
# The hover label stacks Security, Crncy, YTM and DM on separate lines.
p <- plot_ly(
  data = bData,
  x = ~`Maturity Date`,
  y = ~YVal,
  type = "scatter",
  mode = "markers",
  symbol = ~Sym,
  symbols = c("circle-open", "x-open", "diamond-open", "square-open"),
  text = ~paste(bData$Security, bData$Crncy, bData$YTM, bData$DM, sep = "<br>"),
  hoverinfo = "text"
)
The code above produces this plot.
Now I want to add a trace to this chart: a scatter plot whose color depends on the currency (Crncy) column.
I tried the code shown below, but its legend contains every combination of the two fields.
Basically, I want to classify the points by currency type and, at the same time, overlay the Sym column as the marker symbol.
# Symbol-coded scatter (symbol mapped to Sym) plus a second trace whose
# colour is mapped to Crncy.
p <- plot_ly(
  data = bData,
  x = ~`Maturity Date`,
  y = ~YVal,
  type = "scatter",
  mode = "markers",
  symbol = ~Sym,
  symbols = c("circle-open", "x-open", "diamond-open", "square-open"),
  text = ~paste(bData$Security, bData$Crncy, bData$YTM, bData$DM, sep = "<br>"),
  hoverinfo = "text"
) %>%
  add_trace(x = ~`Maturity Date`, y = ~YVal, color = ~Crncy)
data:
bData <- structure(list(Crncy = structure(c(9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 3L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 5L, 9L, 9L, 9L, 9L, 9L, 9L,
5L, 9L, 9L, 9L, 9L, 6L, 5L, 9L, 9L, 3L, 9L, 5L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 5L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 7L, 6L, 7L, 6L, 9L,
7L, 7L, 3L, 2L, 7L, 9L, 9L, 9L, 9L, 8L, 9L, 9L, 9L, 10L, 9L,
9L, 4L, 4L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 4L, 9L, 9L,
9L, 5L, 9L, 9L, 9L, 9L, 5L, 9L, 5L, 9L, 2L, 9L, 5L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 2L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 1L, 5L, 1L, 9L, 9L, 9L,
9L, 9L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 6L, 9L, 9L,
9L, 9L, 1L, 1L, 9L, 9L, 9L, 9L, 9L, 1L, 9L, 9L), .Label = c("AUD",
"CAD", "CHF", "COP", "EUR", "GBP", "JPY", "PEN", "USD", "ZAR"
), class = "factor"), `Maturity Date` = structure(c(20772, 19689,
18969, 18969, 20815, 20119, 20865, 20864, 20134, 20873, 20873,
20887, 20011, 20897, 20162, 19797, 20908, 20908, 20923, 19841,
19107, 19107, 20941, 20935, 20936, 20936, 20953, 20049, 19138,
19860, 21005, 21027, 19562, 19562, 21014, 19222, 21047, 19950,
19264, 19285, 19292, 19292, 19323, 19382, 19381, 20000, 19404,
20176, 19437, 19875, 19875, 19508, 20635, 19555, 19555, 20658,
19038, 19628, 18946, 19745, 19746, 19021, 19042, 19042, 20545,
20623, 19047, 19412, 19415, 20178, 20178, 19611, 19807, 20168,
20551, 20640, 20957, 20223, 19858, 19692, 19158, 20258, 19720,
20269, 20999, 20999, 20290, 20278, 20300, 20300, 21029, 19753,
20318, 20328, 20423, 20120, 20223, 20240, 19335, 20594, 19510,
19905, 20073, 20347, 20392, 18897, 20962, 20994, 21009, 21043,
19287, 19505, 18899, 19006, 19081, 19323, 19373, 19203, 19417,
19415, 19430, 19469, 19492, 19527, 19599, 20344, 19638, 19655,
19675, 19688, 20068, 19711, 19780, 19803, 19838, 19865, 19892,
19890, 19940, 19962, 20706, 20011, 18927, 20041, 18949, 20777,
20116, 20145, 19041, 20156, 20177, 20174, 20173, 20205, 20208,
20235, 20248, 20249, 19523, 20521, 20588, 20574, 20465, 20482,
19400, 20588, 21021, 20649, 20389, 20409, 19950, 19600, 19601,
20346, 19658, 20747, 19657, 19656, 19657, 20307, 20347, 19259,
20087, 20810, 20077, 19349, 20118, 20483, 20112, 20109, 19392,
19594, 20144, 21056, 19407, 20749, 20573, 19296, 19300, 19300,
19310, 20041, 19346, 20907, 19976, 20744, 20202, 19132, 19132,
19132), class = "Date"), Sym = structure(c(4L, 3L, 4L, 1L, 2L,
3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L,
3L, 4L, 3L, 4L, 3L, 4L, 1L, 4L, 3L, 2L, 1L, 4L, 1L, 2L, 1L, 2L,
1L, 2L, 3L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 4L, 3L, 2L,
1L, 4L, 1L, 4L, 1L, 2L, 1L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L,
3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L,
3L, 4L, 1L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L,
3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L,
3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L,
3L, 4L, 3L, 4L, 3L, 2L, 1L, 2L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L,
3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L,
3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L,
3L, 4L, 3L, 4L, 3L, 2L, 1L, 2L, 3L, 4L, 3L, 4L, 3L, 2L, 3L, 4L,
3L, 4L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 4L, 4L, 4L, 4L), .Label = c("Axe",
"Axe, Owned", "None", "Owned"), class = "factor"), YVal = c(20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165,
166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178,
179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204,
205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217,
218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229)), class = "data.frame", row.names = c(NA,
-210L))
Maybe this is what you are looking for? (I have used the split argument from plotly):
library(plotly)
#Code
# Symbol is mapped to Sym; `split = ~Crncy` additionally splits the data by
# the Crncy column.
plot_ly(
  data = bData,
  x = ~`Maturity Date`,
  y = ~YVal,
  type = "scatter",
  mode = "markers",
  symbol = ~Sym,
  symbols = c("circle-open", "x-open", "diamond-open", "square-open"),
  split = ~Crncy,
  text = ~paste(bData$Security, bData$Crncy, bData$YTM, bData$DM, sep = "<br>"),
  hoverinfo = "text"
)
Output:
Update: Here are some other options for the OP:
# Option 1
# First trace: symbol mapped to Sym, legend group "group1".
# Second trace: symbol mapped to Crncy, legend group "group2".
plot_ly(
  data = bData,
  x = ~`Maturity Date`,
  y = ~YVal,
  type = "scatter",
  mode = "markers",
  symbol = ~Sym,
  symbols = c("circle-open", "x-open", "diamond-open", "square-open"),
  text = ~paste(bData$Security, bData$Crncy, bData$YTM, bData$DM, sep = "<br>"),
  hoverinfo = "text",
  legendgroup = "group1"
) %>%
  add_trace(
    x = ~`Maturity Date`,
    y = ~YVal,
    symbol = ~Crncy,
    legendgroup = "group2"
  )
Output:
Option 2:
# Option 2
# Two marker traces over the same points: the first coloured by Sym
# (legend group "group1"), the second coloured by Crncy ("group2").
plot_ly(
  bData,
  x = ~`Maturity Date`,
  y = ~YVal,
  type = "scatter",
  mode = "markers",
  legendgroup = "group1",
  color = ~Sym
) %>%
  add_trace(
    y = ~YVal,
    legendgroup = "group2",
    type = "scatter",
    mode = "markers",
    color = ~Crncy
  )
Output:
Related
I need to plot a large number of graphs. To do that I used the facet_wrap_paginate function, but only the second page gets printed/plotted.
Here is a sample of my data:
df <- structure(list(oxygen = c(60.86414, 62.50372, 89.76055, 61.15914,
61.49936, 61.90402, 61.60323, 61.2873, 75.12329, 60.83503, 63.03796,
61.78298, 70.16533, 62.60887, 61.49043, 64.42095, 61.78358, 62.29302,
62.88122, 60.47264, 65.92805, 65.0235, 64.32414, 65.56869, 64.65729,
66.73502, 67.01285, 67.47757, 69.62232, 66.09624, 66.18106, 64.28226,
64.85658, 67.46348, 68.93226, 65.23919, 65.67461, 64.64727, 64.90544,
68.10878, 70.00292, 59.54631, 61.43627, 78.75032, 61.94263, 82.08963,
62.75281, 61.93749, 72.86382, 62.06785, 61.04865, 95.49206, 69.79339,
66.82354, 61.53702, 85.51962, 101.53748, 61.64096, 92.14781,
60.41892, 77.55303, 75.39007, 62.15703, 82.38292, 87.27823, 69.28644,
73.72369, 62.02667, 61.34973, 61.61716, 70.61777, 61.59655, 60.31008,
61.65777, 71.82138, 61.9823, 79.38791, 60.31816, 108.2866, 61.54543,
69.87043, 64.30304, 65.05726, 68.69304, 84.0604, 121.05592, 68.67872,
69.53102, 68.31013, 64.76675, 122.22451, 91.29598, 69.43838,
68.73515, 77.99483, 68.56142, 64.14964, 73.2695, 69.67608, 69.01853,
68.5685, 63.9001, 68.93557, 71.84125, 72.73052, 69.17045, 76.28085,
68.41978, 69.53005, 76.15901, 86.43705, 72.95997, 64.0582, 70.29805,
64.10386, 88.09503, 68.07687, 69.62147, 67.77802, 125.09757,
91.96934, 97.26108, 89.16293, 80.245, 124.97964, 97.28237, 95.95074,
99.24807, 82.59685, 89.2217, 77.50349, 88.54909, 91.61343, 77.38885,
94.06567, 84.77561, 80.46941, 92.28428, 74.25416, 97.07623),
depth = c(707.5, 195, 47.5, 750, 315.5, 506.5, 660.5, 678,
146, 821.5, 197, 365, 141.5, 448, 574.5, 325.5, 645.5, 236,
446, 872.5, 315, 729.5, 786.5, 573.5, 752, 202, 455, 412,
147, 552.5, 517, 904, 339.5, 184, 308.5, 307.5, 610, 705.5,
700, 375, 228.5, 261.5, 398.5, 168.5, 360, 136.5, 504, 299.5,
187, 473.5, 249.5, 64.5, 178.5, 251.5, 369.5, 95, 66.5, 599.5,
72, 474, 178, 33.5, 109, 86.5, 26.5, 240, 51.5, 367, 295.5,
408.5, 244.5, 106, 314, 304.5, 69.5, 361.5, 91.5, 125.5,
11, 281.5, 101.5, 472.5, 433, 257, 24, 6, 192.5, 265, 226,
449, 1.5, 21.5, 362.5, 238.5, 33, 246, 477.5, 46, 109, 315,
275.5, 636, 284.5, 267, 387, 336.5, 225.5, 291, 205, 136,
60.5, 171, 515.5, 260.5, 563, 50, 436, 123, 227, 7.5, 12,
87, 32.5, 239, 14.5, 103.5, 90.5, 120.5, 38.5, 36.5, 406,
38.5, 143.5, 371, 87, 54.5, 234, 53.5, 260.5, 123), ctd_file = c("BA16007",
"BA16002", "BA16011", "BA16003", "BA16004", "BA16006", "BA16001",
"BA16002", "BA16012", "BA16003", "BA16004", "BA16004", "BA16005",
"BA16004", "BA16009", "BA16005", "BA16012", "BA16004", "BA16006",
"BA16003", "BA16013", "BA16014", "BA16017", "BA16016", "BA16018",
"BA16016", "BA16014", "BA16020", "BA16020", "BA16020", "BA16016",
"BA16018", "BA16016", "BA16018", "BA16021", "BA16015", "BA16014",
"BA16013", "BA16015", "BA16021", "BA16025", "BA16023", "BA16024",
"BA16025", "BA16024", "BA16025", "BA16025", "BA16025", "BA16024",
"BA16025", "BA16023", "BA16025", "BA16023", "BA16025", "BA16024",
"BA16022", "BA16022", "BA16025", "BA16024", "BA16024", "BA16026",
"BA16035", "BA16030", "BA16035", "BA16031", "BA16026", "BA16032",
"BA16031", "BA16027", "BA16032", "BA16035", "BA16028", "BA16031",
"BA16027", "BA16033", "BA16032", "BA16035", "BA16028", "BA16027",
"BA16032", "BA16034", "BA16034", "BA16034", "BA16034", "BA16034",
"BA16034", "BA16034", "BA16034", "BA16034", "BA16034", "BA16034",
"BA16034", "BA16034", "BA16034", "BA16034", "BA16034", "BA16034",
"BA16034", "BA16034", "BA16034", "BA16040", "BA16036", "BA16043",
"BA16047", "BA16048", "BA16041", "BA16049", "BA16036", "BA16042",
"BA16044", "BA16048", "BA16045", "BA16039", "BA16046", "BA16036",
"BA16048", "BA16044", "BA16038", "BA16041", "BA16045", "BA16052",
"BA16055", "BA16054", "BA16054", "BA16053", "BA16053", "BA16053",
"BA16055", "BA16055", "BA16053", "BA16055", "BA16054", "BA16052",
"BA16055", "BA16054", "BA16053", "BA16054", "BA16055", "BA16052",
"BA16053"), Transect = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("1",
"2", "3", "4", "5", "6", "7"), class = "factor"), Station = structure(c(7L,
2L, 11L, 3L, 4L, 6L, 1L, 2L, 12L, 3L, 4L, 4L, 5L, 4L, 9L,
5L, 12L, 4L, 6L, 3L, 1L, 2L, 5L, 4L, 6L, 4L, 2L, 8L, 8L,
8L, 4L, 6L, 4L, 6L, 9L, 3L, 2L, 1L, 3L, 9L, 4L, 2L, 3L, 4L,
3L, 4L, 4L, 4L, 3L, 4L, 2L, 4L, 2L, 4L, 3L, 1L, 1L, 4L, 3L,
3L, 1L, 9L, 5L, 9L, 6L, 1L, 7L, 6L, 2L, 7L, 9L, 3L, 6L, 2L,
8L, 7L, 9L, 3L, 2L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 8L,
9L, 3L, 10L, 1L, 4L, 5L, 9L, 6L, 1L, 7L, 1L, 9L, 5L, 1L,
3L, 6L, 3L, 6L, 5L, 5L, 4L, 4L, 4L, 6L, 6L, 4L, 6L, 5L, 3L,
6L, 5L, 4L, 5L, 6L, 3L, 4L), .Label = c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10", "11", "12"), class = "factor")), row.names = c(NA,
-140L), groups = structure(list(Transect = structure(1:7, .Label = c("1",
"2", "3", "4", "5", "6", "7"), class = "factor"), .rows = structure(list(
1:20, 21:40, 41:60, 61:80, 81:100, 101:120, 121:140), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -7L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
And there is the code I used:
library(dplyr)
library(tidyverse)
library(ggforce)
# Number of pages when each page holds a 2 x 2 grid of Transect facets.
i <- ceiling(nlevels(df$Transect) / 4)

pdf("multi_page.pdf", width = 16 / 2.54, height = 12 / 2.54)

# Author's notes: the pipeline has to be re-run each time or it does not
# work, and group_by() is mandatory (otherwise the data needs fortifying).
SdesGG <- df %>%
  group_by(Transect) %>%
  ggplot(aes(x = oxygen, y = depth, color = Station)) +
  geom_line() +
  scale_color_brewer(palette = "Paired") +
  scale_y_reverse() +
  # facet_wrap_paginate() comes from ggforce
  facet_wrap_paginate(~Transect, ncol = 2, nrow = 2, page = i)

dev.off()
I have 180 000 observations, so it might be useful to replicate the rows of this sample to get a comparable size:
# Repeat every column of df 1000 times; the result is a list of columns.
lapply(df, function(column) rep(column, 1000))
With the page argument you only specify
The page to draw (see ?facet_wrap_paginate)
That's why you get only the last or second page with page = i.
If you want all pages you have to loop over the pages:
library(dplyr)    # provides %>% and group_by()
library(ggplot2)
library(ggforce)  # provides facet_wrap_paginate()

# Number of pages when each page holds a 2 x 2 grid of Transect facets.
i <- ceiling(
  length(levels(df$Transect)) / 4)

pdf("multi_page.pdf", width = 16 / 2.54, height = 12 / 2.54)

# `page` selects a single page to draw, so build one plot per page index.
# seq_len() yields an empty sequence when i is 0, whereas seq(0) would count
# down as c(1, 0).
# NOTE: the plots reach the PDF device because the list returned by lapply()
# is auto-printed at top level; wrap the call in print() if this code is
# moved into a function or otherwise run where auto-printing does not happen.
lapply(seq_len(i), function(page) {
  df %>%
    group_by(Transect) %>% # author's note: mandatory, or the data needs fortifying
    ggplot(aes(x = oxygen, y = depth, color = Station)) +
    geom_line() +
    scale_color_brewer(palette = "Paired") +
    scale_y_reverse() +
    facet_wrap_paginate(~Transect, ncol = 2, nrow = 2, page = page)
})

dev.off()
#> [[1]]
#>
#> [[2]]
I created a line plot with ggplot. Now I would like to add the 95% confidence interval to the data points. While searching for a way to do that, I found the function "SummarySE" from the "Cookbook for R" and tried to apply it to my data.
The problem is that all the newly calculated columns contain only NA and NaN, with the warning message: In qt(conf.interval/2 + 0.5, datac$N - 1) : NaNs produced.
head() of the output I'm getting:
My data:
structure(list(J01 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("Tetrazykline",
"Penicilline", "Andere Beta-Lactame", "Sulfonamide & Trimethoprim",
"Makrolide, Lincosamide & Streptogramine", "Aminoglykoside",
"Chinolone", "Andere Antibiotika"), class = "factor"), Monat = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L), .Label = c("Jan 2018", "Feb 2018", "Mär 2018",
"Apr 2018", "Mai 2018", "Jun 2018", "Jul 2018", "Aug 2018", "Sep 2018",
"Okt 2018", "Nov 2018", "Dez 2018"), class = "factor"), DDDs = c(262,
5729, 3305, 495, 588, 153, 944, 922, 85, 6386, 3157, 546, 753,
120, 421, 419, 102, 6758, 3582, 682, 853, 167, 884, 1061, 122,
5628, 3395, 564, 520, 161, 501, 728, 208, 4976, 3380, 508, 620,
205, 704, 1106, 147, 4420, 3521, 634, 611, 110, 728, 1212, 133,
5717, 3783, 708, 582, 230, 593, 842, 387, 6191, 3796, 605, 410,
62, 595, 875, 130, 5309, 4084, 2010, 395, 163, 632, 1202, 84,
5266, 3437, 617, 568, 141, 623, 948, 60, 5972, 3485, 718, 518,
132, 588, 1034, 182, 4924, 3360, 573, 405, 96, 319, 1337)), row.names = c(NA,
-96L), class = "data.frame")
Does anybody know what the problem is and how I can fix it?
I am trying to plot count vs. month:
# Monthly counts for the "West coast" region as a black line with
# white-filled points.
west_coast <- mcount[mcount$region == "West coast", ]

count_plot <- ggplot(dat, aes(x = month, y = count, group = region)) +
  geom_line(data = west_coast, colour = "black", stat = "identity",
            position = "dodge") +
  geom_point(data = west_coast, colour = "black", size = 2, shape = 21,
             fill = "white") +
  theme_bw() +
  theme(legend.key = element_rect(colour = "black")) +
  guides(fill = guide_legend(override.aes = list(colour = NULL)))

# Show the plot, as the original top-level expression did.
print(count_plot)

# ggsave() is a function call, not a plot component, so it must not be
# chained onto the plot with `+`. Passing `plot =` explicitly also avoids
# depending on whichever plot happens to be the "last plot".
ggsave("test.png", plot = count_plot, width = 6, height = 4, dpi = 300)
But I want to order the months chronologically from Jan to Dec. How can I do this short of writing all the months out?
dput
structure(list(region = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L), .Label = c("West coast", "Arizona", "Front range", "Flash flood alley",
"Mississippi valley", "Appalachians"), class = "factor"), month = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 4L, 12L, 11L,
5L, 2L, 9L, 8L, 6L, 10L, 3L, 7L, 8L, 10L, 5L, 1L, 6L, 7L, 4L,
6L, 8L, 2L, 1L, 7L, 5L, 3L, 11L, 12L, 9L, 10L, 2L, 7L, 3L, 6L,
12L, 11L, 10L, 9L, 4L, 1L, 11L, 4L, 2L, 1L, 12L, 9L, 3L, 8L,
5L, 6L, 10L, 7L, 5L, 8L, 11L, 12L, 4L, 3L, 9L, 2L), .Label = c("Apr",
"Dec", "Oct", "Mar", "May", "Jul", "Sep", "Jun", "Nov", "Aug",
"Jan", "Feb"), class = "factor"), count = c(566, 545, 427, 751,
357, 399, 568, 433, 454, 347, 511, 251, 267, 207, 167, 142, 417,
109, 117, 373, 207, 130, 125, 145, 7, 14, 2, 2, 7, 3, 107, 74,
135, 48, 80, 53, 117, 125, 59, 53, 103, 30, 21, 18, 8, 22, 26,
37, 20, 5, 11, 1, 96, 29, 109, 8, 33, 53, 6, 1, 5, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0)), .Names = c("region", "month", "count"), row.names = c(NA,
-72L), class = c("data.table", "data.frame"))
Use the built-in month.name or month.abb variable to specify the levels of your factor in the correct order. In this case, you have abbreviations so month.abb is appropriate.
# Re-level month so the factor follows calendar order (Jan ... Dec).
your_data$month <- factor(your_data$month, levels = month.abb)
I think creating the factor in the correct order is the best way, but you can also just order the axis using the limits argument of the discrete scale (see ?discrete_scale for more info).
+ scale_x_discrete(limits = month.abb)
Locales
If you are in a non-English locale, you can construct your own month name constants with a little date formatting (basically stolen from Brian Ripley in this R-Help thread):
# Month names and abbreviations as formatted in the current locale, taken
# from the first day of each month of 2004.
month.name.loc <- format(ISOdate(2004, 1:12, 1), "%B")
month.abb.loc <- format(ISOdate(2004, 1:12, 1), "%b")
If you want to use month names/abbreviations from a different locale than you're in, the withr package is useful.
I'm trying to match the output from a model specified in another software, HLM, in R, here: http://justpaste.it/q10n
The model I've tried so far (a random slope with a correlated intercept) doesn't match the HLM output for the fixed effects:
# Fixed effect of freered; random intercept and hrs slope for each school.
m1 <- lmer(
  formula = mathach ~ 1 + freered + (1 + hrs | school),
  data = dat
)
Here's the R output for comparison:
summary(m1)
Linear mixed model fit by REML ['lmerMod']
Formula:
mathach ~ 1 + freered + (1 + hrs | school)
Data: dat
REML criterion at convergence: 180.2
Scaled residuals:
Min 1Q Median 3Q Max
-2.1706 -0.4274 -0.0611 0.3395 4.3201
Random effects:
Groups Name Variance Std.Dev. Corr
school (Intercept) 102.96318 10.1471
hrs 6.03046 2.4557 -1.00
Residual 0.09409 0.3067
Number of obs: 100, groups: school, 10
Fixed effects:
Estimate Std. Error t value
(Intercept) 68.99782 0.59137 116.67
freered 0.48765 0.03025 16.12
Correlation of Fixed Effects:
(Intr)
freered -0.895
and the dataset:
dat <- structure(list(school = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "factor"),
student = 1:100, hrs = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L), mathach = c(100, 96, 92, 88, 84, 80, 76, 72, 68,
65, 95, 92, 89, 86, 83, 80, 77, 74, 71, 69, 90, 88, 86, 84,
82, 80, 78, 76, 74, 71, 85, 84, 83, 82, 81, 80, 79, 78, 77,
77, 80, 79.5, 79, 78.5, 78, 77.5, 77, 76.5, 76, 76, 75, 75.5,
76, 76.5, 77, 77.5, 78, 78.5, 79, 80, 70, 71, 72, 73, 74,
75, 76, 77, 78, 80, 65, 67, 69, 71, 73, 75, 77, 79, 81, 82,
60, 63, 66, 69, 72, 75, 78, 81, 84, 86, 55, 59, 63, 67, 71,
75, 79, 83, 87, 93), freered = c(30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 27L, 27L, 27L, 27L, 27L, 27L, 27L,
27L, 27L, 27L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L)), .Names = c("school",
"student", "hrs", "mathach", "freered"), row.names = c(NA, -100L
), class = "data.frame")
Any suggestions?
Here's a start: we get closer by (1) putting in an interaction between (1+hrs) and freered in the fixed effects; (2) centering the hrs and freered variables (as stated in the HLM log):
library(lme4)
library(broom) ## for tidy() (cosmetic: may (???) need Github devel version?)
## Mean-centre hrs and freered (centred only, not rescaled).
dat2 <- transform(
  dat,
  hrs = scale(hrs, scale = FALSE),
  freered = scale(freered, scale = FALSE)
)

## Fixed effects include the hrs-by-freered interaction; random intercept
## and hrs slope for each school. Written out in full, the right-hand side is
##   ~ 1 + freered + hrs + freered:hrs + (1 + hrs | school)
m1 <- lmer(
  formula = mathach ~ (1 + hrs) * freered + (1 + hrs | school),
  data = dat2
)

print(tidy(m1), digits = 3)
term estimate std.error statistic group
1 (Intercept) 77.5400 0.2677 289.619 fixed
2 hrs 0.0218 0.1245 0.175 fixed
3 freered 0.3811 0.0307 12.399 fixed
4 hrs:freered -0.2791 0.0143 -19.520 fixed
5 sd_(Intercept).school 0.8411 NA NA school
6 sd_hrs.school 0.3923 NA NA school
7 cor_(Intercept).hrs.school 0.1793 NA NA school
8 sd_Observation.Residual 0.3067 NA NA Residual
I am trying to use ggplot2 to visualize my data frame. Each row in my data frame represents a person with three values: the role of the person, the village of the person and the distance of that village from a common origin. I'd like to visualize the roles (e.g. as box plots) for each village, but I'd also like the villages to be plotted not in alphabetical order but according to distance (from the closest to the farthest). All I have right now is:
# Column 2 of h on the x axis and column 1 on the y axis, with the axis
# titles given explicitly.
qplot(h[[2]], h[[1]], xlab = "village", ylab = "role")
I do not understand how to tell qplot() to make box plots or how to order the villages according to distance. I would also like to show the distances on a top x-axis. Any help is more than welcome!
My data frame is like this:
h<-structure(list(role = structure(c(4L, 4L, 4L, 4L, 6L, 6L, 4L,
4L, 4L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
3L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 3L, 5L, 5L, 5L, 5L, 5L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 3L, 3L, 3L, 6L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 6L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 3L, 3L, 6L), .Label = c("1", "2", "3", "4", "5",
"6"), class = "factor"), village = structure(c(9L, 9L, 9L, 9L,
15L, 15L, 15L, 15L, 15L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 13L, 13L, 13L, 13L, 13L, 13L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 10L, 10L, 10L, 10L, 10L, 10L, 14L, 14L, 14L,
14L, 14L, 2L, 7L, 7L, 7L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 3L, 3L), .Label = c("a",
"b", "c", "d", "e", "f", "g",
"h", "i", "l", "m", "n", "o",
"p", "q", "r"), class = "factor"), distance = c(409,
409, 409, 409, 365, 365, 365, 365, 365, 351, 351, 351, 351, 351,
351, 351, 351, 351, 351, 351, 351, 351, 436, 436, 436, 436, 436,
436, 439, 439, 439, 439, 439, 439, 439, 439, 439, 439, 434, 434,
434, 434, 434, 434, 434, 434, 434, 434, 434, 434, 434, 434, 434,
434, 434, 434, 434, 434, 434, 434, 434, 434, 434, 434, 434, 434,
434, 434, 434, 434, 434, 434, 434, 434, 466, 466, 466, 466, 466,
466, 492, 492, 492, 492, 492, 447, 448, 448, 448, 431, 431, 431,
431, 431, 431, 431, 431, 449, 449, 449, 449, 449, 471, 471, 471,
471, 471, 471, 471, 471, 471, 471, 471, 471, 471, 471, 471, 443,
443, 443, 443, 443, 443, 443, 443, 443, 443, 443, 443, 443, 443,
443, 443, 443, 443, 443, 443, 443, 443, 443, 443, 443, 443, 443,
443, 443, 443, 443, 443, 443, 443, 443, 443, 443, 443, 443, 443,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 423, 423, 423)), .Names = c("role",
"village", "distance"), row.names = c(NA, -221L), class = "data.frame")
library(plyr)
# One label row per village: the highest role code (factor codes taken as
# numbers) and the first distance value recorded for that village.
txt <- ddply(
  h, .(village), summarize,
  role = max(as.numeric(role)),
  distance = distance[1]
)

# Box plot of role per village, with villages ordered along the x axis by
# distance; each village's distance is written as text at its highest role.
ggplot(h, aes(x = reorder(village, distance, max), y = role)) +
  geom_boxplot(aes(group = village)) +
  geom_text(data = txt, aes(village, role, label = distance), vjust = -1)
So basically you add text with geom_text.