Related
Good morning,
I have a ggplot2 bar graph inside a loop, in which the size of bars changes in every run of the loop:
# Bar chart of column i of `draft` against column 2, with each bar's value
# printed as a white bold label; `i` and `player` come from the enclosing loop.
ggplot(draft, aes(x=draft[,2], y=draft[,i])) +
geom_bar(stat="identity",fill="navyblue") +
# The label size is fixed at 4 no matter how wide the bars or the labels are.
geom_text(label=draft[,i],size=4, vjust=1.2, colour = "white",fontface=2) +
labs(title = paste("Session trends for",colnames(draft)[i],"-",player))+
# Strip axis titles, legend, grid and panel background; keep plain black axis lines.
theme(axis.text.x = element_text(angle = 0,color="black",size=8),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
plot.title = element_text(color="black", size=10, face="bold",hjust = 0.5),
legend.position = "none",panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))
In the geom_text part (labels of the bars), as you can see, size=4. This is fine for some graphs, but it is too big/small for others. My question is:
Is there any way to adjust the font size automatically to the size of the bar?
Thank you very much in advance.
Reproducible example.
This is the dataframe from where I represent the data:
> dput(draft)
structure(list(Player = c("Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos"), Date = structure(c(19371, 19370, 19369, 19368,
19367, 19364, 19363, 19362, 19361, 19360, 19359, 19356, 19355,
19354, 19353, 19349), class = "Date"), week = c(29, 29, 29, 29,
29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26), TD = c(6638,
2660, 4761, 4956, 3984, 4001, 3688, 4476, 4616, 4666, 4120, 3782,
3701, 4398, 4275, 3222), Time = c(96, 67, 70, 75, 38, 33, 55,
68, 59, 57, 56, 35, 45, 56, 46, 21), Vmax = c(9.4, 4.7, 8.3,
8.8, 6.9, 9.5, 6.5, 6, 9.2, 7.1, 7.3, 9, 6.5, 6.8, 6.2, 4.6),
Amax = c(4.5, 3.1, 4.1, 4.9, 3.8, 3.8, 4.4, 3.9, 4.5, 4.4,
4, 3.4, 4.3, 3.5, 3.2, 1.5), Dmax = c(-5.9, -2.8, -4.2, -6.2,
-3.9, -2.7, -4.9, -3.5, -2.7, -4.6, -3.5, -2.7, -4.5, -3.7,
-3.8, -1.4), Aerobic = c(462, 44, 589, 280, 175, 546, 333,
831, 303, 959, 225, 125, 194, 188, 534, 665), HSD = c(177,
0, 475, 86, 59, 463, 56, 14, 384, 92, 119, 393, 54, 262,
24, 0), SD = c(100, 0, 78, 39, 0, 149, 0, 0, 125, 3, 6, 141,
0, 0, 0, 0), Nsprints = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), DEC3 = c(7, 0, 2, 5, 0,
0, 8, 0, 0, 3, 0, 0, 3, 0, 0, 0), ACC3 = c(4, 0, 1, 4, 0,
0, 9, 0, 3, 4, 0, 0, 9, 0, 0, 0), RHIE = c(15, 3, 19, 10,
5, 5, 17, 6, 6, 10, 10, 3, 10, 5, 5, 2), MIP1 = c(161, 112,
169, 147, 184, 285, 162, 266, 170, 248, 197, 222, 167, 177,
232, 254), MIP3 = c(359, 309, 375, 345, 535, 738, 431, 688,
479, 709, 531, 549, 476, 461, 662, 724), MIP5 = c(565, 473,
560, 537, 868, 1049, 589, 851, 673, 1152, 845, 875, 682,
619, 983, 1166), ACC = c(78, 14, 64, 46, 27, 20, 51, 32,
29, 56, 40, 12, 45, 26, 21, 0), ACC2 = c(18, 1, 14, 15, 3,
9, 24, 9, 16, 12, 9, 6, 25, 4, 3, 0), DEC = c(67, 11, 42,
48, 23, 14, 36, 22, 14, 49, 32, 7, 30, 20, 15, 0), DEC2 = c(21,
0, 12, 14, 5, 0, 20, 4, 0, 11, 4, 0, 16, 1, 2, 0), Explosive = c(12,
9, 15, 13, 5, 1, 45, 11, 3, 8, 16, 0, 14, 11, 6, 0), TRIMP = c(107,
0, 78, 51, 105, 60, 85, 93, 55, 93, 102, 44, 89, 110, 109,
47), TRIMP4 = c(0, 0, 3, 12, 20, 8, 0, 0, 0, 13, 1, 2, 2,
15, 17, 15), TRIMP5 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), TD.min = c(69.3, 39.9, 68.1, 66, 103.6, 121.4,
67.2, 65.7, 78.7, 82.1, 77.5, 109.3, 82.5, 78.8, 92.8, 151
), HRavg = c(66, 0, 65, 65, 79, 69, 67, 65, 60, 67, 74, 70,
71, 71, 75, 73), Vmax.perc = c(100, 50, 88, 93, 73, 101,
69, 64, 98, 75, 78, 95, 70, 72, 66, 49), Amax.perc = c(88,
62, 80, 96, 74, 75, 86, 77, 89, 87, 78, 67, 85, 68, 63, 30
), Dmax.perc = c(104, 50, 74, 109, 69, 48, 85, 61, 47, 81,
61, 47, 79, 65, 66, 25)), row.names = c("21", "211", "22",
"19", "191", "2", "20", "212", "201", "213", "221", "1", "11",
"202", "203", "18"), class = "data.frame")
And this is the bars graph I am building:
# Bar chart of `draft` with the values printed inside the bars.
# NOTE(review): y and the title use column 1 (Player) while the labels use
# column i — presumably all three were meant to use the same column; confirm.
ggplot(draft, aes(x=draft[,2], y=draft[,1])) +
geom_bar(stat="identity",fill="navyblue") +
# The label size is fixed at 4 no matter how wide the bars or the labels are.
geom_text(label=draft[,i],size=4, vjust=1.2, colour = "white",fontface=2) +
labs(title = paste("Session trends for",colnames(draft)[1],"-",player))+
# Strip axis titles, legend, grid and panel background; keep plain black axis lines.
theme(axis.text.x = element_text(angle = 0,color="black",size=8),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
plot.title = element_text(color="black", size=10, face="bold",hjust = 0.5),
legend.position = "none",panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))
I may have misunderstood, but perhaps this type of approach would work for your use-case?
library(tidyverse)
draft <- structure(list(Player = c("Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos", "Shaquil Delos", "Shaquil Delos", "Shaquil Delos",
"Shaquil Delos"), Date = structure(c(19371, 19370, 19369, 19368,
19367, 19364, 19363, 19362, 19361, 19360, 19359, 19356, 19355,
19354, 19353, 19349), class = "Date"), week = c(29, 29, 29, 29,
29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26), TD = c(6638,
2660, 4761, 4956, 3984, 4001, 3688, 4476, 4616, 4666, 4120, 3782,
3701, 4398, 4275, 3222), Time = c(96, 67, 70, 75, 38, 33, 55,
68, 59, 57, 56, 35, 45, 56, 46, 21), Vmax = c(9.4, 4.7, 8.3,
8.8, 6.9, 9.5, 6.5, 6, 9.2, 7.1, 7.3, 9, 6.5, 6.8, 6.2, 4.6),
Amax = c(4.5, 3.1, 4.1, 4.9, 3.8, 3.8, 4.4, 3.9, 4.5, 4.4,
4, 3.4, 4.3, 3.5, 3.2, 1.5), Dmax = c(-5.9, -2.8, -4.2, -6.2,
-3.9, -2.7, -4.9, -3.5, -2.7, -4.6, -3.5, -2.7, -4.5, -3.7,
-3.8, -1.4), Aerobic = c(462, 44, 589, 280, 175, 546, 333,
831, 303, 959, 225, 125, 194, 188, 534, 665), HSD = c(177,
0, 475, 86, 59, 463, 56, 14, 384, 92, 119, 393, 54, 262,
24, 0), SD = c(100, 0, 78, 39, 0, 149, 0, 0, 125, 3, 6, 141,
0, 0, 0, 0), Nsprints = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), DEC3 = c(7, 0, 2, 5, 0,
0, 8, 0, 0, 3, 0, 0, 3, 0, 0, 0), ACC3 = c(4, 0, 1, 4, 0,
0, 9, 0, 3, 4, 0, 0, 9, 0, 0, 0), RHIE = c(15, 3, 19, 10,
5, 5, 17, 6, 6, 10, 10, 3, 10, 5, 5, 2), MIP1 = c(161, 112,
169, 147, 184, 285, 162, 266, 170, 248, 197, 222, 167, 177,
232, 254), MIP3 = c(359, 309, 375, 345, 535, 738, 431, 688,
479, 709, 531, 549, 476, 461, 662, 724), MIP5 = c(565, 473,
560, 537, 868, 1049, 589, 851, 673, 1152, 845, 875, 682,
619, 983, 1166), ACC = c(78, 14, 64, 46, 27, 20, 51, 32,
29, 56, 40, 12, 45, 26, 21, 0), ACC2 = c(18, 1, 14, 15, 3,
9, 24, 9, 16, 12, 9, 6, 25, 4, 3, 0), DEC = c(67, 11, 42,
48, 23, 14, 36, 22, 14, 49, 32, 7, 30, 20, 15, 0), DEC2 = c(21,
0, 12, 14, 5, 0, 20, 4, 0, 11, 4, 0, 16, 1, 2, 0), Explosive = c(12,
9, 15, 13, 5, 1, 45, 11, 3, 8, 16, 0, 14, 11, 6, 0), TRIMP = c(107,
0, 78, 51, 105, 60, 85, 93, 55, 93, 102, 44, 89, 110, 109,
47), TRIMP4 = c(0, 0, 3, 12, 20, 8, 0, 0, 0, 13, 1, 2, 2,
15, 17, 15), TRIMP5 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), TD.min = c(69.3, 39.9, 68.1, 66, 103.6, 121.4,
67.2, 65.7, 78.7, 82.1, 77.5, 109.3, 82.5, 78.8, 92.8, 151
), HRavg = c(66, 0, 65, 65, 79, 69, 67, 65, 60, 67, 74, 70,
71, 71, 75, 73), Vmax.perc = c(100, 50, 88, 93, 73, 101,
69, 64, 98, 75, 78, 95, 70, 72, 66, 49), Amax.perc = c(88,
62, 80, 96, 74, 75, 86, 77, 89, 87, 78, 67, 85, 68, 63, 30
), Dmax.perc = c(104, 50, 74, 109, 69, 48, 85, 61, 47, 81,
61, 47, 79, 65, 66, 25)), row.names = c("21", "211", "22",
"19", "191", "2", "20", "212", "201", "213", "221", "1", "11",
"202", "203", "18"), class = "data.frame")
# Draw one bar chart per metric column (every column after the first three),
# scaling the label font to the widest label so it still fits inside the bars.
for (i in seq_len(ncol(draft))[-(1:3)]) {
  metric <- colnames(draft)[i]
  # Labels are the values truncated to whole numbers.
  bar_labels <- as.integer(draft[[i]])
  # Size from the widest label in the column, not from whatever sits in row 3;
  # floor at 1 so labels of five or more characters never get a size <= 0.
  label_size <- max(1, 5 - max(nchar(bar_labels)))
  # print() is required for ggplot objects to render inside a loop.
  print(ggplot(draft, aes(x = Date, y = .data[[metric]])) +
    geom_bar(stat = "identity", fill = "navyblue") +
    geom_text(label = bar_labels, size = label_size,
              vjust = 1.2, colour = "white", fontface = 2) +
    labs(title = paste("Session trends for", metric, "-", "player")) +
    theme(axis.text.x = element_text(angle = 0, color = "black", size = 8),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          plot.title = element_text(color = "black", size = 10, face = "bold", hjust = 0.5),
          legend.position = "none",
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black")))
}
Created on 2023-01-16 with reprex v2.0.2
I am trying to bind two data frames that each have more than 600 columns, and each of those 600 columns has a unique name.
I want to stack the two data frames one below the other, matching on the column names. I can do it when there are only a few columns, but I am not sure how to go about it with more than 600 columns, as writing out 600 column names for a merge is quite difficult. Can someone help me out?
Attached is the simplified data of both data frame.
This is one of the data frame called pd:
structure(list(ds = c("2019-01-01", "2019-02-01", "2019-03-01",
"2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01",
"2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01",
"2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01",
"2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01",
"2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01",
"2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01",
"2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01",
"2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01",
"2022-08-01"), X1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
85, 72, 111, 96, 50, 95, 48, 87, 75, 249, 173, 74, 86, 127, 209,
92, 137, 49, 84, 75, 73, 376, 196, 91, 107, 124, 177, 244, 275,
100, 176), X2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 29, 243,
281, 262, 283, 0, 264, 104, 289, 41, 76), X3 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 157, 171, 377, 409, 375, 314, 253, 322,
130, 472, 115, 179)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -44L))
This is the other data frame, pd1
structure(list(ds = structure(c(19236, 19266, 19297, 19327, 19358,
19389, 19417, 19448, 19478), class = "Date"), X1 = structure(c(103.045278486668,
103.045278486668, 103.045278486668, 103.045278486668, 103.045278486668,
103.045278486668, 103.045278486668, 103.045278486668, 103.045278486668
), tsp = c(2022.66666666667, 2023.33333333333, 12), class = "ts"),
X2 = structure(c(9.97152706820806, 9.97152706820806, 9.97152706820806,
9.97152706820806, 9.97152706820806, 9.97152706820806, 9.97152706820806,
9.97152706820806, 9.97152706820806), tsp = c(2022.66666666667,
2023.33333333333, 12), class = "ts"), X3 = structure(c(21.2001463509872,
21.2001463509872, 21.2001463509872, 21.2001463509872, 21.2001463509872,
21.2001463509872, 21.2001463509872, 21.2001463509872, 21.2001463509872
), tsp = c(2022.66666666667, 2023.33333333333, 12), class = "ts")), class = "data.frame", row.names = c(NA,
-9L))
If they have a similar structure, you can simply use rbind?
# pd$ds is character while pd1$ds is a Date; binding them as-is would coerce
# the appended dates to their day counts as text (e.g. "19236"), so bring
# pd$ds to Date first.
pd$ds <- as.Date(pd$ds)
# rbind() matches data-frame columns by name, so no column list is needed.
rbind(pd, pd1)
Courtesy of @jay.sf in the comments below: The rbind function will automatically combine the columns with corresponding names.
I'm trying to extract slope values from a number of linear regression models. I am plotting acetone emission against water content on different days.
I have these graphs and models
I have tried to extract the slope values using this code:
# R is case-sensitive: the loader is library(), not Library().
library(broom)
library(tidyverse)
# Fit one linear model per value of `days`, attach each fit's adjusted R^2,
# tidy the coefficients and keep only the slope row.
# NOTE(review): the formula below has water_content as the response and
# acetone as the predictor, whereas the plot draws acetone (y) against
# water_content (x).
lm_table <- df %>%
  nest_by(days) %>%
  summarise(mdl = list(lm(water_content ~ acetone, data)), .groups = "drop") %>%
  mutate(adjrsquared = map_dbl(mdl, ~summary(.)$adj.r.squared ),
         mdl = map(mdl, broom::tidy)) %>%
  unnest(mdl)%>%
  filter(term=="acetone")
and also this code:
# One row per value of `days`, holding the fitted model and its tidied
# coefficient table as list-columns.
lm_table2 <- df %>%
  nest_by(days) %>%
  mutate(
    model = list(lm(water_content ~ acetone, data)),
    coefficients2 = list(tidy(model))
  )
# Expand the tidied coefficients into ordinary rows.
coefficients2 <- unnest(lm_table2, coefficients2)
Both codes however give different slope values than what I get from the graphs. Any ideas as to why that is?
Here's the data
df <- structure(list(i.x45.03 = c(22, 17, 11, 1782, 1767, 250, 3568,
79, 219, 855, 12009, 395, 1552, 705, 2282, 84, 3396, 252, 2058,
1480, 5, 745, 2573, 1005, 946, 3320, 5406, 2192, 20, 1207, 9519,
66, 463, 250, 1095, 16556, 88, 2695, 275, 16, 1577, 29, 3221,
25, 6295, 2, 63, 123, 8, 1, 37, 5308, 4546, 994, 4567, 421, 0,
1938, 19480, 1027, 3474, 1982, 2819, 69, 27733, 2152, 15429,
996, 8, 3435, 8748, 17062, 269, 26188, 35823, 2572, 67, 761,
13493, 1, 1, 1, 16, 9, 29, 89, 20, 11, 21644, 3, 37, 13, 0, 0,
0, 0, 3, 30, 19, 0, 0, 242, 7246, 1, 20081, 77, 0, 0, 0, 5878,
0, 0, 22, 2, 4, 1, 93, 12, 2, 73, 0, 19, 0, 0, 2, 48, 3, 0, 0,
0, 0, 22, 4, 0, 0, 0, 0, 0, 0, 1, 87, 0, 0, 3, 0, 0, 4, 1, 0,
82, 7, 0, 0, 0, 7, 22, 34, 17, 0, 0, 0, 0, 0, 2, 19, 3, 0, 990,
0, 0, 0, 0, 84, 9, 0, 5, 1246, 1944, 633, 23640, 262, 5399, 83,
19, 4417, 125, 7801, 69, 6755, 6, 39, 262), i.water_content_percent_es = c(98,
39, 85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8,
17, 10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19,
32, 40, 79, 22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39,
85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17,
10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 98, 23, 8, 17, 10, 75,
52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17, 10, 75, 52,
13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79, 22,
49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91), daysincubated4 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4), days = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), water_content = c(98,
39, 85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8,
17, 10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19,
32, 40, 79, 22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39,
85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17,
10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 98, 23, 8, 17, 10, 75,
52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17, 10, 75, 52,
13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79, 22,
49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91), acetone = c(22,
17, 11, 1782, 1767, 250, 3568, 79, 219, 855, 12009, 395, 1552,
705, 2282, 84, 3396, 252, 2058, 1480, 5, 745, 2573, 1005, 946,
3320, 5406, 2192, 20, 1207, 9519, 66, 463, 250, 1095, 16556,
88, 2695, 275, 16, 1577, 29, 3221, 25, 6295, 2, 63, 123, 8, 1,
37, 5308, 4546, 994, 4567, 421, 0, 1938, 19480, 1027, 3474, 1982,
2819, 69, 27733, 2152, 15429, 996, 8, 3435, 8748, 17062, 269,
26188, 35823, 2572, 67, 761, 13493, 1, 1, 1, 16, 9, 29, 89, 20,
11, 21644, 3, 37, 13, 0, 0, 0, 0, 3, 30, 19, 0, 0, 242, 7246,
1, 20081, 77, 0, 0, 0, 5878, 0, 0, 22, 2, 4, 1, 93, 12, 2, 73,
0, 19, 0, 0, 2, 48, 3, 0, 0, 0, 0, 22, 4, 0, 0, 0, 0, 0, 0, 1,
87, 0, 0, 3, 0, 0, 4, 1, 0, 82, 7, 0, 0, 0, 7, 22, 34, 17, 0,
0, 0, 0, 0, 2, 19, 3, 0, 990, 0, 0, 0, 0, 84, 9, 0, 5, 1246,
1944, 633, 23640, 262, 5399, 83, 19, 4417, 125, 7801, 69, 6755,
6, 39, 262)), row.names = c(NA, -192L), class = "data.frame")
and the code for the graph I've made is:
library(ggpmisc)
library(tidyverse)
# Single model formula shared by the smoother and the equation label so the
# two can never drift apart.
formula <- y~x
# Scatter of acetone (y) against water_content (x), one free-scaled panel per
# value of `days`, with a linear fit and its adj. R^2, equation and p-value.
ggplot(df, aes(water_content, acetone)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  theme_bw() +
  facet_wrap(~days, scales = "free") +
  stat_poly_eq(
    # after_stat() replaces the deprecated stat() for referring to computed variables.
    aes(label = paste(after_stat(adj.rr.label), after_stat(eq.label), after_stat(p.value.label), sep = "*\", \"*")),
    formula = formula, parse = TRUE, size = 3)
Any ideas why I don't get the same slope values?
All help is much appreciated!
You swapped x and y. Possibly because of using complex 'tidyverse' coding this was not obvious.
library(nlme)
# Fit a separate linear model of acetone on water_content for each value of days.
lmList(acetone ~ water_content | days, data = df)
gives
Call:
Model: acetone ~ water_content | days
Data: df
Coefficients:
(Intercept) water_content
0 3314.26811 -31.663431
4 12046.87296 -154.277916
24 3103.13075 -44.368527
116 63.82385 -0.792739
Degrees of freedom: 192 total; 184 residual
Residual standard error: 4538.636
I've tried all sorts of different manipulations, but my basic problem is this:
# Read the second HTML table on the page (which=2) as a data frame.
# presumably readHTMLTable comes from the XML package — no library() call is shown
url<- "http://www.ref.org.uk/fuel/tablebysp.php?valdate=2015-03-08"
data <- readHTMLTable(url,header = TRUE,as.data.frame =TRUE,which=2)
# typeof() reports the storage type, which is "list" for a data frame;
# class(data) or is.data.frame(data) is the check for "is this a data frame".
typeof(data)
My data looks great, but I cannot coerce it into a data frame. I don't know what is stopping me.
As pointed out in the post comments, your code actually works fine. You can get strings instead of factors with:
url<- "http://www.ref.org.uk/fuel/tablebysp.php?valdate=2015-03-08"
# which=2 picks the second table on the page; stringsAsFactors=FALSE keeps
# text columns as strings instead of factors.
data <- readHTMLTable(url, header=TRUE, as.data.frame=TRUE, which=2,
stringsAsFactors=FALSE)
Below is how to do this with the rvest package, which really shines for this, especially when there are multiple tables or weirdly nested ones. And, there's some dplyr
In this case, there is more than one table and the second one is what you want. Thankfully, it's pretty well-formed. The code below extracts all the tables from the page (with CSS selectors) then uses the handy magrittr `extract2` to avoid what would be weird/ugly `[[]]` usage.
The piping idiom (which started with magrittr and is now used in much of the hadleyverse) "pushes" or "flows" data from left to right vs "pops" data out from nested parenthesis calls.
library(rvest)
library(magrittr)
library(dplyr)
# read_html() replaces html(), which was deprecated and later removed from rvest.
pg <- read_html("http://www.ref.org.uk/fuel/tablebysp.php?valdate=2015-03-08")
# Collect every <table> node, take the second one and parse it into a data frame.
dat <- pg %>% html_nodes("table") %>% extract2(2) %>% html_table(header=TRUE)
glimpse(dat)
## Observations: 48
## Variables:
## $ SD (chr) "2015-03-08", "2015-03-08", "2015-03-08", "2015-03-08", "2015-03-08", "2015-03-08", "...
## $ SP (int) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24...
## $ Gas (chr) "3,467", "3,522", "3,594", "3,529", "2,811", "2,538", "2,520", "2,489", "2,498", "2,5...
## $ Coal (chr) "8,261", "8,062", "7,876", "7,437", "6,751", "6,799", "6,621", "6,428", "6,586", "6,2...
## $ Nuclear (chr) "7,495", "7,553", "7,641", "7,676", "7,674", "7,672", "7,676", "7,677", "7,672", "7,6...
## $ Hydro (int) 737, 729, 666, 651, 646, 647, 645, 648, 658, 729, 734, 736, 740, 738, 740, 741, 751, ...
## $ Net Pumped (chr) "-438", "-84", "-504", "-860", "-1,092", "-1,118", "-1,396", "-1,700", "-1,606", "-1,...
## $ Wind (chr) "4,675", "4,795", "4,623", "4,572", "4,647", "4,570", "4,377", "4,445", "4,602", "4,5...
## $ OCGT (int) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ Oil (int) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ Biomass (chr) "1,078", "1,079", "1,081", "1,048", "1,005", "1,022", "1,086", "1,035", "1,072", "1,0...
## $ French Int (chr) "480", "480", "680", "678", "1,614", "1,626", "1,532", "1,536", "772", "772", "504", ...
## $ Dutch Int (chr) "860", "852", "874", "838", "850", "866", "848", "830", "830", "866", "862", "862", "...
## $ NI Int (int) 22, -72, 2, -16, -30, -50, 4, 4, -122, -138, -108, -114, -2, 16, 24, 24, 28, -30, -42...
## $ Eire Int (int) 170, 190, 142, 142, 142, 114, 114, 114, 114, 112, 88, 50, 16, 16, 16, 42, 18, -72, -1...
## $ Net Supply (chr) "26,807", "27,106", "26,675", "25,695", "25,018", "24,686", "24,027", "23,506", "23,0...
You can alternatively do:
# Same extraction written as nested calls, read inside-out: all table nodes -> 2nd -> data frame.
html_table(extract2(html_nodes(pg, "table"), 2), header=TRUE)
if you don't like or generally use pipes.
You can then do some basic cleanup of columns to get useful numeric/date values:
# Columns that were read as text because of thousands separators (e.g. "3,467").
comma_cols <- c("Gas", "Coal", "Nuclear", "Net Pumped", "Wind", "Biomass",
                "French Int", "Dutch Int", "Net Supply")
# Parse the date column and strip the commas from every affected column in one
# pass instead of repeating the same conversion nine times.
dat <- dat %>%
  mutate(SD = as.Date(SD),
         across(all_of(comma_cols), ~ as.numeric(gsub(",", "", .x))))
glimpse(dat)
## Observations: 48
## Variables:
## $ SD (date) 2015-03-08, 2015-03-08, 2015-03-08, 2015-03-08, 2015-03-08, 2015-03-08, 2015-03-08, ...
## $ SP (int) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24...
## $ Gas (dbl) 3467, 3522, 3594, 3529, 2811, 2538, 2520, 2489, 2498, 2543, 2531, 2522, 2627, 2729, 2...
## $ Coal (dbl) 8261, 8062, 7876, 7437, 6751, 6799, 6621, 6428, 6586, 6229, 6194, 6299, 6455, 6639, 6...
## $ Nuclear (dbl) 7495, 7553, 7641, 7676, 7674, 7672, 7676, 7677, 7672, 7670, 7673, 7677, 7677, 7681, 7...
## $ Hydro (int) 737, 729, 666, 651, 646, 647, 645, 648, 658, 729, 734, 736, 740, 738, 740, 741, 751, ...
## $ Net Pumped (dbl) -438, -84, -504, -860, -1092, -1118, -1396, -1700, -1606, -1632, -1344, -1052, -1342,...
## $ Wind (dbl) 4675, 4795, 4623, 4572, 4647, 4570, 4377, 4445, 4602, 4570, 4529, 4512, 4312, 3976, 3...
## $ OCGT (int) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ Oil (int) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ Biomass (dbl) 1078, 1079, 1081, 1048, 1005, 1022, 1086, 1035, 1072, 1086, 1084, 1085, 1086, 1082, 1...
## $ French Int (dbl) 480, 480, 680, 678, 1614, 1626, 1532, 1536, 772, 772, 504, 502, 1598, 1602, 1878, 188...
## $ Dutch Int (dbl) 860, 852, 874, 838, 850, 866, 848, 830, 830, 866, 862, 862, 884, 846, 942, 914, 1032,...
## $ NI Int (int) 22, -72, 2, -16, -30, -50, 4, 4, -122, -138, -108, -114, -2, 16, 24, 24, 28, -30, -42...
## $ Eire Int (int) 170, 190, 142, 142, 142, 114, 114, 114, 114, 112, 88, 50, 16, 16, 16, 42, 18, -72, -1...
## $ Net Supply (dbl) 26807, 27106, 26675, 25695, 25018, 24686, 24027, 23506, 23076, 22807, 22747, 23079, 2...
I'm trying to do something a little bit complicated for a beginner in programming.
I have a matrix 16x16 and I want to plot the values as a heatmap using image() in R.
How can I plot the "0" (zeros) in blue when the sum (row index + column index) is <= 15? Is that possible?
example matrix:
# 180 counts, stored column by column: 12 columns of 15 rows each.
x <- c(3045, 893, 692, 830, 617, 155, 246, 657, 105, 60, 18, 7, 7, 4, 2, 11234,
2985, 2242, 2471, 1575, 366, 503, 1283, 170, 79, 32, 6, 4, 1, 3, 19475, 4756,
3233, 3251, 1810, 409, 575, 1210, 139, 41, 11, 4, 2, 0, 0, 20830, 4739, 2990,
2531, 1346, 298, 325, 612, 60, 17, 1, 0, 1, 0, 0, 15304, 3196, 1885, 1440, 610,
117, 115, 185, 14, 2, 0, 0, 0, 0, 0, 8026, 1535, 806, 539, 223, 33, 37, 39, 0,
0, 0, 0, 0, 0, 0, 3300, 562, 286, 141, 45, 14, 5, 12, 0, 0, 0, 0, 0, 0, 0, 1067,
160, 65, 40, 14, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 277, 47, 6, 2, 1, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 72, 6, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 5, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
xmat <- matrix(x, ncol = 12)
# Pad with zeros to 16 x 16. The padding is sized from the matrix itself: the
# matrix has 15 rows at this point, so binding length-16 vectors would trigger
# "not a multiple of vector length" warnings.
xmat <- cbind(xmat, matrix(0, nrow = nrow(xmat), ncol = 4))
xmat <- rbind(xmat, rep(0, ncol(xmat)))
# Label rows and columns 0..15.
dimnames(xmat) <- list(0:15, 0:15)
xmat
Thanks!
Vitor
Plot the cases meeting the criteria as blue.
# Cells that are zero and whose row + column position is at most 15.
blue_zero <- (row(xmat) + col(xmat) <= 15) & xmat == 0
# Blank out everything else so only those cells are drawn, all in blue.
xmat.new <- replace(xmat, !blue_zero, NA)
image(xmat.new, col = "blue")
Plot the cases not meeting the criteria as normal. Notice the add=TRUE
# Cells that are zero and whose row + column position is at most 15.
skip_cell <- (row(xmat) + col(xmat) <= 15) & xmat == 0
# Blank those cells out and overlay the rest with the default palette.
xmat.new <- replace(xmat, skip_cell, NA)
image(xmat.new, add = TRUE)
Result:
Edited to include @Marek's suggestion to simplify the statements.