plotting a graph with multiple bars in R - r

I am struggling to plot the following data and think it is because of the format of the data.
structure(list(HE_Provider = c("Coventry University", "The University of Leicester",
"Total"), Bath_and_North_East_Somerset = c(15, 20, 205), Bedford = c(85,
90, 1040), Blackburn_with_Darwen = c(10, 20, 95), Blackpool = c(10,
5, 60), `Bournemouth,_Poole_and_Christchurch` = c(35, 15, 285
), Bracknell_Forest = c(15, 10, 210), Buckinghamshire = c(195,
145, 1835), Cambridgeshire = c(130, 160, 2500), Central_Bedfordshire = c(115,
70, 1120), Cheshire_East = c(45, 55, 935), Cheshire_West_and_Chester = c(25,
40, 535), City_of_Bristol = c(40, 35, 390), City_of_Derby = c(65,
135, 4115), City_of_Kingston_upon_Hull = c(25, 20, 265), City_of_Leicester = c(315,
1275, 6860), City_of_Nottingham = c(65, 145, 5405), City_of_Plymouth = c(15,
10, 135), City_of_Portsmouth = c(15, 15, 130), City_of_Southampton = c(15,
20, 140), `City_of_Stoke-on-Trent` = c(50, 15, 475), City_of_York = c(35,
20, 350), Cornwall = c(25, 25, 300), County_Durham = c(20, 40,
330), Cumbria = c(30, 20, 305), Darlington = c(0, 15, 110), Derbyshire = c(100,
145, 6925), Devon = c(50, 50, 630), Dorset = c(30, 20, 285),
East_Riding_of_Yorkshire = c(75, 45, 760), East_Sussex = c(55,
50, 650), Essex = c(365, 180, 3320), Gloucestershire = c(150,
85, 905), Greater_London = c(5550, 1930, 18285), Greater_Manchester = c(245,
280, 2820), Halton = c(5, 10, 80), Hampshire = c(180, 120,
1485), Hartlepool = c(5, 10, 55), Herefordshire = c(50, 15,
235), Hertfordshire = c(385, 270, 4815), Isle_of_Wight = c(10,
5, 90), Isles_of_Scilly = c(0, 0, 0), Kent = c(365, 195,
2590), Lancashire = c(75, 125, 985), Leicestershire = c(540,
980, 8010), Lincolnshire = c(145, 190, 7710), Luton = c(105,
75, 685), Medway = c(95, 35, 425), Merseyside = c(75, 120,
975), Middlesbrough = c(10, 5, 65), Milton_Keynes = c(265,
170, 2205), Norfolk = c(120, 115, 2410), North_East_Lincolnshire = c(20,
10, 810), North_Lincolnshire = c(20, 20, 810), North_Somerset = c(25,
15, 205), North_Yorkshire = c(500, 80, 1160), Northamptonshire = c(680,
510, 7505), Northumberland = c(10, 25, 235), Nottinghamshire = c(140,
185, 9410), Oxfordshire = c(280, 135, 1785), Peterborough = c(85,
135, 1560), Reading = c(75, 25, 260), Redcar_and_Cleveland = c(5,
5, 90), Rutland = c(5, 35, 345), Shropshire = c(60, 30, 500
), Slough = c(95, 40, 270), Somerset = c(40, 40, 490), South_Gloucestershire = c(40,
25, 310), South_Yorkshire = c(105, 180, 3220), `Southend-on-Sea` = c(35,
25, 345), Staffordshire = c(370, 150, 3825), `Stockton-on-Tees` = c(20,
15, 145), Suffolk = c(115, 115, 1935), Surrey = c(195, 155,
2900), Swindon = c(50, 25, 225), Telford_and_Wrekin = c(60,
20, 360), Thurrock = c(140, 40, 370), Torbay = c(5, 5, 65
), Tyne_and_Wear = c(45, 60, 680), Warrington = c(20, 20,
290), Warwickshire = c(2080, 210, 2825), West_Berkshire = c(35,
25, 300), West_Midlands = c(8315, 915, 8220), West_Sussex = c(105,
95, 1115), West_Yorkshire = c(200, 245, 3005), Wiltshire = c(90,
55, 630), Windsor_and_Maidenhead = c(40, 25, 405), Wokingham = c(70,
35, 395), Worcestershire = c(350, 110, 1350), `England_(county_unitary_authority_unknown)` = c(0,
10, 770), Total_England = c(24990, 11530, 154930), Total = c(25380,
11845, 158480)), row.names = c(NA, -3L), class = "data.frame")
I would like to plot the Region on the bottom but don't have a title for these regions, with the numbers up the y axis and the fill being the university.

This type of problem generally has to do with reshaping the data. The data is in wide format, whereas ggplot2 expects long format. See this post on how to reshape data from wide to long format.
Reshape the data and plot it with geom_col.
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(ggplot2)
})
# Drop the aggregate columns (anything matching "England" or "Total"),
# then stack the remaining region columns into Region/value pairs.
regional_long <- df1 %>%
  select(-matches("England|Total")) %>%
  pivot_longer(-HE_Provider, names_to = "Region")

# Tilted, smaller x labels keep the many region names legible.
region_axis_text <- element_text(size = 7, angle = 75, vjust = 1, hjust = 1)

# One stacked bar per region, filled by provider.
ggplot(regional_long, aes(Region, value, fill = HE_Provider)) +
  geom_col() +
  theme_bw(base_size = 10) +
  theme(
    axis.text.x = region_axis_text,
    legend.position = "bottom"
  )
Created on 2022-12-06 with reprex v2.0.2

We could bring the data into long format. For the y axis we use a log scale:
library(tidyverse)
# Reshape to long format (one row per provider/region pair), total the
# values per pair, and draw dodged bars on a log scale.
df %>%
  pivot_longer(-HE_Provider) %>%
  group_by(HE_Provider, name) %>%
  # .groups = "drop" returns an ungrouped result and silences the
  # regrouping message summarise() would otherwise print.
  summarise(sum_value = sum(value), .groups = "drop") %>%
  # log1p() keeps zero counts at 0; plain log() would turn them into -Inf.
  ggplot(aes(x = name, y = log1p(sum_value), fill = HE_Provider)) +
  geom_col(position = position_dodge()) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1))

Related

Surface in plotly does not cover all data, leaving a gap between surface and highlight

I am using a plotly surface plot with data that has some missing values.
As you can see in the example below, I am using highlight lines to show that the surface does not reach the highlight, leaving a weird empty gap. It is not a matter of perspective, as the gap also shows in an overhead (top-down) view.
To be more specific, below I am hovering on row 12, column 2006, and although the missing data starts in row 13, in the plot the missing data seems to start before row 12 ("row 11.9"). My expectation would be that the purple surface would reach all the way to the bright blue highlight in row 12.
Is this a bug, or is there a parameter to make sure this does not happen?
Thanks!
library(dplyr)
library(plotly)
DF_RAW = structure(c(181, 163, 60, 124, 76, 62, 73, 59, 17, 21, 26, 7, NA, NA, NA,
188, 145, 61, 130, 61, 59, 62, 57, 20, 22, 22, 6, NA, NA, NA,
137, 154, 54, 191, 75, 56, 65, 56, 22, 27, 33, 14, NA, NA, NA,
126, 185, 65, 109, 51, 71, 57, 38, 25, 23, 21, 10, NA, NA, NA,
150, 144, 44, 123, 58, 24, 48, 41, 19, 26, 21, 5, NA, NA, NA,
138, 137, 61, 130, 67, 34, 60, 44, 19, 21, 16, 4, NA, NA, NA,
121, 146, 101, 92, 70, 74, 88, 33, 18, 39, 24, 12, NA, NA, NA,
NA, 160, 129, 117, 70, 61, 42, 35, 22, 25, 21, 7, 10, 23, 8,
NA, 129, 130, 107, 64, 61, 44, 25, 23, 30, 18, 11, 20, 58, 40,
NA, 136, 131, 96, 53, 31, 51, 37, 43, 31, 19, 2, 22, 40, 41,
NA, 124, 154, 74, 62, 44, 34, 15, 26, 23, 20, 6, 23, 10, 19,
NA, 126, 251, 76, 73, 84, 47, 40, 32, 25, 32, 6, 13, 10, 13,
NA, 129, 194, 91, 53, 99, 46, 34, 60, 21, 17, 6, 14, 14, 26,
NA, 115, 119, 88, 64, 108, 37, 24, 49, 26, 17, 6, 15, 15, 47),
.Dim = 15:14,
.Dimnames = list(c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"),
c("2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019")))
# The plotting code refers to the matrix as DF.
DF <- DF_RAW

# Contour settings for a single axis; only `show` and `highlight` differ
# between the traces below.
contour_axis <- function(show, highlight) {
  list(
    show = show,
    width = 1,
    highlightwidth = 2,
    highlightcolor = "#41a7b3",
    highlight = highlight
  )
}

# First trace: x/y contour lines shown, surface hidden, gaps connected.
mesh_contours <- list(
  x = contour_axis(TRUE, TRUE),
  y = contour_axis(TRUE, TRUE),
  z = contour_axis(FALSE, FALSE)
)

# Second trace: the surface itself, gaps left open, hover highlights only.
surface_contours <- list(
  x = contour_axis(FALSE, TRUE),
  y = contour_axis(FALSE, TRUE),
  z = contour_axis(FALSE, FALSE)
)

plot1 <- plotly::plot_ly(
  x = ~ colnames(DF),
  y = ~ rownames(DF),
  z = ~ DF
)
plot1 <- plotly::add_surface(
  plot1,
  name = "3D mesh",
  connectgaps = TRUE,
  hidesurface = TRUE,
  contours = mesh_contours
)
plot1 <- plotly::add_surface(
  plot1,
  name = "surface",
  connectgaps = FALSE,
  contours = surface_contours
)
plot1
EDIT: To emphasize that this is not a matter of perspective, here is an overhead (top-down) view of the plot. The gap is still visible.
# Camera eye placed on the z axis (0, 0, 2), aimed at the origin.
top_down_camera <- list(
  eye = list(x = 0, y = 0, z = 2),
  center = list(x = 0, y = 0, z = 0),
  up = list(x = 0, y = 0, z = 1)
)

plotly::layout(plot1, scene = list(camera = top_down_camera))
And if we get rid of the 3D mesh and show only the surface with the highlight, it is very clear that in row 11 (right) we have all the data (the blue highlight goes all the way from top to bottom), but in row 12 it seems we only have data up to 2013 (the blue line stops there).
# Hover highlight styling shared by the x and y contours.
highlight_only <- list(
  show = FALSE, width = 1, highlightwidth = 2,
  highlightcolor = "#41a7b3", highlight = TRUE
)
# Same styling with the highlight switched off, used for z.
no_highlight <- list(
  show = FALSE, width = 1, highlightwidth = 2,
  highlightcolor = "#41a7b3", highlight = FALSE
)
# Spike lines are disabled on every axis.
no_spikes <- list(showspikes = FALSE)

surface_only <- plotly::plot_ly(
  x = ~ colnames(DF),
  y = ~ rownames(DF),
  z = ~ DF,
  showscale = FALSE
)
surface_only <- plotly::add_surface(
  surface_only,
  name = "surface",
  connectgaps = FALSE,
  contours = list(x = highlight_only, y = highlight_only, z = no_highlight)
)

# Camera eye on the z axis (0, 0, 2), aimed at the origin.
plotly::layout(
  surface_only,
  scene = list(
    xaxis = no_spikes,
    yaxis = no_spikes,
    zaxis = no_spikes,
    camera = list(
      eye = list(x = 0, y = 0, z = 2),
      center = list(x = 0, y = 0, z = 0),
      up = list(x = 0, y = 0, z = 1)
    )
  )
)

Is there a kable() function for adding a header below a table?

I want to add a header below my table with kableExtra. Is this possible, or is there an easy workaround? I want to add two header rows below the table: the second-to-last row with the confidence levels 50%, 60%, 70%, ..., 99.99%, and the last row spanning columns 2-13 with "Confidence Level C". Thanks
options(digits = 4)
library(knitr)
library(kableExtra)
library(latex2exp)
library(tidyverse)
# critical t-values
# Degrees of freedom, one table row each.
df_values <- c(1:30, 40, 50, 60, 80, 100, 1000)

# Cumulative probability (1 - upper-tail area) for each output column.
# The last entry is .9995 so that r.tail.0005 no longer repeats the
# r.tail.001 column and agrees with the z-row built below.
cum_probs <- c(
  r.tail.25 = .75, r.tail.20 = .80, r.tail.15 = .85, r.tail.10 = .90,
  r.tail.05 = .95, r.tail.025 = .975, r.tail.02 = .98, r.tail.01 = .99,
  r.tail.005 = .995, r.tail.0025 = .9975, r.tail.001 = .999,
  r.tail.0005 = .9995
)

# One column of t quantiles per tail probability.
t.table <- as_tibble(lapply(cum_probs, function(p) qt(p, df = df_values)))

# Standard-normal quantiles for the same probabilities, as a one-row frame
# whose column names match t.table.
z_values <- qnorm(unname(cum_probs), 0, 1)
z_values_row <- as.data.frame(t(z_values))
colnames(z_values_row) <- colnames(t.table)

# Append the z-row, then put a character `df` label column first.
t.table <- t.table %>%
  bind_rows(z_values_row) %>%
  mutate(df = c(df_values, "$z^{*}$")) %>%
  select(df, everything())
# Render the Student's t table.
# Header labels: the df column followed by the twelve tail probabilities.
tail_labels <- c(
  "**_df_**", ".25", ".20", ".15", ".10", ".05", ".025", ".02",
  ".01", "0.005", ".0025", ".001", ".0005"
)

# Spanning header: blank over the df column, title over the other twelve.
top_header <- c(" " = 1, "Upper tail probability $p$" = 12)

t_kable <- kable(
  t.table,
  booktabs = TRUE,
  col.names = tail_labels,
  escape = FALSE,
  caption = "t verdeling kritieke t-waarden",
  linesep = "",
  align = "r",
  digits = 3
)

t_kable %>%
  kable_styling(font_size = 10) %>%
  column_spec(1, bold = TRUE, border_right = TRUE) %>%
  add_header_above(top_header)

How to add or annotate Latex formula as annotation in boxplot() in R?

I want to annotate my boxplot (created in base R) with some text and LaTeX formulas. I tried $..formula..$, but it didn't work. Does anyone know a solution?
# Sample data: 20 observations.
i <- 1:20
X <- c(13, 18, 25, 58, 25, 31, 39, 42, 17, 35, 46, 22, 18, 20, 26, 14, 33, 19, 20, 21)
df <- data.frame(i, X)

# The y-axis limits argument is `ylim`; `ylimit` is not a graphics argument.
# `data =` is dropped because it is only used by the formula interface.
boxplot(df$X, main = "Belminuten Data",
        xlab = " ", ylab = "Aantal Belminuten",
        frame = FALSE,
        ylim = c(10, 60),
        range = 3)
text(x = c(1.3), y = 60, "n = 20") # n should be in italic or in formula style
text(x = c(.7), y = 23.5, "Med = 23.5")
text(x = c(.7), y = 18.5, "Q_1 = 18.5")
library(latex2exp)
# Sample data: 20 observations.
i <- 1:20
X <- c(13, 18, 25, 58, 25, 31, 39, 42, 17, 35, 46, 22, 18, 20, 26, 14,
       33, 19, 20, 21)
df <- data.frame(i, X)

# The y-axis limits argument is `ylim`; `ylimit` is not a graphics argument.
# `data =` is dropped because it is only used by the formula interface.
boxplot(df$X, main = "Belminuten Data",
        xlab = " ", ylab = "Aantal Belminuten",
        frame = FALSE,
        ylim = c(10, 60),
        range = 3)

# TeX() converts each LaTeX string for use as a text() label.
text(x = c(1.3), y = 60, TeX("$n = 20$"))
text(x = c(.7), y = 13.0, TeX("$Min = 13$"))
text(x = c(.7), y = 18.5, TeX("$Q_1 = 18.5$"))
text(x = c(.7), y = 23.5, TeX("$Med = 23.5$"))
text(x = c(.7), y = 34.0, TeX("$Q_3 = 34$"))
text(x = c(.7), y = 58.0, TeX("$Max = 58$"))

How to label a ternary plot

I am trying to create a triangular (ternary) plot whose three dimensions represent three plant strategies.
One dimension represents the "C" strategy (competitive plants), the second dimension "S" (stress-tolerant plants) and the third dimension "R" (ruderal plants); the points on it represent the plant species.
I want to write the species names outside the triangle and connect each name to its point inside the triangle with an arrow. How do I draw this ternary plot?
The following is the data structure and my code
# library() stops with an error if the package is missing, whereas
# require() only returns FALSE and lets the script fail later.
library(Ternary)

# Single plot with tight margins. par() is set before the only
# TernaryPlot() call so that no extra default plot is drawn first.
par(mfrow = c(1, 1), mar = rep(0.3, 4))
TernaryPlot(atip = "C%", btip = "R%", ctip = "S%",
            point = "UP", lab.cex = 0.8, grid.minor.lines = 0,
            grid.lty = "solid", col = "#FFFFFF", grid.col = "GREY",
            axis.col = rgb(0.1, 0.1, 0.1), ticks.col = rgb(0.1, 0.1, 0.1),
            padding = 0.08)
data_points <- list("Bromus dantonia" = c(47, 59, 149),
"Calamagrosis psoudo phragmatis" = c(90, 102, 63),
"Carex diluta" = c(109, 64, 82),
"Carex divisa" = c(96, 99, 59),
"Carex pseudocyperus" = c(130, 71, 54),
"Carex stenophylla" = c(97, 98, 59),
"Catabrosa aquatica" = c(100, 5, 150),
"Centaurea iberica" = c(124, 85, 46),
"Cirsium hygrophilum" = c(158, 42, 55),
"Cladium mariscus" = c(159, 96, 0),
"cod2" = c(54, 82, 119),
"Cynodon dactylon" = c(121, 54, 80),
"Eleocharis palustri" = c(124, 100, 31),
"Epilobium parviflorum" = c(67, 80, 107),
"Eromopoa persica" = c(83, 15, 157),
"Funaria cf.microstoma" = c(8, 0, 247),
"Glaux maritime" = c(4, 196, 55),
"Hordeum brevisubulatum" = c(76, 70, 109),
"Hordeum glaucum" = c(40, 79, 136),
"Inula britannica" = c(95, 108, 51),
"Juncus articulatus" = c(107, 79, 69),
"Blysmus compressus" = c(81, 127, 47),
"Juncusinflexus"= c(149, 106, 0),
"Medicago polymorpha" = c(60, 86, 109),
"Mentha spicata" = c(150, 23, 82),
"Ononis spinosa" = c(66, 112, 77),
"Phragmites australis" = c(234, 0, 21),
"Plantago amplexicaulis" = c(108, 83, 64),
"Poa trivialis" = c(90, 28, 138),
"Polygonum paronychioides" = c(20, 12, 223),
"Potentila reptans" = c(106, 41, 108),
"Potentilla anserina" = c(105, 58, 91),
"Ranunculus grandiflorus" = c(129, 25, 101),
"Schoenus nigricans" = c(143, 91, 21),
"Setaria viridis" = c(10, 7, 238),
"Sonchus oleraceus" = c(178, 0, 77),
"Taraxacum officinale" = c(117, 28, 110),
"Trifolium repens" = c(94, 4, 157),
"Triglochin martima" = c(63, 96, 95),
"Veronica anagallis-aquatica" = c(55, 37, 163)
)
# Fill colour per species: its three values used as red/green/blue
# (0-255 scale) with alpha 128.
point_fill <- vapply(
  data_points,
  function(csr) {
    rgb(csr[1], csr[2], csr[3], alpha = 128, maxColorValue = 255)
  },
  character(1)
)

# Draw the points, then write each species name at its point.
AddToTernary(points, data_points, pch = 21, cex = 1.2, bg = point_fill)
AddToTernary(text, data_points, names(data_points), cex = 0.8, font = 1)

Change ticks position inwards in ggplot

I want to move the ticks in a ggplot plot so that they point inwards. The axis.ticks.margin argument is deprecated, so I have tried to play around with the axis.text element but could not manage to do it. Can anyone help me out?
Here is an example dataset df
df<- structure(list(X1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41), X2 = c(0,
10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140,
150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270,
280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380, 390, 400
)), .Names = c("X1", "X2"), row.names = c(NA, -41L), class = "data.frame")
Below is what I have tried so far:
library(ggplot2)
# The sample data frame has columns X1 and X2 (there is no Age or GPP),
# so the aesthetics map to those.
ggplot(df, aes(x = X1, y = X2)) +
  geom_point() +
  theme(
    panel.grid = element_blank(),
    # theme() settings must be named. NOTE(review): the original passed
    # this element_text() unnamed; assuming the x-axis labels were meant.
    axis.text.x = element_text(hjust = seq(from = 0, to = 1, length.out = 6)),
    axis.text.y = element_text(margin = margin(5, 5, 10, 5, "pt"))
  )
# The sample data frame has columns X1 and X2 (there is no Age or GPP),
# so the aesthetics map to those.
ggplot(df, aes(x = X1, y = X2)) +
  geom_point() +
  theme(
    panel.grid = element_blank(),
    # A negative tick length draws the ticks inside the panel.
    axis.ticks.length = unit(-0.25, "cm"),
    # Margins around the labels keep them clear of the inward ticks;
    # margin() builds the margin spec instead of a bare unit() vector.
    axis.text.x = element_text(margin = margin(0.5, 0.5, 0.5, 0.5, unit = "cm")),
    axis.text.y = element_text(margin = margin(0.5, 0.5, 0.5, 0.5, unit = "cm"))
  )

Resources