ggplot by group with filter() - r

I have a big dataset with the following format:
structure(list(LOCATION = c("CAN", "CAN", "CAN", "CAN", "CAN",
"CAN", "CAN", "CAN", "CAN", "CAN"), Country = c("Canada", "Canada",
"Canada", "Canada", "Canada", "Canada", "Canada", "Canada", "Canada",
"Canada"), SUBJECT = c("ULABUL99", "ULABUL99", "ULABUL99", "ULABUL99",
"ULABUL99", "ULABUL99", "ULABUL99", "ULABUL99", "ULABUL99", "ULABUL99"
), Subject = c("Unit Labour Cost", "Unit Labour Cost", "Unit Labour Cost",
"Unit Labour Cost", "Unit Labour Cost", "Unit Labour Cost", "Unit Labour Cost",
"Unit Labour Cost", "Unit Labour Cost", "Unit Labour Cost"),
SECTOR = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Sector = c("Total Economy",
"Total Economy", "Total Economy", "Total Economy", "Total Economy",
"Total Economy", "Total Economy", "Total Economy", "Total Economy",
"Total Economy"), MEASURE = c("ST", "ST", "ST", "ST", "ST",
"ST", "ST", "ST", "ST", "ST"), Measure = c("Level, ratio or national currency",
"Level, ratio or national currency", "Level, ratio or national currency",
"Level, ratio or national currency", "Level, ratio or national currency",
"Level, ratio or national currency", "Level, ratio or national currency",
"Level, ratio or national currency", "Level, ratio or national currency",
"Level, ratio or national currency"), FREQUENCY = c("A",
"A", "A", "A", "A", "A", "A", "A", "A", "A"), Frequency = c("Annual",
"Annual", "Annual", "Annual", "Annual", "Annual", "Annual",
"Annual", "Annual", "Annual"), TIME = 1970:1979, Time = 1970:1979,
Value = c(0.1304592, 0.1357066, 0.1430287, 0.1521136, 0.1752398,
0.2018611, 0.2193767, 0.2347496, 0.2470616, 0.2663881), Flag.Codes = c("E",
"E", "E", "E", "E", "E", "E", "E", "E", "E"), Flags = c("Estimated value",
"Estimated value", "Estimated value", "Estimated value",
"Estimated value", "Estimated value", "Estimated value",
"Estimated value", "Estimated value", "Estimated value")), row.names = c(NA,
10L), class = "data.frame")
I want to draw a time plot like the following, with one line for each sector group within a particular subject of a particular country (in this case, Germany's Labour Income Share).
I tried to code as follows:
# Attempt from the question; as reported there, it does not show any plot.
library(ggplot2)
# NOTE(review): only tidyr is attached here, while filter() is a dplyr verb --
# confirm that dplyr is loaded elsewhere.
library(tidyr)
df <- read.csv("/Users/ulc.csv", header = TRUE)
# Sector codes as a factor, built from the full (unfiltered) data frame.
fsector = factor(df$SECTOR)
df %>%
# The conditions reference df$... directly instead of the piped data's columns.
filter(df$MEASURE =="ST",
df$SUBJECT == "ULAIRU99",
df$LOCATION == "DEU") %>%
# NOTE(review): the sample data above has TIME/Time and Value columns, not
# year/value -- confirm the column names. The aesthetics also use df$... and
# fsector, which come from the unfiltered data frame.
ggplot(aes(x = df$year, y = df$value, color = fsector, linetype = fsector)) +
# Both manual scales use the same eight "Sec n" legend labels and the values 1:8.
scale_color_manual(labels=c("Sec 1","Sec 2", "Sec 3", "Sec 4", "Sec 5", "Sec 6", "Sec 7", "Sec 8"), values = 1:8) +
scale_linetype_manual(labels=c("Sec 1","Sec 2", "Sec 3", "Sec 4", "Sec 5", "Sec 6", "Sec 7", "Sec 8"), values = 1:8) +
theme(legend.position = c(0.8, 0.3), legend.title = element_blank()) +
# NOTE(review): no geom_*() layer is added anywhere in this chain.
ylab("LIS of Germany by sector") + xlab("year")
But the result does not show any plot, and it seems that a lot of elements are missing from my code. Should I add a geom_line() for each sector? There seems to be a much simpler way. Any help would be appreciated.

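You can try the following code. The key changes are: load dplyr (which provides filter()), refer to the columns by their bare names instead of df$..., use the actual column names TIME and Value, convert SECTOR to a factor inside the pipeline, and add geom_line() so that something is actually drawn: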
library(dplyr)
library(ggplot2)

# One line per sector for the "ST" measure of subject "ULAIRU99" in location
# "DEU"; the sector is mapped to both colour and linetype.
df %>%
  filter(
    MEASURE == "ST",
    SUBJECT == "ULAIRU99",
    LOCATION == "DEU"
  ) %>%
  # A factor makes SECTOR a discrete variable for the manual scales below.
  mutate(SECTOR = factor(SECTOR)) %>%
  ggplot(aes(x = TIME, y = Value, colour = SECTOR, linetype = SECTOR)) +
  geom_line() +
  scale_colour_manual(
    labels = c("Sec 1", "Sec 2", "Sec 3", "Sec 4", "Sec 5", "Sec 6", "Sec 7", "Sec 8"),
    values = 1:8
  ) +
  scale_linetype_manual(
    labels = c("Sec 1", "Sec 2", "Sec 3", "Sec 4", "Sec 5", "Sec 6", "Sec 7", "Sec 8"),
    values = 1:8
  ) +
  # Legend placed inside the panel, without a title.
  theme(
    legend.position = c(0.8, 0.3),
    legend.title = element_blank()
  ) +
  labs(x = "year", y = "LIS of Germany by sector")

Related

Free axis along particular facets in facet_grid ggplot

I am trying to do 2 things:
First and most important is to somehow get the y axis to vary by the outcome_type2 variable, since they all have fairly different ranges. So the "C" have their own axis range, the "Z" have their own axis range, and the "SS" have their own axis range.
Then the secondary thing would be to somehow adjust the spacing of columns, so that there's a bit of space by those same groups--the 3 "C" columns would be close together, with a bit of extra white space between them and the "Z", then same between "Z" and "SS". Just to differentiate a little more between those three groups.
I tried tinkering with faceting on outcome_type2 instead of outcome_type but to no avail.
This is current base code, which technically works fine, but as you'll see, having them all use the same Y axis really swamps the "Z" and "SS" panels.
# Current plot from the question: bars of avg2 per var2, faceted by wave (rows)
# and by outcome_type (columns, in the explicit order given to fct_relevel()).
ggplot(dtest, aes(x = var2, y = avg2, fill = var2)) +
geom_bar(stat = "identity",
width = 1) +
facet_grid(wave ~ forcats::fct_relevel(outcome_type, "CT", "CI", "CE", "FZ", "MZ", "PSS", "CSS"),
# NOTE(review): per the question text, the panels still end up sharing one
# y range across the columns despite these "free_y" settings.
scales = "free_y",
space = "free_y") +
theme_minimal() +
theme(legend.position = "none")
dtest <- structure(list(outcome_type = c("CT", "CT", "CT", "CI", "CI",
"CI", "CE", "CE", "CE", "FZ", "FZ", "MZ", "MZ", "PSS", "PSS",
"CSS", "CSS", "CT", "CT", "CT", "CI", "CI", "CI", "CE", "CE",
"CE", "FZ", "FZ", "MZ", "MZ", "PSS", "PSS", "CSS", "CSS"), wave = c("Wave 1",
"Wave 2", "Wave 3", "Wave 1", "Wave 2", "Wave 3", "Wave 1", "Wave 2",
"Wave 3", "Wave 2", "Wave 3", "Wave 2", "Wave 3", "Wave 1", "Wave 3",
"Wave 1", "Wave 3", "Wave 1", "Wave 2", "Wave 3", "Wave 1", "Wave 2",
"Wave 3", "Wave 1", "Wave 2", "Wave 3", "Wave 2", "Wave 3", "Wave 2",
"Wave 3", "Wave 1", "Wave 3", "Wave 1", "Wave 3"), var2 = c("Skipped",
"Skipped", "Skipped", "Skipped", "Skipped", "Skipped", "Skipped",
"Skipped", "Skipped", "Skipped", "Skipped", "Skipped", "Skipped",
"Skipped", "Skipped", "Skipped", "Skipped", "Attended", "Attended",
"Attended", "Attended", "Attended", "Attended", "Attended", "Attended",
"Attended", "Attended", "Attended", "Attended", "Attended", "Attended",
"Attended", "Attended", "Attended"), avg2 = c(30.21, 20.88, 25.43,
7.68, 8.26, 7.89, 11.15, 8, 5.99, 1.64, 0.43, 0.6, 0.77, 0.01,
-0.09, -0.2, -0.01, 24.01, 19.98, 29.04, 9.82, 12.41, 12.99,
14.35, 11.01, 10, 2.36, 2.3, 1.51, 0.91, -0.23, -0.35, -0.17,
-0.14), outcome_type2 = c("C", "C", "C", "C", "C", "C", "C",
"C", "C", "Z", "Z", "Z", "Z", "SS", "SS", "SS", "SS", "C", "C",
"C", "C", "C", "C", "C", "C", "C", "Z", "Z", "Z", "Z", "SS",
"SS", "SS", "SS")), class = c("spec_tbl_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -34L), spec = structure(list(
cols = list(outcome_type = structure(list(), class = c("collector_character",
"collector")), wave = structure(list(), class = c("collector_character",
"collector")), var2 = structure(list(), class = c("collector_character",
"collector")), avg2 = structure(list(), class = c("collector_double",
"collector")), outcome_type2 = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
One option would be to create separate plots for each group of panels and glue them together using patchwork. Doing so you get "free" scale for each group of panels automatically and also have one (and only one) axis for each panel group.
To this end first add a group column to your data which could be used to split your dataset by facet panel group. Additionally, for convenience I use a plotting function which also removes the y axis strip texts for the first two groups of panels and as an important step completes each dataset so that all combinations of wave, outcome_type and var2 are present in each sub-dataset.
library(ggplot2)
library(patchwork)
library(magrittr)

# Assign every outcome type to its panel group by suffix: "...SS" -> "SS",
# "...Z" -> "Z", everything else -> "C".
dtest$group <- dplyr::case_when(
  grepl("SS$", dtest$outcome_type) ~ "SS",
  grepl("Z$", dtest$outcome_type) ~ "Z",
  TRUE ~ "C"
)
dtest$group <- factor(dtest$group, c("C", "Z", "SS"))

# Draw the wave x outcome_type facet grid for one panel group.
#
# .data: the rows of `dtest` that belong to a single `group`.
# Returns a ggplot object.
plot_fun <- function(.data) {
  # The row strips are dropped for the "C" and "Z" groups; for "SS" this stays
  # NULL, and adding NULL to a plot leaves it unchanged.
  remove_facet <- if (unique(.data$group) %in% c("C", "Z")) {
    theme(strip.text.y = element_blank())
  }

  # Order the facet columns. Only levels that occur in this subset are used,
  # so no "unknown levels" warnings are raised; values that are not in
  # `type_order` are kept and placed after the known ones.
  type_order <- c("CT", "CI", "CE", "FZ", "MZ", "PSS", "CSS")
  present <- unique(as.character(.data$outcome_type))
  .data$outcome_type <- factor(
    .data$outcome_type,
    levels = c(intersect(type_order, present), setdiff(sort(present), type_order))
  )

  .data %>%
    # Complete the subset so that every wave / outcome_type / var2 combination
    # has a row (avg2 is NA for the added rows); this keeps the panel layout
    # identical across the groups.
    tidyr::complete(outcome_type, wave = unique(dtest$wave), var2) %>%
    ggplot(aes(x = var2, y = avg2, fill = var2)) +
    geom_bar(
      stat = "identity",
      width = 1
    ) +
    facet_grid(wave ~ outcome_type) +
    theme_minimal() +
    remove_facet
}

# One data frame per panel group, in the factor order C, Z, SS.
dtest_split <- split(dtest, dtest$group)

# One plot per group, glued together side by side with a shared legend.
lapply(dtest_split, plot_fun) %>%
  wrap_plots() +
  plot_layout(widths = c(3, 2, 2), guides = "collect") &
  labs(x = NULL, y = NULL, fill = NULL) &
  theme(axis.text.x = element_blank())
#> Warning: Removed 4 rows containing missing values (`position_stack()`).
#> Removed 4 rows containing missing values (`position_stack()`).
Here is a solution where we first identify those avg2 < 5, then make a list of two data frames and plot for each data frame the corresponding plot:
library(tidyverse)
# library() stops with an error when gridExtra is missing, whereas require()
# would only return FALSE and let the script fail later at grid.arrange().
library(gridExtra)

# Stack both "type" columns into name/value pairs, order the facet variable,
# flag the rows with avg2 < 5 and split the data on that flag.
my_list <- dtest %>%
  pivot_longer(contains("type")) %>%
  mutate(value = fct_relevel(value, "CT", "CI", "CE", "FZ", "MZ", "PSS", "CSS")) %>%
  arrange(value) %>%
  mutate(x = ifelse(avg2 < 5, 1, 0)) %>%
  group_split(x)

# First list element (plotted on the left): row strips hidden.
plot1 <- ggplot(my_list[[1]], aes(x = var2, y = avg2, fill = var2)) +
  geom_col() +
  facet_grid(wave ~ value) +
  theme_minimal() +
  theme(
    legend.position = "none",
    strip.text.y = element_blank()
  )

# Second list element (plotted on the right): row strips kept, empty y title.
plot2 <- ggplot(my_list[[2]], aes(x = var2, y = avg2, fill = var2)) +
  geom_col() +
  facet_grid(wave ~ value) +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(y = "")

grid.arrange(plot1, plot2, ncol = 2)

Y-axis tick labels appearing out of place on top of each other

I have the following code to plot a graph:
# Plot from the question: branch code on x and case type on y, both converted
# to factors inside aes(), drawn with geom_bar(stat = "identity").
ggplot(merged,aes(x = as.factor(`Branch Code`),y = as.factor(`Case Type`))) + geom_bar(stat = "identity") + theme_classic() + theme(axis.text.x=element_text(angle=45,vjust=0.5),axis.title.x=element_text(vjust=-1),plot.title = element_text(hjust=0.5)) + ggtitle("Distribution of case types") + xlab("Branch Code") + ylab("Case Type")
This gives me the following graph:
However, the y-axis labels are not equally/regularly spaced as they should be...
I have a minimal reproducible example below.
Would anybody be able to give me a helping hand?
structure(list(`Branch Code` = c(80012, 80012, 80012, 80012, 80012, 80012), `Location Type` = c("Rural", "Rural", "Rural",
"Rural", "Rural", "Rural"), Type = c("LM", "LM", "LM", "LM",
"LM", "LM"), Status = c("Open", "Open", "Open", "Open", "Open",
"Open"), Segment = c("Agency", "Agency", "Agency", "Agency",
"Agency", "Agency"), `Multiple (partner that owns multiple branches)` = c("Multiple 13",
"Multiple 13", "Multiple 13", "Multiple 13", "Multiple 13", "Multiple 13"
), RetailType = c("Convenience", "Convenience", "Convenience",
"Convenience", "Convenience", "Convenience"), `Volume of transactions` = c(1130,
1130, 1130, 1130, 1130, 1130), `Open hours` = c(108.25, 108.25,
108.25, 108.25, 108.25, 108.25), `X Pos` = c(551872, 551872,
551872, 551872, 551872, 551872), `Y Pos` = c(170269, 170269,
170269, 170269, 170269, 170269), Urbanity = c("Medium Density",
"Medium Density", "Medium Density", "Medium Density", "Medium Density",
"Medium Density"), `Case Reference Number` = c("1967808-C5F1P3",
"1962373-N7X5C2", "2052107-N2R3C8", "2122905-K9T0M7", "2149177-H3W7C9",
"2143459-L4X2D8"), `Created On` = structure(c(1625672980, 1625233808,
1632225098, 1637064726, 1638808983, 1638376615), tzone = "UTC", class = c("POSIXct",
"POSIXt")), `Branch Type` = c("Main", "Main", "Main", "Main",
"Main", "Main"), L1 = c("Back Office", "Mails", "Mails", "DVLA",
"DVLA", "Post Office - ATM"), L2 = c("Cash Handling and Distribution",
"Accounting and Despatch", "Redirection", "Counter Procedure",
"Methods of payment", "Post Office - ATM"), L3 = c("Bank Holiday Arrangements",
"Despatch Report", "Counter Procedure", "Reversal", "Methods of Payment",
"Post Office - ATM"), L4 = c("Bank Holiday Arrangements", "Correcting/Resolving",
"Business", "After Cut Off", "Methods of Payment", "QR Code is not scanning Error Handling"
), `Case Type` = c("Question", "Question", "Question", "Question",
"Question", "Question")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
I had difficulty understanding your sample data, so I updated it as well. I defined the factor levels for both the case type and the branch code.
Sample code:
library(ggplot2)

# Fix the level order of both axis variables before plotting.
merged[["Branch Code"]] <- factor(
  merged[["Branch Code"]],
  levels = c("80012", "80013", "80014", "80015", "80016", "80017")
)
merged[["Case Type"]] <- factor(
  merged[["Case Type"]],
  levels = c(
    "Question 1", "Question 2", "Question 3",
    "Question 4", "Question 5", "Question 6"
  )
)

# Branch code on x, case type on y.
ggplot(merged, aes(x = `Branch Code`, y = `Case Type`)) +
  geom_bar(stat = "identity") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 0.5),
    axis.title.x = element_text(vjust = -1),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(
    title = "Distribution of case types",
    x = "Branch Code",
    y = "Case Type"
  )
Plot:
Sample data:
# Sample data for the plot above: six rows with factor-typed `Branch Code` and
# `Case Type` columns. (The closing parenthesis of the outer structure() call
# was missing, so the original snippet did not parse.)
merged <- structure(list(`Branch Code` = structure(1:6, .Label = c("80012",
"80013", "80014", "80015", "80016", "80017"), class = "factor"),
`Location Type` = c("Rural", "Rural", "Rural", "Rural", "Rural",
"Rural"), Type = c("LM", "LM", "LM", "LM", "LM", "LM"), Status = c("Open",
"Open", "Open", "Open", "Open", "Open"), Segment = c("Agency",
"Agency", "Agency", "Agency", "Agency", "Agency"), `Multiple (partner that owns multiple branches)` = c("Multiple 13",
"Multiple 13", "Multiple 13", "Multiple 13", "Multiple 13",
"Multiple 13"), RetailType = c("Convenience", "Convenience",
"Convenience", "Convenience", "Convenience", "Convenience"
), `Volume of transactions` = c(1130, 1130, 1130, 1130, 1130,
1130), `Open hours` = c(108.25, 108.25, 108.25, 108.25, 108.25,
108.25), `X Pos` = c(551872, 551872, 551872, 551872, 551872,
551872), `Y Pos` = c(170269, 170269, 170269, 170269, 170269,
170269), Urbanity = c("Medium Density", "Medium Density",
"Medium Density", "Medium Density", "Medium Density", "Medium Density"
), `Case Reference Number` = c("1967808-C5F1P3", "1962373-N7X5C2",
"2052107-N2R3C8", "2122905-K9T0M7", "2149177-H3W7C9", "2143459-L4X2D8"
), `Created On` = structure(c(1625672980, 1625233808, 1632225098,
1637064726, 1638808983, 1638376615), tzone = "UTC", class = c("POSIXct",
"POSIXt")), `Branch Type` = c("Main", "Main", "Main", "Main",
"Main", "Main"), L1 = c("Back Office", "Mails", "Mails",
"DVLA", "DVLA", "Post Office - ATM"), L2 = c("Cash Handling and Distribution",
"Accounting and Despatch", "Redirection", "Counter Procedure",
"Methods of payment", "Post Office - ATM"), L3 = c("Bank Holiday Arrangements",
"Despatch Report", "Counter Procedure", "Reversal", "Methods of Payment",
"Post Office - ATM"), L4 = c("Bank Holiday Arrangements",
"Correcting/Resolving", "Business", "After Cut Off", "Methods of Payment",
"QR Code is not scanning Error Handling"), `Case Type` = structure(1:6, .Label = c("Question 1",
"Question 2", "Question 3", "Question 4", "Question 5", "Question 6"
), class = "factor")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))

Add_annotations plotly first and last datapoints

My Main Goal:
Trying to add annotations to both the first datapoint of my
scatterplot and the last datapoint of my scatterplot (the entries for
years 2006 and 2021 respectively).
My Secondary Goals:
If possible, it would also be helpful to find out how to select out
specific datapoints to add annotations, as I only know the
which.max/which.min functions so far.
It would also be nice to know how to list the jobs on each point.
My Dput:
structure(list(Year = 2006:2021, Month_USD = c(1160L, 1240L,
1360L, 1480L, 1320L, 1320L, 375L, 1600L, 2000L, 2000L, 1600L,
2240L, 1900L, 2300L, 2900L, 2300L), Degree = c("High School",
"High School", "High School", "High School", "High School", "High School",
"High School", "High School", "High School", "BA", "BA", "BA",
"BA", "BA", "M.Ed", "M.Ed"), Country = c("USA", "USA", "USA",
"USA", "USA", "USA", "DE", "USA", "USA", "USA", "USA", "USA",
"PRC", "PRC", "PRC", "HK"), Job = c("Disher", "Prep", "Prep",
"Prep", "Prep", "Prep", "Au Pair", "CSA", "Valet", "Valet", "Intake",
"CM", "Teacher", "Teacher", "Teacher", "Student"), Median_Household_Income_US = c(4833L,
4961L, 4784L, 4750L, 4626L, 4556L, 4547L, 4706L, 4634L, 4873L,
5025L, 5218L, 5360L, 5725L, NA, NA), US_Home_Price_Index = c(183.24,
173.36, 152.56, 146.69, 140.64, 135.16, 143.88, 159.3, 166.5,
175.17, 184.51, 195.99, 204.9, 212.59, 236.31, NA)), class = "data.frame", row.names = c(NA,
-16L))
Current Scatterplot:
# Scatterplot from the question: monthly earnings per year, with marker symbol
# and colour both mapped to the degree, and two annotations placed on the
# rows with the lowest and the highest Month_USD.
pal <- c("Red", "Blue", "Green")
plot_ly(data = Earnings_Year,
x=~Year,
y=~Month_USD,
type='scatter',
mode='markers',
symbol = ~as.factor(Degree),
symbols=c("star-open-dot","hexagon-open-dot","diamond-open-dot"),
color = ~as.factor(Degree),
colors = pal,
# Hover text is built by hand, so only this text is shown on hover.
hoverinfo="text",
text= paste("Year: ",
Earnings_Year$Year,
"<br>", # HTML line break inside the hover text
"Monthly USD: ",
Earnings_Year$Month_USD),
size=10) %>%
# Annotation on the row with the minimum Month_USD.
add_annotations(
x=Earnings_Year$Year[which.min(Earnings_Year$Month_USD)],
y=Earnings_Year$Month_USD[which.min(Earnings_Year$Month_USD)],
text = "Au Pair Job in Germany") %>%
# Annotation on the row with the maximum Month_USD.
add_annotations(
x=Earnings_Year$Year[which.max(Earnings_Year$Month_USD)],
y=Earnings_Year$Month_USD[which.max(Earnings_Year$Month_USD)],
text = "Last Teaching Job in China") %>%
layout(legend= list(x=1,y=0.5),
title="Earnings by Degree",
xaxis=list(title="Year"),
yaxis=list(title="Monthly USD"))
Image of Current Scatter:
Scatter That I Want:
Figured it out. Just needed to pipe additional add_annotations as well as just select specific values for x and y:
# Scatterplot of monthly earnings per year, with marker symbol and colour both
# mapped to the degree, plus text annotations on selected data points.
pal <- c("Red", "Blue", "Green")
plot_ly(
  data = Earnings_Year,
  x = ~Year,
  y = ~Month_USD,
  type = "scatter",
  mode = "markers",
  symbol = ~as.factor(Degree),
  symbols = c("star-open-dot", "hexagon-open-dot", "diamond-open-dot"),
  color = ~as.factor(Degree),
  colors = pal,
  hoverinfo = "text",
  text = paste("Year: ",
               Earnings_Year$Year,
               "<br>", # HTML line break inside the hover text
               "Monthly USD: ",
               Earnings_Year$Month_USD),
  size = 10
) %>%
  # Lowest and highest earnings, located by position.
  add_annotations(
    x = Earnings_Year$Year[which.min(Earnings_Year$Month_USD)],
    y = Earnings_Year$Month_USD[which.min(Earnings_Year$Month_USD)],
    text = "Au Pair Job in Germany"
  ) %>%
  add_annotations(
    x = Earnings_Year$Year[which.max(Earnings_Year$Month_USD)],
    y = Earnings_Year$Month_USD[which.max(Earnings_Year$Month_USD)],
    text = "Last Teaching Job in China"
  ) %>%
  # Specific data points, selected by year. Both x and y are looked up through
  # the Year column: looking y up by its own value (e.g. Month_USD == 2300)
  # returns several elements whenever the same amount occurs in more than one
  # year (2300 and 2000 each occur twice in this data).
  add_annotations(
    x = Earnings_Year$Year[Earnings_Year$Year == 2006],
    y = Earnings_Year$Month_USD[Earnings_Year$Year == 2006],
    text = "First Job"
  ) %>%
  add_annotations(
    x = Earnings_Year$Year[Earnings_Year$Year == 2021],
    y = Earnings_Year$Month_USD[Earnings_Year$Year == 2021],
    text = "Began Ph.D."
  ) %>%
  add_annotations(
    x = Earnings_Year$Year[Earnings_Year$Year == 2008],
    y = Earnings_Year$Month_USD[Earnings_Year$Year == 2008],
    text = "Finished H.S."
  ) %>%
  add_annotations(
    x = Earnings_Year$Year[Earnings_Year$Year == 2015],
    y = Earnings_Year$Month_USD[Earnings_Year$Year == 2015],
    text = "Finished BA"
  ) %>%
  layout(
    legend = list(x = 1, y = 0.5),
    title = "Earnings by Degree",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Monthly USD")
  )
Finished Product:

GGplot boxplot and dotplot side by side [duplicate]

This question already has answers here:
How to plot a hybrid boxplot: half boxplot with jitter points on the other half?
(3 answers)
Closed 4 years ago.
I'm using R and GGplot2 and I want to make a graph in which there are boxes (from geom_boxplot) and points (from geom_dotplot) one next to the other, not above one another.
The picture shows what I've managed to do for now, which is pretty much what I'd like to have except that, instead of the pink boxes, I want just the points from the geom_dotplot
# Plot from the question: box plots of value per organ, filled by crop, on a
# log10 y axis titled "BCF", with one facet per Csoil level.
df2%>%ggplot(aes(factor(Organ), value,fill=Crop)) +
geom_boxplot() +
# Dot plot layer, currently disabled:
#geom_dotplot(binaxis='y',stackdir = 'center')
scale_y_log10("BCF") + facet_wrap(~Csoil)+theme(axis.title.x=element_blank())
This is a subset of the data that I'm using:
df2=structure(list(variable = structure(c(8L, 2L, 6L, 14L, 5L, 14L,
3L, 8L, 5L, 6L, 2L, 13L, 5L, 7L, 13L, 3L, 8L, 10L, 10L, 2L, 2L,
8L, 6L, 9L, 10L, 14L, 14L, 1L, 4L, 13L, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), .Label = c("BCF leaf (CsoilMax) - Ryegrass",
"BCF root (CsoilMax) - Ryegrass", "BCF root (CsoilMed) - Ryegrass",
"BCF stem (CsoilMax) - Ryegrass", "BCF stem (CsoilMed) - Ryegrass",
"BCF leaf (CsoilMed) - Ryegrass", "BCF fruit (CsoilMax) - Maize",
"BCF fruit (CsoilMed) - Maize", "BCF leaf (CsoilMax) - Maize",
"BCF leaf (CsoilMed) - Maize", "BCF root (CsoilMax) - Maize",
"BCF root (CsoilMed) - Maize", "BCF stem (CsoilMax) - Maize",
"BCF stem (CsoilMed) - Maize"), class = "factor"), value = c(0.209606259766772,
0.506401960143269, 0.000660265316109507, 6.51930210129075, 0.0207635735841085,
0.765736181143394, 1.40601301731962, 0.00743553520916094, 0.040587560806454,
1.60441689044071, 0.522063333823462, 0.540632385379595, 0.0497467701708571,
0.0573435549206478, 0.0109218792177719, 0.608263306408034, 0.911628697794879,
0.0874473327218576, 0.105726788757446, 0.267232334133824, 1.3297503892306,
0.292440363142525, 0.037969380254565, 0.000157551659778798, 0.237462116816578,
0.0447547790805731, 1.29271738284098, 0.000673871813931306, 0.00388673588576815,
0.0223297222656565, 6.82, 11.53, 5.39, 5, 5, 0.005, 107.984,
26.987, 0.005, 0.005, 132.28), Crop = c("Maize", "Ryegrass",
"Ryegrass", "Maize", "Ryegrass", "Maize", "Ryegrass", "Maize",
"Ryegrass", "Ryegrass", "Ryegrass", "Maize", "Ryegrass", "Maize",
"Maize", "Ryegrass", "Maize", "Maize", "Maize", "Ryegrass", "Ryegrass",
"Maize", "Ryegrass", "Maize", "Maize", "Maize", "Maize", "Ryegrass",
"Ryegrass", "Maize", "Literature", "Literature", "Literature",
"Literature", "Literature", "Literature", "Literature", "Literature",
"Literature", "Literature", "Literature"), Csoil = c("Median soil C",
"Maximum soil C", "Median soil C", "Median soil C", "Median soil C",
"Median soil C", "Median soil C", "Median soil C", "Median soil C",
"Median soil C", "Maximum soil C", "Maximum soil C", "Median soil C",
"Maximum soil C", "Maximum soil C", "Median soil C", "Median soil C",
"Median soil C", "Median soil C", "Maximum soil C", "Maximum soil C",
"Median soil C", "Median soil C", "Maximum soil C", "Median soil C",
"Median soil C", "Median soil C", "Maximum soil C", "Maximum soil C",
"Maximum soil C", "Median soil C", "Median soil C", "Median soil C",
"Median soil C", "Median soil C", "Median soil C", "Median soil C",
"Median soil C", "Median soil C", "Median soil C", "Median soil C"
), Organ = c("Fruits", "Roots", "Leaves", "Stem", "Stem", "Stem",
"Roots", "Fruits", "Stem", "Leaves", "Roots", "Stem", "Stem",
"Fruits", "Stem", "Roots", "Fruits", "Leaves", "Leaves", "Roots",
"Roots", "Fruits", "Leaves", "Leaves", "Leaves", "Stem", "Stem",
"Leaves", "Stem", "Stem", "Leaves lit.", "Leaves lit.", "Roots lit.",
"Roots lit.", "Roots lit.", "Fruits lit.", "Fruits lit.", "Fruits lit.",
"Fruits lit.", "Fruits lit.", "Fruits lit.")), .Names = c("variable",
"value", "Crop", "Csoil", "Organ"), row.names = c(NA, -41L), class = "data.frame")
The solution was just filtering the data within ggplot.
This is the code of the graph:
# Box plots for every crop except "Literature", and black dots for the
# "Literature" rows; each layer filters the plot data through its own
# `data` function.
df2 %>%
  ggplot(aes(factor(Organ), value, fill = Crop)) +
  geom_boxplot(
    data = function(d) filter(d, Crop != "Literature")
  ) +
  geom_dotplot(
    data = function(d) filter(d, Crop == "Literature"),
    binaxis = "y",
    stackdir = "center",
    dotsize = 0.35,
    fill = "black"
  ) +
  scale_y_log10("BCF") +
  facet_wrap(~Csoil) +
  theme(axis.title.x = element_blank())
Not a real answer but a suggestion: I tend to prefer geom_jitter(size = 0.5, width = 0.2) to overlay dots on boxplots, and I set outlier.shape = NA in geom_boxplot so that the outliers are not drawn twice.

Add circular axis on circular plot

I would like to overlay my plot with circles as axis to illustrate probability levels (e.g 0.25; 0.75, 1).
To reproduce the graphic you need these 2 csv files in working directory
https://drive.google.com/open?id=1RsleBYQFlm3ce3xuqTLK-_r9s374yd40
Or, as kindly advised by @Gregor in the comments, here is the head() of each of my data objects, so no downloading is necessary:
dput(head(data))
structure(list(id = 1:6, individual = structure(c(1L, 12L, 23L,
26L, 27L, 28L), .Label = c("Person 1", "Person 10", "Person 11",
"Person 12", "Person 13", "Person 14", "Person 15", "Person 16",
"Person 17", "Person 18", "Person 19", "Person 2", "Person 20",
"Person 21", "Person 22", "Person 23", "Person 24", "Person 25",
"Person 26", "Person 27", "Person 28", "Person 29", "Person 3",
"Person 30", "Person 31", "Person 4", "Person 5", "Person 6",
"Person 7", "Person 8", "Person 9"), class = "factor"), value = c(0.658333333,
0.958333333, 0.720833334, 0.883333333, 0.779166667, 0.9375),
group = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "A", class = "factor")), .Names = c("id",
"individual", "value", "group"), row.names = c(NA, 6L), class = "data.frame")
and second object:
dput(head(label_data))
structure(list(id = 1:6, individual = structure(c(1L, 12L, 23L,
26L, 27L, 28L), .Label = c("Person 1", "Person 10", "Person 11",
"Person 12", "Person 13", "Person 14", "Person 15", "Person 16",
"Person 17", "Person 18", "Person 19", "Person 2", "Person 20",
"Person 21", "Person 22", "Person 23", "Person 24", "Person 25",
"Person 26", "Person 27", "Person 28", "Person 29", "Person 3",
"Person 30", "Person 31", "Person 4", "Person 5", "Person 6",
"Person 7", "Person 8", "Person 9"), class = "factor"), value = c(0.658333333,
0.958333333, 0.720833334, 0.883333333, 0.779166667, 0.9375),
group = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "A", class = "factor"),
hjust = c(0, 0, 0, 0, 0, 0), angle = c(84.375, 73.125, 61.875,
50.625, 39.375, 28.125)), .Names = c("id", "individual",
"value", "group", "hjust", "angle"), row.names = c(NA, 6L), class = "data.frame")
And then run following:
# Circular bar plot from the question: one bar per individual in polar
# coordinates, with rotated name labels and three "p=..." text annotations.
library(tidyverse)
library(ggplot2)
library(plotrix)
data=read.csv(file="data_object_2.csv", header=TRUE, sep=",")
label_data=read.csv(file="label_data_object_2.csv", header=TRUE, sep=",")
# Append `empty_bar` all-NA rows per group level, then renumber the ids.
empty_bar=1
to_add = data.frame( matrix(NA, empty_bar*nlevels(data$group), ncol(data)) )
colnames(to_add) = colnames(data)
to_add$group=rep(levels(data$group), each=empty_bar)
data=rbind(data, to_add)
data=data %>% arrange(group)
data$id=seq(1, nrow(data))
# NOTE(review): number_of_bar is not used in the visible code below; the
# divisor 32 in the angle formula is hard-coded -- confirm it matches the
# number of bars.
number_of_bar=nrow(label_data)
# Label angle per bar; labels with an angle below -90 are flipped by 180
# degrees and get hjust = 1 instead of 0.
angle= 90 - 360 * (label_data$id-0.5) /32
label_data$hjust<-ifelse( angle < -90, 1, 0)
label_data$angle<-ifelse(angle < -90, angle+180, angle)
p = ggplot(data, aes(x=as.factor(id), y=value)) +
geom_bar(stat="identity", fill=alpha("skyblue", 0.7)) +
# The lower y limit is negative (-0.3) while the plotted values shown are
# between 0 and 1.
ylim(-0.3,1) +
theme_minimal() +
theme(
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1,4), "cm")
) +
coord_polar(start = 0) +
# Name labels come from label_data and do not inherit the plot aesthetics.
geom_text(data=label_data, aes(x=id, y=value, label=individual, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=2.5, angle= label_data$angle, inherit.aes = FALSE ) +
geom_vline(xintercept = 0, color = "grey", linetype = "dashed") +
# Text markers at y = 0, 1 and 0.5, all placed at x = 0.
annotate("text", label = "p=0", x = 0, y = 0, color = "black") +
annotate("text", label = "p=1", x = 0, y = 1, color = "black") +
annotate("text", label = "p=0.5", x = 0, y = 0.5, color = "black")
p
It will result in this:
https://drive.google.com/open?id=1xDOym_nn-x9nrUoKpB9rtg7h7NYIfucF
I would like to overlay with circles indicating probability levels to enhance readability. All the on-line help which I have found is related to common Cartesian graph or geom_circle function which did not work either.
I will really appreciate any help.
Thanks Marek
Here's an option over a simplified version of your code (I didn't want to recreate all the labels, etc, just a pared down version of your chart). I thought about it like this: if this were in regular Cartesian coordinates, you could show an overlay of a probability by making a horizontal line, so in polar coordinates, that line would become a circle. Adding a geom_hline gives you a circle at whatever yintercept you set.
It might be good to label those probabilities; you can figure out what's the best way to do that in your context, but I just made a couple of circles, set the y-breaks to those same values, and moved the y axis title to be near the labels so they had a little explanation. Based on your context, that might not all be necessary.
As an aside, I'd recommend combining these two data frames into one, so you can more easily keep track of things and not have to set different data = arguments in different geoms.
library(tidyverse)
label_data %>%
ggplot(aes(x = individual, y = value)) +
geom_col(width = 0.5, fill = "skyblue", alpha = 0.7) +
geom_hline(yintercept = c(0.5, 0.75, 0.9), color = "gray60") +
scale_y_continuous(limits = c(-0.3, NA), breaks = c(0.5, 0.75, 0.9)) +
theme_minimal() +
theme(panel.grid = element_blank(), axis.title.y = element_text(hjust = 0.87)) +
coord_polar(start = 0) +
labs(x = NULL, y = "Probability")
Created on 2018-06-03 by the reprex package (v0.2.0).

Resources