Related
The first five entries (out of twenty) of my dataset:
>head(data)
Name SDC
<chr> <Period>
1 Feuerman 1M 37S
2 Solis 1M 52S
3 Osborne 1M 47S
4 Frizzell 1M 58S
5 Moran 1M 59S
Also:
> dput(head(data))
structure(list(Name = c("Feuerman", "Solis", "Osborne", "Frizzell",
"Moran", "Seth"), Deadlift = c(320, 250, 340, 250, 250, 200),
Medicine_Ball = c(11.6, 8.8, 12.5, 9.2, 9.7, 9.1), HRP = c(46,
39, 36, 33, 42, 31), SDC = new("Period", .Data = c(37, 52,
47, 58, 59, 15), year = c(0, 0, 0, 0, 0, 0), month = c(0,
0, 0, 0, 0, 0), day = c(0, 0, 0, 0, 0, 0), hour = c(0, 0,
0, 0, 0, 0), minute = c(1, 1, 1, 1, 1, 2)), Leg_Tuck = c(20,
13, 4, 10, 13, 13), Run = new("Period", .Data = c(48, 59,
10, 53, 0, 29), year = c(0, 0, 0, 0, 0, 0), month = c(0,
0, 0, 0, 0, 0), day = c(0, 0, 0, 0, 0, 0), hour = c(0, 0,
0, 0, 0, 0), minute = c(13, 12, 17, 16, 0, 16)), Total = c(570,
508, 513, 470, 410, 452), Pass_Fail = structure(c(1L, 1L,
2L, 1L, 2L, 1L), .Label = c("Pass", "Fail"), class = "factor"),
Date = structure(c(18522, 18522, 18522, 18522, 18522, 18522
), class = "Date")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
As you can see, SDC is in minutes:seconds format. I achieved this using ms(data$SDC) to change the column type. I am trying to create a plot using geom_col that orders SDC from lowest to highest times. I am facing two problems:
When using the reorder command, the times are not properly reordered (see plot below).
The axes are being formatted by hour:minute:second but I want it to be formatted in only minute:second format (also see plot below).
Here is my code to generate the plot:
# Bar chart of SDC per soldier, filled by Pass_Fail and flipped so that the
# bars run horizontally.
# NOTE: this is the question's reproducer. The reorder(Name, -SDC) call below
# is the ordering reported as incorrect, and the default scale_y_time() labels
# are the hour:minute:second format the author wants to avoid.
ggplot(data=data,
aes(x=reorder(Name, -SDC), y=SDC, fill=Pass_Fail)) +
# time axis for SDC, limited to 0-200 (presumably seconds -- TODO confirm)
scale_y_time(limits=c(0,200)) +
# two manual fill colours, one per Pass_Fail level
scale_fill_manual(values=c('#00BFC4', '#F8766D')) +
labs(x='Soldier', y='Sprint Drag Carry Time', fill='Passed/Failed ACFT', title='Sprint Drag Carry Scores') +
geom_col() +
# print the SDC value next to each bar
geom_text(size=3, aes(label = SDC), hjust=-0.04) +
coord_flip() +
theme_classic()
It produces the following plot:
As you can see, the reordering is incorrect and the axes are not formatted the way I want them to be. Thanks in advance for your help.
I think reorder() has trouble working with Period objects. Instead, we can sort the rows by SDC and set the factor levels of Name in that order, so that the bars appear in increasing order.
We can pass a custom labelling function to the y-axis scale so that the labels show only minutes and seconds.
library(tidyverse)
# Horizontal bar chart of SDC per soldier, with bars following the SDC order.
# Rows are sorted by SDC and Name is then turned into a factor whose levels
# follow that row order, so the x positions follow the sort instead of relying
# on reorder().
data %>%
  arrange(SDC) %>%
  mutate(Name = factor(Name, levels = unique(Name))) %>%
  ggplot() +
  aes(x = Name, y = SDC, fill = Pass_Fail) +
  scale_y_time(
    limits = c(0, 200),
    # Show only minutes and seconds. "%02.0f" zero-pads the numeric values;
    # the "0" flag combined with "%s" is platform dependent (it pads with
    # zeros on some systems and is ignored on others).
    labels = function(x) sprintf("%02.0f:%02.0f", minute(x), second(x))
  ) +
  scale_fill_manual(values = c("#00BFC4", "#F8766D")) +
  labs(
    x = "Soldier", y = "Sprint Drag Carry Time",
    fill = "Passed/Failed ACFT", title = "Sprint Drag Carry Scores"
  ) +
  geom_col() +
  # Print the SDC value next to each bar.
  geom_text(size = 3, aes(label = SDC), hjust = -0.04) +
  coord_flip() +
  theme_classic()
My data looks like this:
# Example data in wide format: one row per patient.
mydata <- data.frame(
  # patient ID
  ID = c(1, 2, 3, 5, 6, 7, 9, 11, 12, 13),
  # evaluation before
  t1 = c(37, 66, 28, 60, 44, 24, 47, 44, 33, 47),
  # evaluation after
  t4 = c(33, 45, 27, 39, 24, 29, 24, 37, 27, 42),
  # subset
  sexe = c(1, 2, 2, 1, 1, 1, 2, 2, 2, 1)
)
I would like to do a simple before-after graph.
So far, I managed to get this:
With this:
library(ggplot2)
# Before/after plot drawn directly from the wide data: one segment per row of
# mydata, running from (x = 1, y = t1) to (x = 2, y = t4).
# NOTE: this is the question's reproducer and is kept as posted.
ggplot(mydata) +
geom_segment(aes(x = 1, xend = 2, y = t1, yend = t4), size=0.6) +
# label the x positions 1 and 2 as T1 and T4
scale_x_discrete(name = "Intervention", breaks = c("1", "2"), labels = c("T1", "T4"), limits = c(1, 2)) +
scale_y_continuous(name = "Var") + theme_bw()
I am facing multiple issues, can you help me to...
add a black circle at the beginning and the end of every line? (geom_point() doesn't work)
make the lines smoother (look how pixelated they are, especially the second one)?
decrease blank space on left and right side of the graph?
add median for T1 and T4 (in red), link those points, compare them with paired mann whitney test and print p-value on the graph?
I would prefer not to reformat my database to long format, as I have a lot of other variables and timepoints (not shown here).
I have read other posts (such as here), but the solutions provided look so complicated for something that seems simple (yet I can't do it...).
Huge thanks for your help!
I will update the graph along with progression :)
EDIT
I would like not to reformat my database to long format as I have a lot of other variables and timepoints (not shown here)...
Here is what I would do! Please feel free to ask questions regarding what's going on here.
library(tidyverse)
# Example data in wide format: one row per patient.
mydata <- data.frame(
  ID = c(1, 2, 3, 5, 6, 7, 9, 11, 12, 13), # patient ID
  t1 = c(37, 66, 28, 60, 44, 24, 47, 44, 33, 47), # evaluation before
  t4 = c(33, 45, 27, 39, 24, 29, 24, 37, 27, 42), # evaluation after
  sexe = c(1, 2, 2, 1, 1, 1, 2, 2, 2, 1)
)

# Paired Wilcoxon test of t1 against t4, computed on the wide data.
# exact = FALSE asks for the non-exact p-value; the result is rounded to two
# decimals for display on the plot.
pval <- wilcox.test(
  x = mydata$t1, y = mydata$t4,
  paired = TRUE, exact = FALSE
)$p.value %>%
  round(2)

# Long format for plotting: columns 2:3 (t1, t4) become Time/value pairs.
df <- mydata %>%
  pivot_longer(2:3, names_to = "Time") %>%
  mutate(
    sexe = as.factor(sexe),
    Time = as.factor(Time)
  )

# One line per patient (group = ID), coloured by sexe.
ggplot(df, aes(Time, value, color = sexe, group = ID)) +
  geom_point() +
  geom_line() +
  # Median of each time point as a red triangle; inherit.aes = FALSE drops the
  # per-patient grouping so the median is taken over all patients.
  stat_summary(
    inherit.aes = FALSE, aes(Time, value),
    geom = "point", fun = "median", col = "red",
    size = 3, shape = 24, fill = "red"
  ) +
  annotate("text", x = 1.7, y = 60, label = paste("P-Value is", pval)) +
  # Narrow coordinate limits reduce the blank space on both sides of the two
  # time points (the scale expansion is still added around these limits).
  coord_cartesian(xlim = c(1.4, 1.6)) +
  theme_bw()
Also be aware that it is common to have some variables which repeat through time, in addition to the long format data. See example here:
# Same example data with three extra per-patient variables (var1..var3) that
# are carried along unchanged when t1/t4 are pivoted.
mydata <- data.frame(
  ID = c(1, 2, 3, 5, 6, 7, 9, 11, 12, 13), # patient ID
  t1 = c(37, 66, 28, 60, 44, 24, 47, 44, 33, 47), # evaluation before
  t4 = c(33, 45, 27, 39, 24, 29, 24, 37, 27, 42), # evaluation after
  sexe = c(1, 2, 2, 1, 1, 1, 2, 2, 2, 1),
  var1 = 1:10,
  var2 = 1:10,
  var3 = 1:10
)

# Pivot columns 2:3 into long format, then convert sexe and Time to factors.
df <- pivot_longer(mydata, 2:3, names_to = "Time")
df <- mutate(df, sexe = as.factor(sexe), Time = as.factor(Time))
I can address (1) black circles issue:
First, you should tidy your data, so that one column holds the information of one variable (currently the 'Var' values on the plot are stored in two columns: 't1' and 't4'). You can achieve this with the tidyr package.
library(tidyr)
# Stack the t1 and t4 columns: their names go to column "t", their values to
# the default "value" column.
mydata_long <- pivot_longer(
  data = mydata,
  cols = c(t1, t4),
  names_to = "t"
)
Now creating points is easy, and the rest of the code becomes a lot clearer:
We can tell ggplot that we want 't' groups on x-axis, their values on y-axis and in case of lines, we want them separate for every 'ID'.
# x/y are mapped once for all layers; only the line layer is grouped by ID so
# that each patient gets its own line.
ggplot(mydata_long, aes(x = t, y = value)) +
  geom_line(aes(group = ID)) + # one line per patient
  geom_point() + # a point at each end of every line
  labs(x = "Intervention", y = "Var") + # axis titles
  theme_bw()
I'm trying to use ggplot, and am hoping to create a boxplot that has four categories on the x axis for suspension data (low, lowish, highish, high) and farms on the y-axis.
I think I have broken the suspension column into four groups, but ggplot is upset with me. Here is the error:
```
Error in if (is.double(data$x) && !has_groups(data) && any(data$x != data$x[1L])) { : missing value where TRUE/FALSE needed
```
Here is my code:
```{r}
# To break suspension_rate_total_pct data into groups for clearer visualization, I found the min, and max
# NOTE: this is the question's reproducer for the error quoted above and is
# kept as posted.
merged_data$suspension_rate_total_pct <-
as.numeric(merged_data$suspension_rate_total_pct)
max(merged_data$suspension_rate_total_pct, na.rm=TRUE)
min(merged_data$suspension_rate_total_pct, na.rm=TRUE)
# Four logical vectors, one per intended group. The comparisons are strict on
# both sides, so values equal to 0, 0.5, 1, 1.5 or 2 fall in no group.
# These vectors are neither added to merged_data nor used by the plot below.
low_suspension <- merged_data$suspension_rate_total_pct > 0 & merged_data$suspension_rate_total_pct < 0.5
low_ish_suspension <- merged_data$suspension_rate_total_pct > 0.5 & merged_data$suspension_rate_total_pct < 1
high_ish_suspension <- merged_data$suspension_rate_total_pct > 1 & merged_data$suspension_rate_total_pct < 1.5
high_suspension <- merged_data$suspension_rate_total_pct > 1.5 & merged_data$suspension_rate_total_pct < 2
# The boxplot maps the numeric suspension_rate_total_pct column directly to x.
ggplot(merged_data, aes(x = suspension_rate_total_pct , y = farms_pct)) +
geom_boxplot()
```
Here is the Data:
merged_data <- structure(list(schid = c("1030642", "1030766", "1030774", "1030840",
"1130103", "1230150"), enrollment = c(159, 333, 352, 430, 102,
193), farms = c(132, 116, 348, 406, 68, 130), foster = c(2, 0,
1, 8, 1, 4), homeless = c(14, 0, 8, 4, 1, 4), migrant = c(0,
0, 0, 0, 0, 0), ell = c(18, 12, 114, 45, 7, 4), suspension_rate_total = c(NA,
20, 0, 0, 95, 5), suspension_violent = c(NA, 9, 0, 0, 20, 2),
suspension_violent_no_injury = c(NA, 6, 0, 0, 47, 1), suspension_weapon = c(NA,
0, 0, 0, 8, 0), suspension_drug = c(NA, 0, 0, 0, 9, 1), suspension_defiance = c(NA,
1, 0, 0, 9, 1), suspension_other = c(NA, 4, 0, 0, 2, 0),
farms_pct = c(0.830188679245283, 0.348348348348348, 0.988636363636364,
0.944186046511628, 0.666666666666667, 0.673575129533679),
foster_pct = c(0.0125786163522013, 0, 0.00284090909090909,
0.0186046511627907, 0.00980392156862745, 0.0207253886010363
), migrant_pct = c(0, 0, 0, 0, 0, 0), ell_pct = c(0.113207547169811,
0.036036036036036, 0.323863636363636, 0.104651162790698,
0.0686274509803922, 0.0207253886010363), homeless_pct = c(0.0880503144654088,
0, 0.0227272727272727, 0.00930232558139535, 0.00980392156862745,
0.0207253886010363), suspension_rate_total_pct = c(NA, 2,
1, 1, 2, 2)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
If you can, please help me appease ggplot so that it will give me with beautiful visualization. Currently, this feels like a one-sided, emotional rollercoaster of a relationship.
Just a short answer, i am sure you can figure out the rest by yourself, (otherwise post a followup question.)
Since the data you provided has some NAs in the first row in several columns, I can only demonstrate the principle of how to get your desired result by using the merged_data$homeless value as the grouping input for the boxplots; the data (y-value) will still be farms.
# First we create our groups of low, middle & high amount of homeless:
#   homeless < 4       -> "low"
#   4 <= homeless <= 8 -> "middle"
#   homeless > 8       -> "high"
# Rows with a missing homeless value match no condition and stay NA.
merged_data2 <- merged_data %>%
  mutate(
    homelessgroup = case_when(
      homeless < 4 ~ "low",
      homeless <= 8 ~ "middle",
      homeless > 8 ~ "high"
    ),
    # Explicit levels so the groups appear as low/middle/high instead of in
    # alphabetical order.
    homelessgroup = factor(homelessgroup, levels = c("low", "middle", "high"))
  )

## Then we plot the data using ggplot: one box of farms per homeless group.
ggplot(merged_data2, aes(y = farms, fill = homelessgroup)) +
  geom_boxplot()
I think you can just use cut() with your data to partition into 4 groups. Then you can use that variable with the plot
# Bin suspension_rate_total_pct into four labelled intervals with breaks at
# 0, 0.5, 1, 1.5 and 2; include.lowest = TRUE keeps a value of exactly 0 in
# the first bin.
merged_data <- transform(
  merged_data,
  group = cut(
    x = suspension_rate_total_pct,
    breaks = seq(0, 2, by = 0.5),
    labels = c("low", "lowish", "highish", "high"),
    include.lowest = TRUE
  )
)

# One box of farms_pct per suspension group.
ggplot(data = merged_data, mapping = aes(x = group, y = farms_pct)) +
  geom_boxplot()
Similar questions to this have been asked, but I have not been able to apply the suggested solutions successfully.
I have created a plot like so;
> elective_ga <- c(68, 51, 29, 10, 5)
> elective_epidural <- c(29, 42, 19, 3, 1)
> elective_cse <- c(0, 0, 0, 20, 7)
> elective_spinal <- c(3, 7, 52, 67, 87)
> years <- c('1982', '1987', '1992', '1997', '2002')
> values <- c(elective_ga, elective_epidural, elective_cse, elective_spinal)
> elective_technique <- data.frame(years, values)
> p <- ggplot(elective_technique, aes(years, values))
> p +geom_bar(stat='identity', aes(fill=c(rep('GA', 5), rep('Epidural', 5), rep('CSE', 5), rep('Spinal', 5)))) +labs(x='Year', y='Percent', fill='Type')
which produces the following chart;
I was expecting the bars to be stacked in the order (from top to bottom) GA, Epidural, CSE, Spinal. I would have thought the way I constructed the data frame that they should be ordered in this way but obviously I have not. Can anyone explain why the bars are ordered the way they are, and how to get them the way I want?
How about this?
# Percentages per technique, one value per year in `years`.
elective_ga <- c(68, 51, 29, 10, 5)
elective_epidural <- c(29, 42, 19, 3, 1)
elective_cse <- c(0, 0, 0, 20, 7)
elective_spinal <- c(3, 7, 52, 67, 87)
years <- c("1982", "1987", "1992", "1997", "2002")
values <- c(elective_ga, elective_epidural, elective_cse, elective_spinal)

# Technique label for each entry of `values` (five consecutive years each).
Type <- rep(c("GA", "Epidural", "CSE", "Spinal"), each = 5)

# `years` (length 5) is recycled against the 20 values.
elective_technique <- data.frame(years, values, Type)

# Store Type as a factor with explicit levels so that the fill order follows
# this order rather than the alphabetical default.
elective_technique$Type <- factor(
  elective_technique$Type,
  levels = c("GA", "Epidural", "CSE", "Spinal")
)

p <- ggplot(elective_technique, aes(years, values, fill = Type)) +
  geom_bar(stat = "identity") +
  labs(x = "Year", y = "Percent", fill = "Type")

# Render the plot; assigning to `p` alone does not draw it.
p
One way is to reorder the levels of the factor.
library(ggplot2)
# Yearly percentages for each technique.
elective_ga <- c(68, 51, 29, 10, 5)
elective_epidural <- c(29, 42, 19, 3, 1)
elective_cse <- c(0, 0, 0, 20, 7)
elective_spinal <- c(3, 7, 52, 67, 87)
years <- c("1982", "1987", "1992", "1997", "2002")
values <- c(elective_ga, elective_epidural, elective_cse, elective_spinal)

# One label per value, five consecutive entries per technique.
type <- rep(c("GA", "Epidural", "CSE", "Spinal"), each = 5)
elective_technique <- data.frame(years, values, type)

# Fix the level order of the fill variable.
elective_technique$type <- factor(
  elective_technique$type,
  levels = c("GA", "Epidural", "CSE", "Spinal")
)

p <- ggplot(elective_technique, aes(x = years, y = values))
p +
  geom_bar(aes(fill = type), stat = "identity") +
  labs(x = "Year", y = "Percent", fill = "Type")
The forcats package may provide a cleaner solution.
This question already has answers here:
creating "radar chart" (a.k.a. star plot; spider plot) using ggplot2 in R
(5 answers)
Closed 7 years ago.
I am trying to make a radar plot as in the attached image using ggplot2 (or any other package in R). The linked question talks about this, but my case is different, as I am trying to create a spider plot for response data within a certain range.
I made a plot using the code below, but I couldn't figure out how to make it look like the image. Kindly help me with this.
library(ggplot2)

# Impact categories and their scores for the two groups.
Impcts <- c("system", "supply", "security", "well-being")
present <- c(5, 5, 3, 5)
past <- c(6, 6, 4, 5)
group.names <- c("present", "past")

# One row per impact for each group; var.order keeps the category order.
ddf.pre <- data.frame(
  Group = group.names[1],
  Impcts = Impcts,
  var.order = seq_along(Impcts),
  var.value = present
)
ddf.pas <- data.frame(
  Group = group.names[2],
  Impcts = Impcts,
  var.order = seq_along(Impcts),
  var.value = past
)
ddf <- rbind(ddf.pre, ddf.pas)

# Polar plot with one path per group; the x categories are ordered by
# var.order.
ggplot(ddf, aes(
  y = var.value, x = reorder(Impcts, var.order),
  group = Group, colour = Group
)) +
  coord_polar() +
  geom_path() +
  geom_point() +
  labs(title = "Impacts Analysis")
Here is my attempt. First, I drew squares using geom_path(). Then, I drew two polygons on top of the squares using geom_polygon(). Finally, I added annotations.
### Draw squares
# Six concentric diamonds (ids 1-6) with "radius" 6 down to 1. Each one is
# given as five points (top, right, bottom, left, top) so the path closes.
mydf <- data.frame(
  id = rep(1:6, each = 5),
  x = c(
    0, 6, 0, -6, 0,
    0, 5, 0, -5, 0,
    0, 4, 0, -4, 0,
    0, 3, 0, -3, 0,
    0, 2, 0, -2, 0,
    0, 1, 0, -1, 0
  ),
  y = c(
    6, 0, -6, 0, 6,
    5, 0, -5, 0, 5,
    4, 0, -4, 0, 4,
    3, 0, -3, 0, 3,
    2, 0, -2, 0, 2,
    1, 0, -1, 0, 1
  )
)

# The ggplot() call is closed before "+" so that geom_path() is added as a
# layer.
g <- ggplot(data = mydf, aes(x = x, y = y, group = factor(id))) +
  geom_path()

### Draw polygons
# Two filled polygons (ids 7 and 8), using the same point order as the
# squares. They are labelled "Past" and "Present" in the legend below.
mydf2 <- data.frame(
  id = rep(7:8, each = 5),
  x = c(
    0, 6, 0, -5, 0,
    0, 5, 0, -5, 0
  ),
  y = c(
    6, 0, -4, 0, 6,
    5, 0, -3, 0, 5
  )
)

gg <- g +
  geom_polygon(
    data = mydf2,
    aes(x = x, y = y, group = factor(id), fill = factor(id))
  ) +
  scale_fill_manual(
    name = "Time", values = c("darkolivegreen4", "brown4"),
    labels = c("Past", "Present")
  )

### Add annotation
# Axis labels placed just outside the outermost square.
mydf3 <- data.frame(
  x = c(0, 6.5, 0, -6.5),
  y = c(6.5, 0, -6.5, 0),
  label = c("system", "supply", "security", "well-being")
)

ggg <- gg +
  annotate("text", x = mydf3$x, y = mydf3$y, label = mydf3$label, size = 3)

# Name the arguments in full instead of relying on partial matching of
# `file` to `filename`.
ggsave(filename = "name.png", plot = ggg, width = 10, height = 9)