Plotly: How to convert ggplot object into plotly? - r

I have a data set with two columns: the first is named Centile.threshold and the second Effective.tax.rates. The next step is to make a plot with the code below.
# Data and code
library("rJava")
library("xlsxjars")
library("xlsx")
require(tidyr)
require(plyr)
library("ggplot2")
library("plotly")
g4_data_ext<-data.frame(structure(list(Centile.threshold = c(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100), Effective.tax.rates = c(11.4875111183361,
8.07673874931679, 7.8639563682086, 6.84656368538612, 6.8383437884744,
5.83532681932012, 5.11955857753708, 4.68757220539562, 4.66492423437793,
4.63051650494602, 4.71570390296145, 4.4419481131279, 4.16374366969064,
4.4424750798236, 4.2633646392858, 4.10185821346884, 2.29819561722,
2.01895390855722, 0, 0, 2.80530318111453, 2.83367683951859, 3.03173307975026,
3.58758933227946, 3.05869003045607, 3.59225918463074, 3.01588015121759,
3.55943967998446, 3.6220565232692, 3.40885422585891, 3.51447401518606,
3.68902868712004, 3.77018314638409, 3.72783452684771, 3.80791342516448,
3.99449874248864, 4.08421307782513, 4.07517557211112, 4.19659018929637,
4.22887420998102, 4.25529382081159, 4.36205679154288, 4.40690982734329,
4.33909305037396, 4.45990415426276, 4.59436808108174, 4.6831546716255,
4.73811656768519, 4.75412915916737, 4.84778797281815, 4.94690785473091,
5.06784298188807, 5.1769208879221, 5.2520552039406, 5.33650672817794,
5.43499638671921, 5.52400199193912, 5.58169115527766, 5.70509046165446,
5.76549758450655, 5.86333222670147, 5.87097687497217, 5.98729128544292,
6.07397530734785, 6.15030076581313, 6.21615540600908, 6.28135059352123,
6.38292345287997, 6.43416757218245, 6.5863284138631, 6.63365437304645,
6.70316768627345, 6.7816891944299, 6.85128738244695, 6.94261253911407,
7.01673024329712, 7.11081973369591, 7.18077796481166, 7.26197149513331,
7.32607460317916, 7.39638728837014, 7.47062968448649, 7.55194205005014,
7.64318101794584, 7.73728594723894, 7.79092205170689, 7.88152530983832,
7.97428540786095, 8.09278589483141, 8.20373396784042, 8.27757060469128,
8.40889176349213, 8.50851684368756, 8.64124701008068, 8.72559960562268,
8.85276486059087, 9.06564270204267, 9.26861906650096, 9.43047799204161,
10.2298639144453), grp_id = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6,
6, 6, 6, 7, 8), grp_label = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 2L, 3L), .Label = c("<=50%",
"=99%", ">99%", "51%-60%", "61%-70%", "71%-80%", "81%-90%", "91%-98%"
), class = "factor")), row.names = c(NA, -100L), class = "data.frame"))
This is code for plotting a plot with ggplot. This code works well.
# Assign every centile to an income group: a numeric id (grp_id) plus a
# display label (grp_label).
g4_data_ext <- data.frame(g4_data, grp_id = NA, grp_label = NA)

# Upper edges of the groups. Bins are right-closed, so for the whole-number
# centiles used here they reproduce the ranges <=50, 51-60, 61-70, 71-80,
# 81-90, 91-98, 99 and 100 (">99%"). A value that falls between two integer
# ranges (e.g. 50.5) now goes to the next group up instead of ">99%".
grp_breaks <- c(-Inf, 50, 60, 70, 80, 90, 98, 99, Inf)
grp_labels <- c("<=50%", "51%-60%", "61%-70%", "71%-80%",
                "81%-90%", "91%-98%", "=99%", ">99%")

# cut() is vectorised, so no row-by-row loop or scalar if/else chain is needed.
# labels = FALSE returns the bin index (1..8); as.numeric() keeps grp_id a
# double, as in the loop-based version.
g4_data_ext$grp_id <- as.numeric(
  cut(g4_data$Centile.threshold, breaks = grp_breaks, labels = FALSE)
)

# Look the label up by id, then convert to factor; only labels that actually
# occur become levels, exactly as factor() on the character column did before.
g4_data_ext$grp_label <- factor(grp_labels[g4_data_ext$grp_id])
color_code<-factor(c(rep("dark turquoise", 6), "tomato", "orange red"))
means_g4<-data.frame(ddply(g4_data_ext,~grp_id+grp_label,summarise,mean=mean(Effective.tax.rates),sd=sd(Effective.tax.rates)), color_code)
ggplot(data = means_g4, aes(x=factor(grp_id),y=mean, label =mean ))+
scale_x_discrete("Group",breaks=c(1:8), labels=means_g4$grp_label)+
coord_cartesian(ylim = c(-3, 12)) +
geom_bar(stat="identity", fill=color_code)+
geom_text(aes(y = mean*1.1, label = round(mean,2)), position=position_dodge(0.9))+
theme(axis.text.x = element_text(angle = 0))
But my intention is to convert this plot with ggplotly(). I tried to convert it, but it doesn't work, so can anybody help me convert this plot into a plotly object?

When I try
ggplotly(ggplot(data = means_g4, aes(x=factor(grp_id),y=mean, label =mean ))+
scale_x_discrete("Group",breaks=c(1:8), labels=means_g4$grp_label)+
coord_cartesian(ylim = c(-3, 12)) +
geom_bar(stat="identity", fill=color_code)+
geom_text(aes(y = mean*1.1, label = round(mean,2)), position=position_dodge(0.9))+
theme(axis.text.x = element_text(angle = 0)))
I get the error
Error in nchar(axisObj$ticktext) : 'nchar()' requires a character vector
This error goes away if I leave out the scale_x_discrete term. The only thing there that looks like it should be a character vector is labels=means_g4$grp_label, and indeed means_g4$grp_label is a factor. So convert it, and the ggplotly() works:
ggplotly(ggplot(data = means_g4, aes(x=factor(grp_id),y=mean, label =mean ))+
scale_x_discrete("Group",breaks=c(1:8), labels=as.character(means_g4$grp_label))+
coord_cartesian(ylim = c(-3, 12)) +
geom_bar(stat="identity", fill=color_code)+
geom_text(aes(y = mean*1.1, label = round(mean,2)), position=position_dodge(0.9))+
theme(axis.text.x = element_text(angle = 0)))
This looks like a bug in ggplotly(), which should be doing that conversion automatically.

user2554330 figured out the hard part of your question. The rest is easy, and since I don't see your actual attempt at converting ggplot to plotly, here's how you can do that:
From a "standard" ggplot approach, just include ggplotly:
p <- ggplot(...)
p <- p + geom_point(...) # just an example...
fig <- ggplotly(p)
fig
Here's an example:
library(plotly)
library(tidyverse)
library(ggplot2)
# ggplot
p <- ggplot(data=iris, aes(x = Sepal.Length, y = Sepal.Width))
p <- p + geom_point(aes(color=Species, shape=Species)) +
xlab("Sepal Length") + ylab("Sepal Width") +
ggtitle("Sepal Length-Width")
# plotly
fig <- ggplotly(p)
fig
Plot:

Related

How to correct a different distance between bars in geom_col

I am making a geom_col in ggplot2. The x-axis is a numerical vector of timepoints (0, 6, 18, 24, 32, 44). There is a difference between each column corresponding to the numerical difference between each timepoint. But i want an equal distance between all the columns. I have searched for answers in here, but i didn't find a similar issue.
This is my code:
ggplot(data = ny_dataframe_scratch, aes(x=timepoint, y = relative_wound_healing, fill = Condition)) +
geom_col(width = 5, position = position_dodge()) +
scale_x_continuous(breaks=c(0, 6, 18, 24, 32, 44), name = "Time point, hours") +
scale_y_continuous(name = "Relative scratch area") +
scale_fill_manual(values=c("palevioletred4", "slategray")) +
geom_point(data = ny_dataframe_scratch, position = position_dodge(width = 5), aes(x=timepoint, y=relative_wound_healing, fill = Condition))
This is the output of dput():
structure(list(timepoint = c(0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6,
6, 18, 18, 18, 18, 18, 18, 24, 24, 24, 24, 24, 24, 32, 32, 32,
32, 32, 32, 44, 44, 44, 44, 44, 44), Condition = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L), .Label = c("Control", "Knockout"), class = "factor"),
relative_wound_healing = c(1, 1, 1, 1, 1, 1, 0.819981, 0.78227,
0.811902, 0.873852, 0.893572, 0.910596, 0.39819, 0.436948,
0.559486, 0.534719, 0.591295, 0.612154, 0.222731, 0.2592,
0.453575, 0.37238, 0.477891, 0.505393, 0.05243246, 0.0809449,
0.2108063, 0.261122, 0.3750218, 0.4129873, 0, 0.0240122,
0.0778219, 0.0806758, 0.2495444, 0.3203724)), class = "data.frame", row.names = c(NA,
-36L))
Picture of how the graph looks:
The x-scale has proportional gaps because ‘ggplot2’ considers the values as continuous rather than categorical.
To make it categorical, you can for instance use factors:
aes(x = factor(timepoint, ordered = TRUE), …
(Without ordered = TRUE, ‘ggplot2’ assumes alphabetical ordering, so it would put 11 before 5, which probably isn’t what you want.)
To fix the bar heights, you need to compute and plot a summary statistic — ‘ggplot2’ allows you to do this using stat_summary (instead of geom_col):
stat_summary(fun.y = mean, geom = "col", position = position_dodge())
Taken together:
ggplot(ny_dataframe_scratch) +
aes(x = factor(timepoint, ordered = TRUE), y = relative_wound_healing, fill = Condition) +
scale_fill_manual(values = c("palevioletred4", "slategray")) +
stat_summary(fun.y = mean, geom = "col", position = position_dodge()) +
geom_point(position = position_dodge(width = 1)) +
labs(x = "Time point, hours", y = "Relative scratch area")
Your timepoints are "numeric". Try coercing them to factor. At that point, ggplot should plot them at equidistance from each other.
xy$timepoint <- as.factor(xy$timepoint)

how to control the line thickness in geom_line

I would like to control the thickness for lines in my plotting, however I ran into some difficulty. It seems like if I add size=0.06 or size=2 in geom_line(), it did not really change the line thickness to different size. Also it added strange legend in the output. how should I fix that?
The code I used for plotting is:
ggplot(data =df)+
geom_line(aes(x = ADY, y = AVAL, color = PARAMCD, yaxs="d", xaxs="d", size=0.06))+
geom_point(aes(x = ADY, y = AVAL))+
scale_color_discrete(breaks=c("SYSBP", "DIABP", "PULSE"),name = "Vital signs", labels = c("Systolic BP", "Diastolic BP", "Pulse"))+
scale_colour_manual(values=c(DIABP="#512d69",SYSBP="#007254",PULSE="#fd9300"))
The output for size=0.06 and size =2 are:
Could someone give me some guidance on this? I don't want size to be shown on legend and I would like to control the thickness of the lines.Thanks.
The sample data can be build using codes:
df<- structure(list(ADY = c(-6, -6, -6, 1, 1, 1, 8, 8, 8, 15, 15,
15, 22, 22, 22, 29, 29, 29, 43, 43, 43, 57, 57, 57, 64, 87, 87,
87, 101, 101, 101), AVAL = c(66, 67, 127, 70, 58, 136, 68, 74,
140, 145, 74, 58, 75, 72, 149, 82, 66, 143, 86, 60, 159, 64,
87, 136, NA, 73, 58, 135, 141, 74, 74), PARAMCD = structure(c(3L,
1L, 2L, 1L, 3L, 2L, 3L, 1L, 2L, 2L, 1L, 3L, 1L, 3L, 2L, 1L, 3L,
2L, 1L, 3L, 2L, 3L, 1L, 2L, NA, 1L, 3L, 2L, 2L, 1L, 3L), .Label = c("DIABP",
"SYSBP", "PULSE"), class = "factor")), row.names = c(NA, -31L
), class = "data.frame")
size should be outside aes for your case :
You can see the difference between size = 0.06 and size = 2.
library(ggplot2)
ggplot(data =df)+
geom_line(aes(x = ADY, y = AVAL, color = PARAMCD, yaxs="d", xaxs="d"), size=0.06) +
geom_point(aes(x = ADY, y = AVAL))+
scale_colour_manual(values=c(DIABP="#512d69",SYSBP="#007254",PULSE="#fd9300"))
ggplot(data =df)+
geom_line(aes(x = ADY, y = AVAL, color = PARAMCD, yaxs="d", xaxs="d"), size=2) +
geom_point(aes(x = ADY, y = AVAL))+
scale_colour_manual(values=c(DIABP="#512d69",SYSBP="#007254",PULSE="#fd9300"))

Issue with graphing subjects and items growth curve model vs subjects-only in ggplot

I have been having trouble graphing growth curve model results that have been calculated over both subjects and items (both included as random effects), while models calculated over a dataset that is averaged over items, so that subjects are the only random effect, seems to work fine. I cannot seem to figure out why this would be or how to fix it.
Graph of Subject Only Model
Graph of Subjects and Items Model
Summary DF Subj Items
Summary DF Subj Only
> dput(head(new.df.subjitems, 20))
structure(list(Item.No = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1),
Subject_No = c(330, 322, 141, 330, 330,
330, 137, 330, 330, 330, 137, 330, 137, 330, 330, 137, 137, 330,
137, 141),
Bin.No = c(35, 17, 19, 44, 42, 34, 31, 23, 36, 32,
33, 28, 23, 33, 37, 7, 4, 30, 28, 31),
TargetFix = c(1, 1, 1,
0, 0.02, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0),
Condition.E = structure(c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L), .Label = c("First", "Second", "Max Entropy, Unrelated",
"Third", "Fourth", "Max Entropy, Competitive"), class = "factor"),
ot1 = c(-0.00489995957550024, 0.220498180897511, -0.171498585142509,
0.142098827689507, 0.210698261746511, -0.20089834259551,
-0.0538995553305028, -0.151898746840508, -0.112699070236506,
-0.230298100048512, 0.0146998787265008, -0.161698665991508,
-0.151898746840508, 0.0146998787265008, -0.122498989387506,
0.0538995553305028, 0.181298504293509, -0.0440996361795023,
-0.161698665991508, -0.0538995553305028),
ot2 = c(-0.158018948215706,
0.226392531578272, 0.0744512352170155, 0.00151941296361255,
0.192965446378795, 0.161057774142931, -0.135227753761518,
0.024310607417801, -0.0577376926172773, 0.26133902974136,
-0.156499535252094, 0.0486212148356019, 0.024310607417801,
-0.156499535252094, -0.0395047370539266, -0.135227753761518,
0.101800668562042, -0.142824818579581, 0.0486212148356019,
-0.135227753761518),
ot3 = c(0.0112384799617412, 0.177762495548696,
0.0718614343707494, -0.14311051566666, 0.113069194230467,
-0.05612035827049, 0.113717568074414, 0.12506411034348, 0.167352493276442,
-0.250560469916257, -0.0335353360396831, 0.101434485808536,
0.12506411034348, -0.0335353360396831, 0.16389449944206,
-0.113717568074414, -0.035984748339037, 0.0957432042894495,
0.101434485808536, 0.113717568074414),
ot4 = c(0.158779933129858,
0.0903598052498265, -0.175158477576023, -0.166766788637329,
-0.00086703192354873, -0.0706803112875428, 0.0864589810947309,
-0.178854426512889, -0.0950686572656207, 0.205524262921174,
0.153690832709029, -0.182263550144233, -0.178854426512889,
0.153690832709029, -0.123325047363878, 0.0864589810947308,
-0.15558879673071, 0.109609880754701, -0.182263550144233,
0.0864589810947309)),
.Names = c("Item.No", "Subject_No",
"Bin.No", "TargetFix", "Condition.E", "ot1", "ot2", "ot3", "ot4"
), row.names = c(1L, 5L, 8L, 22L, 29L, 59L, 61L, 74L, 78L, 86L,
90L, 98L, 101L, 111L, 115L, 120L, 126L, 133L, 140L, 145L), class = "data.frame")
> dput(head(df.subjonly, 20))
structure(list(Subject_No = c(103, 103, 103, 103, 103, 103, 103,
103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103
),
Bin.No = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20),
TargetFix = c(0.16667, 0.16667, 0.16667,
0.16667, 0.32667, 0.39, 0.5, 0.5, 0.5, 0.5, 0.62667, 0.59, 0.66667,
0.66667, 0.76667, 0.76333, 0.66667, 0.40667, 0.33333, 0.48333
),
Condition.E = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("First",
"Second", "Max Entropy, Unrelated", "NaN, Unrelated", "Third",
"Fourth", "Max Entropy, Competitive", "NaN, Competitive", "Low Entropy, NaN",
"High Entropy, NaN", "Max Entropy, NaN", "NaN, NaN"), class = "factor"),
ot1 = c(-0.240098019199512, -0.230298100048512, -0.220498180897511,
-0.210698261746511, -0.20089834259551, -0.19109842344451,
-0.181298504293509, -0.171498585142509, -0.161698665991508,
-0.151898746840508, -0.142098827689507, -0.132298908538507,
-0.122498989387506, -0.112699070236506, -0.102899151085505,
-0.0930992319345048, -0.0832993127835043, -0.0734993936325038,
-0.0636994744815033, -0.0538995553305028),
ot2 = c(0.297804940868062,
0.26133902974136, 0.226392531578271, 0.192965446378795, 0.161057774142931,
0.13066951487068, 0.101800668562042, 0.0744512352170154,
0.0486212148356019, 0.0243106074178009, 0.0015194129636125,
-0.0197523685269634, -0.0395047370539266, -0.0577376926172774,
-0.0744512352170155, -0.0896453648531411, -0.103320081525654,
-0.115475385234555, -0.126111275979843, -0.135227753761518
),
ot3 = c(-0.331823325024233, -0.250560469916256, -0.177762495548696,
-0.113069194230468, -0.0561203582704902, -0.00655577997768254,
0.0359847483390369, 0.0718614343707493, 0.101434485808536,
0.12506411034348, 0.14311051566666, 0.15593390946916, 0.16389449944206,
0.167352493276442, 0.166668098663387, 0.162201523293977,
0.154312974859294, 0.143362661050417, 0.12971078955843, 0.113717568074414
),
ot4 = c(0.347265133901292, 0.205524262921176, 0.0903598052498273,
-0.000867031923547791, -0.0706803112875423, -0.121489365408538,
-0.15558879673071, -0.175158477576023, -0.182263550144233,
-0.178854426512889, -0.16676678863733, -0.147721588350685,
-0.123325047363878, -0.0950686572656211, -0.0643291795224186,
-0.0323686454785664, -0.000334356356151326, 0.0307411167449481,
0.0599399328470624, 0.0864589810947308)), .Names = c("Subject_No",
"Bin.No", "TargetFix", "Condition.E", "ot1", "ot2", "ot3", "ot4"
), row.names = 3:22, class = "data.frame")
>
# Create a 4th-order orthogonal polynomial over the range of Bin.No.
# unique() returns bins in order of first appearance, so sort them first;
# otherwise row i of the poly() matrix does not belong to the i-th bin.
bin_values <- sort(unique(new.df.subjitems$Bin.No))
ot_terms <- poly(bin_values, 4)
# Create variables ot1, ot2, ot3, ot4 corresponding to the orthogonal
# polynomial time terms and populate them with the values for each row's own
# Bin.No. match() looks the bin up by value rather than using Bin.No as a row
# index, so unsorted data, gaps in the bins or a first bin other than 1 are safe.
new.df.subjitems[, paste0("ot", 1:4)] <-
  ot_terms[match(new.df.subjitems$Bin.No, bin_values), 1:4]
Model.subjitems.2 <- lmer(TargetFix ~ (ot1+ot2+ot3+ot4)*Condition.E +
(1+ot1+ot2+ot3+ot4|Subject_No) +(1+ot1+ot2+ot3+ot4|Item.No),
control = lmerControl(optimizer="bobyqa"),
data=new.df.subjitems, REML=F)
# Create a 4th-order orthogonal polynomial over the range of Bin.No.
# unique() returns bins in order of first appearance, so sort them first;
# otherwise row i of the poly() matrix does not belong to the i-th bin.
bin_values <- sort(unique(df.subjonly$Bin.No))
ot_terms <- poly(bin_values, 4)
# Create variables ot1, ot2, ot3, ot4 corresponding to the orthogonal
# polynomial time terms and populate them with the values for each row's own
# Bin.No. match() looks the bin up by value rather than using Bin.No as a row
# index, so unsorted data, gaps in the bins or a first bin other than 1 are safe.
df.subjonly[, paste0("ot", 1:4)] <-
  ot_terms[match(df.subjonly$Bin.No, bin_values), 1:4]
Model.subj.2 <- lmer(TargetFix ~ (ot1+ot2+ot3+ot4)*Condition.E +
(1+ot1+ot2+ot3+ot4|Subject_No),
control = lmerControl(optimizer="bobyqa"), data=df.subjonly,
REML=F)
# Graph Subject Items
# Every `+` has to end its line: a line that begins with `+` starts a new R
# expression, which detaches all following layers from the plot.
plot_subjitems <- ggplot(data=new.df.subjitems,
                         aes(Bin.No, TargetFix, color=Condition.E,
                             lty=Condition.E, shape=Condition.E)) +
  stat_summary(fun.data=mean_se,geom="ribbon",linetype=0,alpha=0.25) +
  stat_summary(fun.data=mean_se,geom="point",size=1,alpha=0.40) +
  # NOTE(review): fitted() must return one value per row of new.df.subjitems;
  # confirm that no rows were dropped (e.g. NAs) when the model was fit.
  # NOTE(review): recent ggplot2 releases prefer `fun` over `fun.y`.
  stat_summary(aes(y=fitted(Model.subjitems.2)),
               fun.y=mean,geom="line",size=2.0,alpha=0.9) +
  theme_bw(base_size=10) +
  labs(y="Fixation Proportion", x="Bins") +
  ggtitle("Subjects and Items, Target Fixations") +
  theme(text=element_text(color = "black", size=20, family = "Georgia")) +
  theme(axis.text = element_text(color = "black", size=10, family = "Georgia")) +
  scale_color_viridis(begin=0, end = .8, discrete=TRUE) +
  scale_shape_manual(values=c(3,2,16,7)) + #16,3,2
  theme(legend.key.width=unit(4,"line"))
# Display the plot, as the bare top-level ggplot() call did.
print(plot_subjitems)
# ggsave() is not a layer: adding it with `+` saves last_plot() (the previously
# drawn plot) instead of this one, so pass the plot object explicitly.
ggsave("Testing_SubjectItems_v3.png", plot=plot_subjitems, width=10, height=5)
# Graph Subject Only
# Every `+` has to end its line: a line that begins with `+` starts a new R
# expression, which detaches all following layers from the plot.
plot_subjonly <- ggplot(data=df.subjonly,
                        aes(Bin.No, TargetFix, color=Condition.E,
                            lty=Condition.E, shape=Condition.E)) +
  stat_summary(fun.data=mean_se,geom="ribbon",linetype=0,alpha=0.25) +
  stat_summary(fun.data=mean_se,geom="point",size=1,alpha=0.40) +
  # NOTE(review): fitted() must return one value per row of df.subjonly;
  # confirm that no rows were dropped (e.g. NAs) when the model was fit.
  # NOTE(review): recent ggplot2 releases prefer `fun` over `fun.y`.
  stat_summary(aes(y=fitted(Model.subj.2)),
               fun.y=mean,geom="line",size=2.0,alpha=0.9) +
  theme_bw(base_size=10) +
  labs(y="Fixation Proportion", x="Bins") +
  ggtitle("Subj Only, Target Fixations") +
  theme(text=element_text(color = "black", size=20, family = "Georgia")) +
  theme(axis.text = element_text(color = "black", size=10, family = "Georgia")) +
  scale_color_viridis(begin=0, end = .8, discrete=TRUE) +
  scale_shape_manual(values=c(3,2,16,7)) + #16,3,2
  theme(legend.key.width=unit(4,"line"))
# Display the plot, as the bare top-level ggplot() call did.
print(plot_subjonly)
# ggsave() is not a layer: adding it with `+` saves last_plot() (the previously
# drawn plot) instead of this one, so pass the plot object explicitly.
ggsave("Testing_SubjectOnly_v3.png", plot=plot_subjonly, width=10, height=5)

Advanced stacked bar chart ggplot2

Say I have the data frame:
df<-structure(list(predworker = c(1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8,
8, 8, 8, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11
), worker = c(1, 14, 11, 19, 13, 23, 3, 15, 20, 6, 15, 3, 5,
4, 22, 5, 21, 11, 14, 4, 15, 23, 6, 20, 3, 17, 16, 8, 9, 7, 8,
17, 9, 16, 7, 17, 9, 8, 16, 7, 10, 19, 2, 15, 14, 14, 1, 11,
19, 13), finalratio = c(0.358338156170776, 0.328697413978311,
0.200283479825366, 0.0634027658799677, 0.049278184145579, 0.245483741112573,
0.216351263581975, 0.211285529819829, 0.171813670019988, 0.155065795465635,
0.216637792442049, 0.21365067362223, 0.20254559121035, 0.184813787488195,
0.182352155237176, 0.257680316012908, 0.233934275233779, 0.18618378722994,
0.173241645742261, 0.14895997578111, 0.295633225885233, 0.197824577675154,
0.173926460086197, 0.169883366487268, 0.162732369866148, 0.312634332494825,
0.213471605336063, 0.168500990861721, 0.156199312722058, 0.149193758585333,
0.288139828063799, 0.249716321272007, 0.228189414450808, 0.132448859555662,
0.101505576657724, 0.28062982129018, 0.24896481457126, 0.185822099676468,
0.175529116141424, 0.109054148320668, 0.843396823680576, 0.0488581484138975,
0.0419903739183709, 0.0332313337137541, 0.0325233202734015, 0.354288383060293,
0.308159669367751, 0.222981515774462, 0.0731493536310783, 0.0414210781664159
), rank = c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L)),
.Names = c("predworker", "worker", "finalratio", "rank"),
row.names = c(NA, -50L), class = c("grouped_df", "tbl_df","tbl", "data.frame"),
vars = "predworker", drop = TRUE,
indices = list( 0:4, 5:9, 10:14, 15:19, 20:24, 25:29, 30:34, 35:39, 40:44,
45:49),
group_sizes = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L),
biggest_group_size = 5L, labels = structure(list(predworker = c(1, 3, 4, 5, 6, 7, 8, 9, 10, 11)),
row.names = c(NA, -10L), class = "data.frame", vars = "predworker",
drop = TRUE, .Names = "predworker"))
which looks as follows:
predworker worker finalratio rank
<dbl> <dbl> <dbl> <int>
1. 1. 0.358 1
1. 14. 0.329 2
1. 11. 0.200 3
1. 19. 0.0634 4
1. 13. 0.0493 5
3. 23. 0.245 1
I'm trying to do a stacked bar chart using ggplot2. I'm looking for something similar to this
ggplot(df, aes(x = factor(predworker) ,y = finalratio, fill = factor(rank))) + geom_bar(stat = "identity")
However, there are some other details I do not know how to add to this plot:
I'd like to order each bar by rank (i.e. I want the longest bar at the beginning from the bottom. Opposite of what is now.)
How can I do two subplots in the same figure. Say I want the first 6 bars in one subplot and the rest in another subplot, but self-contained (sort of a facet in ggplot.)
How can I write a value within each bar for each category? For instance, for each rank, I'd like to write the corresponding finalratio and the worker (other column) value within the limits of each sub-bar.
To order bars by rank you need to reorder the factor levels, from 5 to 1.
You could subset the data by predworker and use something like gridExtra::grid.arrange or cowplot::plot_grid to combine subplots. Or: you could add another column to indicate facets and facet on that.
You use geom_text. You'll want to round finalratio or there will be too many digits.
Putting it all together: I'm using the facet approach and ungrouping your grouped tibble because it interferes with mutate:
library(tidyverse)
df %>%
ungroup() %>%
mutate(facet = ifelse(predworker > 7, 2, 1),
rank = factor(rank, levels = 5:1),
predworker = factor(predworker)) %>%
group_by(predworker) %>%
ggplot(aes(predworker, finalratio)) +
geom_col(aes(fill = rank)) +
geom_text(aes(label = paste(worker, "=", round(finalratio, digits = 2))),
position = position_stack(vjust = 0.5)) +
facet_grid(~facet, scales = "free_x")
Or to facet vertically:
df %>%
ungroup() %>%
mutate(facet = ifelse(predworker > 7, 2, 1),
rank = factor(rank, levels = 5:1),
predworker = factor(predworker)) %>%
group_by(predworker) %>%
ggplot(aes(predworker, finalratio)) +
geom_col(aes(fill = rank)) +
geom_text(aes(label = paste(worker, "=", round(finalratio, digits = 2))),
position = position_stack(vjust = 0.5)) +
facet_wrap(~facet, scales = "free_x", ncol = 1)

Setting range and breaks on scale on ggplot2

Using a sample dataframe:
df <- structure(list(SITCD = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("GSO/TO", "IKOF", "JL",
"MES", "SSD", "USSD"), class = "factor"), Code = structure(c(27L,
21L, 3L, 25L, 26L, 20L, 2L, 28L, 230L, 16L, 4L, 10L, 15L, 1L), .Label = c("AAR-2107",
"AAR-643", "AAR-644", "AAR-995", "HAR-2956", "HAR-2957", "I-430",
"I-431", "I-432", "I-9490", "I-9491", "K-1461", "K-1740", "K-1915",
"K-2034", "K-2096", "K-2385", "K-2386", "K-2387", "K-3112", "K-3220",
"K-3224", "Lu-1095", "Lu-1103", "LU-3282", "LU-3283", "LU-3284",
"LU-3400", "Lu-487", "Lu-489,90", "Lu-491,92", "Lu-528", "Lu-529",
"Lu-530", "Lu-531", "Lu-585", "Lu-586", "Lu-608", "Lu-646", "Lu-647",
"Lu-648", "Lu-711", "Lu-714", "Lu-766", "Lu-768", "Lu-790", "Lu-792",
"Lu-793", "Lu-826", "Lu-827", "Lu-828", "Lu-829", "Lu-830", "Lu-831",
"Lu584", "M-1611", "M-1612", "M-1613", "M-1614", "M-1615", "M-1616",
"M-1617", "M-1618", "M-1619", "M-1620", "M-1621", "M-1622", "M-1623",
"M-1624", "OS-49305", "OS-49306", "OS-49308", "OS-49309", "OS-49311",
"OS-49312", "OS-49313", "OS-49314", "OS-49315", "OS-49384", "OS-49385",
"OS-49386", "OS-49387", "OS-49403", "OS-49414", "OS-49437", "OS-49440",
"OS-49441", "OS-49442", "OS-49493", "OS-49496", "OS-49499", "OS-49502",
"OS-49506", "OS-49515", "OS-49516", "OS-49517", "OS-49518", "OS-49519",
"OS-49520", "OS-49555", "OS-49558", "OS-49562", "OS-49565", "OS-49578",
"OS-49580", "OS-49581", "OS-49582", "OS-49583", "OS-49584", "OS-49605",
"OS-49606", "OS-49607", "OS-51568", "OS-51716", "OS-51759", "OS-51760",
"OS-51765", "OS-51766", "OS-51767", "OS-51769", "OS-51770", "OS-51774",
"OS-51775", "OS-51776", "OS-51845", "OS-51846", "OS-51847", "OS-51874",
"OS-51875", "OS-51882", "OS-51883", "OS-51884", "OS-51885", "OS-52112",
"OS-52956", "OS-52957", "OS-52962", "OS-52963", "OS-52964", "OS-52966",
"OS-52967", "OS-52968", "OS-52969", "OS-52970", "OS-54002", "OS-54004",
"OS-54005", "OS-54006", "OS-54007", "OS-54008", "OS-54009", "OS-54045",
"OS-54046", "OS-54048", "OS-54073", "OS-54074", "OS-54075", "OS-54076",
"OS-54077", "OS-54892", "OS-55609", "OS-55610", "OS-55611", "OS-55612",
"OS-55613", "OS-55614", "OS-55724", "OS-55725", "OS-55728", "OS-55729",
"OS-55730", "OS-55731", "OS-55732", "OS-55733", "OS-55734", "OS-55735",
"OS-55736", "OS-55737", "OS-58249", "OS-58250", "OS-58324", "OS-58325",
"OS-58326", "OS-58327", "OS-58509", "OS-58606", "OS-58607", "OS-58609",
"OS-58673", "OS-58674", "OS-58701", "OS-58702", "OS-58703", "OS-58704",
"OS-58705", "OS-58732", "OS-58735", "OS-59579", "OS-62849", "OS-62850",
"OS-62851", "OS-62852", "OS-62855", "OS-62985", "OS-62986", "OS-62992",
"OS-62994", "OS-64754", "OS-64755", "OS-64756", "OS-64759", "OS-64760",
"OS-64762", "OS-64764", "OS-64765", "OS-64766", "OS-64843", "OS-64844",
"OS-64845", "OS-64849", "OS-65398", "OS-65399", "OS-65401", "OS-65405",
"OS-65406", "OS-65435", "OS-65436", "OS-65437", "OS-65438", "T-10382",
"Unknown", "W-1381", "Y596", "Y599", "Y600", "Y602", "Y702",
"Y703", "Y704", "Y708", "Y711", "Y712", "Y713", "Y714", "Y716",
"Y717", "Y876", "Y878", "Y879", "Y882", "Y883", "Y884"), class = "factor"),
Type = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L), .Label = c("Above", "At", "Below"), class = "factor"),
RSL = c(5, 8, 17.5, 19, 27, 30, 30, 33, 35, 40, 40, 50, 53,
70), RSL_error = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5,
2), Age = c(8183.5, 9221.5, 10424.5, 10069, 9092, 10465.5,
9204.5, 10531.5, 9844.5, 10073.5, 9905, 9907.5, 11660, 10698.5
), age_error = c(232.5, 295.5, 519.5, 371, 323, 377.5, 336.5,
324.5, 318.5, 408.5, 327, 380.5, 463, 394.5), x_min_error = c(7951L,
8926L, 9905L, 9698L, 8769L, 10088L, 8868L, 10207L, 9526L,
9665L, 9578L, 9527L, 11197L, 10304L), x_max_error = c(8416L,
9517L, 10944L, 10440L, 9415L, 10843L, 9541L, 10856L, 10163L,
10482L, 10232L, 10288L, 12123L, 11093L), y_min_error = c(3,
6, 15.5, 17, 25, 28, 28, 31, 33, 38, 38, 48, 48, 68), y_max_error = c(7,
10, 19.5, 21, 29, 32, 32, 35, 37, 42, 42, 52, 58, 72)), .Names = c("SITCD",
"Code", "Type", "RSL", "RSL_error", "Age", "age_error", "x_min_error",
"x_max_error", "y_min_error", "y_max_error"), row.names = c(NA,
14L), class = "data.frame")
I wish to draw a graph using the following code:
g <- ggplot (df, aes(x=Age, y=RSL, shape = Type)) +
geom_point() +
scale_shape_manual(values=c(1,15,5)) + #makes open circle/triangle
theme(axis.line=element_line(colour = "black", size = 0.5, linetype = "solid")) + # adds solid black x and y axis
geom_errorbar(aes(ymin=y_min_error, ymax=y_max_error,width=0,)) + # y error bar
geom_errorbarh(aes(xmin=x_min_error, xmax=x_max_error,height=0,)) +
theme_classic() +
theme_bw()+ #Black outline around the graph
xlim(0, 14000) +#Set axis limits
ylim(0, 120) +
#scale_x_continuous(breaks=seq(0,14000,2000))+
#scale_y_continuous(breaks=seq(0,120,20))+
theme(legend.position="bottom")
g
I was wondering why I am having difficulty setting the axes scale. I am trying to use the scale_x_continuous(breaks=seq(...) code which wasn't working. I then read elsewhere that I had to set the limits of the scales which I did with xlim/ylim but I can't use this with the scale_x_continuous code as I get the error message:
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Does anyone have any ideas?
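Replace xlim(0, 14000) with scale_x_continuous(breaks = seq(0, 14000, 1000), limits = c(0, 14000)). xlim() is itself a shorthand for a continuous x scale, so adding scale_x_continuous() on top of it replaces it; setting breaks and limits in one scale call avoids the message.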
Tidier code:
library(ggplot2)
# Age vs. RSL scatter with horizontal (age) and vertical (RSL) error bars.
ggplot(df, aes(Age, RSL, shape = Type)) +
  geom_point() +
  geom_errorbarh(aes(xmin = x_min_error,
                     xmax = x_max_error,
                     height = 0)) +
  geom_errorbar(aes(ymin = y_min_error,
                    ymax = y_max_error,
                    width = 0)) +
  scale_shape_manual(values = c(1, 15, 5)) +
  # Limits and breaks are set in a single scale call per axis; a separate
  # xlim()/ylim() would be replaced by (or replace) the scale_*_continuous().
  scale_y_continuous(limits = c(0, 120)) +
  # Breaks start at 0 and stop at the upper limit so ticks fall on round
  # values (0, 1000, ..., 14000) rather than 1, 1001, ...
  scale_x_continuous(breaks = seq(0, 14000, 1000),
                     limits = c(0, 14000))

Resources