gganimate transition_reveal() with geom_line() breaking on the final frame? - r

I am trying to animate a line graph with multiple lines. It seems that there is an error with the gganimate package involving transition_reveal() that is causing the final frame to revert for all of the lines but one. This error is not present when not using gganimate. Here is the code:
df <- read.csv("test.csv", stringsAsFactors = TRUE)
anim <- ggplot(df, aes(Day, Accidents, group = State, color = State)) +
geom_line() +
transition_reveal(Day) +
ease_aes('cubic-in-out')
jiff <- animate(anim, fps = 24, duration = 5, start_pause = 0, end_pause = 72, height = 4, width = 7, units = "in", res = 150)
jiff
Here is the dput of the dataframe:
structure(list(State = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L), levels = c("A", "B", "C", "D"), class = "factor"),
Day = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L),
Accidents = c(5L, 2L, 5L, 6L, 1L, 2L, 6L, 8L, 4L, 10L, 2L,
4L)), class = "data.frame", row.names = c(NA, -12L))
Here is the output:
Regardless of the ending pause or how many values I have along the x-axis, the final frame will always look like this with only one line appearing as updated. Does anyone know why this might be happening?
UPDATE: Reverting the gganimate package from 1.0.8 to 1.0.7 did seem to do the trick after all.

The issue is in this line start_pause = 0, end_pause = 72,. Remove or adapt it:
anim <- ggplot(df, aes(Day, Accidents, group= State, color = State)) +
geom_line() +
transition_reveal(Day) +
ease_aes('cubic-in-out')
animate(anim, fps = 24, duration = 5,
height = 4, width = 7, units = "in", res = 150)

Related

Creating raincloud plot from a data frame in R

I wanted a visualization something like this
I ended up getting like this one
I'm fairly close to what I want, except that I'm not able to separate them
Here is my data frame
dput(dat_red)
structure(list(FAB = structure(c(5L, 1L, 5L, 3L, 2L, 4L, 6L,
2L, 1L, 6L, 5L, 1L, 5L, 1L, 5L, 6L, 3L, 5L, 2L, 5L, 3L, 3L, 3L,
1L, 3L, 1L, 1L, 1L), .Label = c("M0", "M1", "M2", "M3", "M4",
"M5"), class = "factor"), Risk_Cyto = structure(c(2L, 3L, 2L,
2L, 3L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L), .Label = c("Good", "Intermediate",
"Poor"), class = "factor"), `TCGA-AB-2856` = c(0, 0.203446022561853,
0.057566971226641, 0.050525640210207, 0.050663468813024, 0.108022967842345,
0.03563961790061, 0.091955619434079, 0.09562601922977, 0.072990036124458,
0.05292549370956, 0.134908910498566, 0.056146007781438, 0.166755814327401,
0.072370918290216, 0.092982169160965, 0.053571132330207, 0.026946730545354,
0.096491482450314, 0.086393933157139, 0.086056971395349, 0.059872483122941,
0.05562972070039, 0.080629871622231, 0.06458076058265, 0.109295018454197,
0.15019108327262, 0.122208033564744), `TCGA-AB-2849` = c(0.203446022561853,
0, 0.138756102002674, 0.109150212934145, 0.130381628657973, 0.186028570196918,
0.201142265508601, 0.117008908236162, 0.07523492135779, 0.237542759238287,
0.154026516322799, 0.093169870680731, 0.174873827256869, 0.077917778705184,
0.217466101351585, 0.247196178178148, 0.139168631446623, 0.130879779506245,
0.094044964277672, 0.102330796604311, 0.115883670128914, 0.106007290303468,
0.124207778875499, 0.100051046626221, 0.096898638044544, 0.081075416500332,
0.066801569316824, 0.095571899845876), `TCGA-AB-2971` = c(0.057566971226641,
0.138756102002674, 0, 0.057153443556063, 0.049118618822663, 0.108803803345704,
0.038593571058361, 0.05623480754803, 0.061897696825206, 0.056921365921972,
0.027147582644049, 0.100579305160467, 0.031712766628694, 0.099623521686644,
0.043315406299788, 0.079156224894216, 0.070713735063067, 0.042797402350358,
0.064121331342957, 0.076245258448711, 0.057969352005916, 0.056411884330189,
0.029950269541688, 0.052538503817376, 0.053263317374002, 0.073813902166228,
0.081932722355952, 0.095255347468669), `TCGA-AB-2930` = c(0.050525640210207,
0.109150212934145, 0.057153443556063, 0, 0.040710142137316, 0.087506794353747,
0.076018856821365, 0.054334641613629, 0.043854827190482, 0.121490922447548,
0.060145981627256, 0.070829823037578, 0.0708179998993, 0.083561655580485,
0.106626803408534, 0.149000581782327, 0.049861493156012, 0.018112612744773,
0.05246829209315, 0.041582348253964, 0.053306367816997, 0.035373116643303,
0.042875256342202, 0.03406333799917, 0.036306618864362, 0.045647830531497,
0.084727864328183, 0.079147350281325), `TCGA-AB-2891` = c(0.050663468813024,
0.130381628657973, 0.049118618822663, 0.040710142137316, 0, 0.117167203965628,
0.057145523476846, 0.07089819966556, 0.058848771210843, 0.090222074046894,
0.052188574602838, 0.091623506635555, 0.053000329480576, 0.094592248885481,
0.082033497053918, 0.111240839210373, 0.065982245111563, 0.038618210190806,
0.063406266346048, 0.062231987650712, 0.067503749234478, 0.039970960455281,
0.042758552599394, 0.049740193805893, 0.04884538212911, 0.07959023948363,
0.090749468265183, 0.075792324166325)), class = "data.frame", row.names = c(NA,
-28L))
My code
# Read the tab-separated JSD table. check.names = FALSE keeps column names such
# as "TCGA-AB-2856" as they are instead of rewriting them to syntactic names.
dat_red <- read.csv("JSD_test_map_.txt", sep = "\t", check.names = FALSE)
# Reshape to long format, keeping FAB and Risk_Cyto as identifier columns.
# The table read above is `dat_red`; the original call melted `JSD_MAP`, which
# is never defined in this snippet.
# NOTE(review): melt() is presumably reshape2::melt -- confirm which package
# is attached.
df_melt <- melt(dat_red, id.vars = c("FAB", "Risk_Cyto"))
To plot the above I used this tutorial
source("R_rainclouds.R")
df_melt %>% ggplot(aes(x=Risk_Cyto,y=value, fill = FAB)) +
geom_flat_violin(position = position_nudge(x = .2, y = 0),adjust =2, alpha = 0.5) +
geom_point(position = position_jitter(width = .15), size = .8) +
geom_boxplot(aes(x = Risk_Cyto, y = value, fill = FAB),outlier.shape = NA, alpha = .5, width = .1, colour = "black")+
#theme_jen() +
labs(title = "Raincloud plot of body mass by species", x = 'Risk_Cyto', y = 'JSD') +
easy_remove_legend()
So I have the following group in my metadata or patient info in this subset
> unique(dat_red$FAB)
[1] M4 M0 M2 M1 M3 M5
Levels: M0 M1 M2 M3 M4 M5
> unique(dat_red$Risk_Cyto)
[1] Intermediate Poor Good
Levels: Good Intermediate Poor
My objective is to show Risk_Cyto as my main group, similar to the first figure, where they show ColonT, HeartLV, Liver, Muscle, etc. Within each group I have different FAB subtypes, which I want to show the same way Young and Old are shown there
Right now everything is kind of stacked or rather messed up in single plot
Any help or suggestion is really appreciated
Put FAB on the x axis and facet by Risk_Cyto
# Raincloud plot: FAB subtypes along the x axis, one facet per Risk_Cyto level.
df_melt %>%
  ggplot(aes(FAB, value, fill = FAB)) +
  # Flat violin nudged to the right so it sits beside the points and the box.
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    adjust = 2,
    alpha = 0.5
  ) +
  geom_point(position = position_jitter(width = .15), size = .8) +
  geom_boxplot(
    outlier.shape = NA,
    alpha = .5,
    width = .1,
    colour = "black"
  ) +
  # The x aesthetic is FAB, so the axis is labelled 'FAB'. The original
  # labelled it 'Risk_Cyto', which is the faceting variable.
  labs(
    title = "Raincloud plot of body mass by species",
    x = 'FAB',
    y = 'JSD'
  ) +
  # Free x scales/space so each facet only shows the FAB levels it contains.
  facet_grid(. ~ Risk_Cyto, scales = "free_x", space = "free_x") +
  theme_bw(base_size = 16) +
  theme(
    legend.position = "none",
    strip.background = element_blank(),
    strip.text = element_text(face = 2, size = 22)
  )

How to prevent R from alphabetically ranking data in ggplot and specify the order in which data is plotted (Data + Code + Graphs provided)?

I'm trying to fix an issue with my GGBalloonPlot graph with regards to how R processes the axis labels.
By default R plots the data using the labels ranked in reverse alphabetical order, but to reveal the pattern in the data, the data need to be plotted in a specific order. The only way I've been able to trick the software is by manually adding a prefix to each label in my .csv table so that R ranks them properly in my output. This is time-consuming, since I need to manually order the data first before adding the prefix and then plotting.
I would like to input a character vector (or something like that) which would essentially specify the order in which I want to have the data plotted which would reveal the pattern without the need for a prefix in the label name.
I have made some attempts with "scale_y_discrete" without success. I would also like to do the same thing for the X axis since I've had to use the same "trick" to display the columns in the proper non-alphabetical order which offsets the position of the labels. Any idea on how to get GGplot to display my values as seen in the graph without having to "trick" the software since this is quite time consuming ?
Data + Code
#Assign data to "Stack_Overflow_DummyData"
Stack_Overflow_DummyData <- structure(list(Species = structure(c(8L, 3L, 1L, 5L, 6L, 2L,
7L, 4L, 8L, 3L, 1L, 5L, 6L, 2L, 7L, 4L, 8L, 3L, 1L, 5L, 6L, 2L,
7L, 4L, 8L, 3L, 1L, 5L, 6L, 2L, 7L, 4L), .Label = c("Ani", "Cal",
"Can", "Cau", "Fis", "Ort", "Sem", "Zan"), class = "factor"),
Species_prefix = structure(c(8L, 7L, 6L, 5L, 4L, 3L, 2L,
1L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, 8L, 7L, 6L, 5L, 4L, 3L,
2L, 1L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L), .Label = c("ac.Cau",
"ad.Sem", "af.Cal", "ag.Ort", "as.Fis", "at.Ani", "be.Can",
"bf.Zan"), class = "factor"), Dist = structure(c(2L, 3L,
5L, 2L, 1L, 1L, 4L, 5L, 2L, 3L, 5L, 2L, 1L, 1L, 4L, 5L, 2L,
3L, 5L, 2L, 1L, 1L, 4L, 5L, 2L, 3L, 5L, 2L, 1L, 1L, 4L, 5L
), .Label = c("End", "Ind", "Pan", "Per", "Wid"), class = "factor"),
Region = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Cen", "Col",
"Far", "Nor"), class = "factor"), Region_prefix = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("a.Far", "b.Nor", "c.Cen", "d.Col"), class = "factor"),
Frequency = c(75, 50, 25, 50, 0, 0, 0, 0, 11.1, 22.2, 55.6,
55.6, 11.1, 0, 5.6, 0, 0, 2.7, 36.9, 27.9, 65.8, 54.1, 37.8,
28.8, 0, 0, 0, 3.1, 34.4, 21.9, 78.1, 81.3)), class = "data.frame", row.names = c(NA,
-32L))
# Plot Data With Prefix Trick
library(ggplot2)
library(ggpubr)
# make color base on Dist, size and alpha dependent on Frequency
ggballoonplot(Stack_Overflow_DummyData, x = "Region_prefix", y = "Species_prefix",
size = "Frequency", size.range = c(1, 9), fill = "Dist") +
theme_set(theme_gray() +
theme(legend.key=element_blank())) +
# Sets Grey Theme and removes grey background from legend panel
theme(axis.title = element_blank()) +
# Removes X axis title (Region)
geom_text(aes(label=Frequency), alpha=1.0, size=3, nudge_x = 0.4)
# Add Frequency Values Next to the circles
# Plot Data Without Prefix Trick
library(ggplot2)
library(ggpubr)
# make color base on Dist, size and alpha dependent on Frequency
ggballoonplot(Stack_Overflow_DummyData, x = "Region", y = "Species",
size = "Frequency", size.range = c(1, 9), fill = "Dist") +
theme_set(theme_gray() +
theme(legend.key=element_blank())) +
# Sets Grey Theme and removes grey background from legend panel
theme(axis.title = element_blank()) +
# Removes X axis title (Region)
geom_text(aes(label=Frequency), alpha=1.0, size=3, nudge_x = 0.4)
# Add Frequency Values Next to the circles
Here below are the graphs
Good Graph.
Using the label prefix trick with the visible pattern in the data:
Wrong Graph (R default).
Without the prefix trick when GGplot automatically orders the data/labels and the graph makes no sense:
To sum up, I would like the Good graph output without having to have to previously add a prefix in my labels.
Many Thanks in advance for your help.
For the axis labels, I would first define a function that rewrites the break labels:
# Turn prefixed break values into display labels by dropping a leading run of
# lower-case letters followed by a dot (e.g. "a." or "ab."). Values without
# such a prefix are returned unchanged.
shlab <- function(lbl_brk) {
  prefix_pattern <- "^[a-z]+\\."
  sub(pattern = prefix_pattern, replacement = "", x = lbl_brk)
}
Then, to change the labels you just have to use scale_x,y_discrete with labels = shlab (if you look at the help of scale_x_discrete you will see that one of the options for labels is A function that takes the breaks as input and returns labels as output).
For the colours, it is enough to set them (values) in scale_fill_manual; for the legend key sizes, use guides, like so:
library(ggplot2)
library(ggpubr)
shlab <- function(lbl_brk){
sub("^[a-z]+\\.","",lbl_brk)
}
# Balloon plot on the prefixed columns (the prefixes fix the plotting order);
# shlab() strips the prefixes again when the axis labels are drawn.
ggballoonplot(
  Stack_Overflow_DummyData,
  x = "Region_prefix", y = "Species_prefix",
  size = "Frequency", size.range = c(1, 9), fill = "Dist"
) +
  scale_x_discrete(labels = shlab) +
  scale_y_discrete(labels = shlab) +
  scale_fill_manual(values = c("green", "blue", "red", "black", "white")) +
  # Enlarge the fill legend keys so the colours are readable.
  guides(fill = guide_legend(override.aes = list(size = 8))) +
  # Apply the grey theme to this plot directly. The original added
  # theme_set(...) to the plot: theme_set() changes the global default theme
  # and returns the *previous* theme, so the first run and later runs of the
  # same code were styled differently.
  theme_gray() +
  theme(legend.key = element_blank()) + # Removes grey background from legend keys
  theme(axis.title = element_blank()) + # Removes the axis titles
  geom_text(aes(label = Frequency), alpha = 1.0, size = 3, nudge_x = 0.4) # Add Frequency values next to the circles
UPDATE:
With the new dataset and vector labels:
library(ggplot2)
library(ggpubr)
# make color base on Dist, size and alpha dependent on Frequency
# Balloon plot on the unprefixed columns; the plotting order of both axes is
# given explicitly through the `limits` of the discrete scales.
ggballoonplot(
  Stack_Overflow_DummyData,
  x = "Region", y = "Species",
  size = "Frequency", size.range = c(1, 9), fill = "Dist"
) +
  scale_y_discrete(
    limits = c("Cau", "Sem", "Cal", "Ort", "Fis", "Ani", "Can", "Zan")
  ) +
  scale_x_discrete(limits = c("Far", "Nor", "Cen", "Col")) +
  # Apply the grey theme to this plot directly. The original added
  # theme_set(...) to the plot: theme_set() changes the global default theme
  # and returns the *previous* theme, so the first run and later runs of the
  # same code were styled differently.
  theme_gray() +
  # Removes grey background from legend keys
  theme(legend.key = element_blank()) +
  # Removes the axis titles
  theme(axis.title = element_blank()) +
  # Add Frequency values next to the circles
  geom_text(aes(label = Frequency), alpha = 1.0, size = 3, nudge_x = 0.4)

Reordering factor for plotting using forcats and ggplot2 packages from tidyverse

First of all, thanks^13 to tidyverse. I want the bars in the chart below to follow the same factor levels reordered by forcats::fct_reorder (). Surprisingly, I see different order of levels in the data set when View ()ed as when they are displayed in the chart (see below). The chart should illustrate the number of failed students before and after the bonus marks (I want to sort the bars based on the number of failed students before the bonus).
MWE
ggplot (df) +
geom_bar (aes (forcats::fct_reorder (subject, FailNo, .desc= TRUE), FailNo, fill = forcats::fct_rev (Bonus)), position = 'dodge', stat = 'identity') +
theme (axis.text.x=element_text(angle=45, vjust=1.5, hjust=1.5, size = rel (1.2)))
Data output of dput (df)
structure(list(subject = structure(c(1L, 2L, 5L, 6L, 3L, 7L,
4L, 9L, 10L, 8L, 12L, 11L, 1L, 2L, 5L, 6L, 3L, 7L, 4L, 9L, 10L,
8L, 12L, 11L), .Label = c("CAB_1", "DEM_1", "SSR_2", "RRG_1",
"TTP_1", "TTP_2", "IMM_1", "RRG_2", "DEM_2", "VRR_2", "PRS_2",
"COM_2", "MEB_2", "PHH_1", "PHH_2"), class = "factor"), Bonus = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("After", "Before"), class = "factor"),
FailNo = c(29, 28, 20, 18, 15, 13, 12, 8, 5, 4, 4, 2, 21,
16, 16, 14, 7, 10, 10, 5, 3, 4, 4, 1)), .Names = c("subject",
"Bonus", "FailNo"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-24L))
Bar chart
The issue
According to the table above, SSR_2 var should come in the fifth rank and IMM_1 in the sixth, however in the chart we see these two variables swapping their positions. How to sort it right after tidyverse in this case?
Use factor with unique levels for your x -axis.
ggplot (df) +
geom_bar (aes(factor(forcats::fct_reorder
(subject, FailNo, .desc= TRUE),
levels=unique(subject)),
FailNo,
fill = forcats::fct_rev (Bonus)),
position = 'dodge', stat = 'identity') +
theme(axis.text.x=element_text(angle=45, vjust=1.5, hjust=1.5, size = rel (1.2)))
Edited: #dotorate comment
Sort failNo before the bonus
library(dplyr)
df_before_bonus <- df %>% filter(Bonus == "Before") %>% arrange(desc(FailNo))
Use FailNo before the bonus to create the factor
df$subject <- factor(df$subject, levels = df_before_bonus$subject, ordered = TRUE)
Updated plot
ggplot(df) +
geom_bar(aes (x = subject, y = FailNo, fill = as.factor(Bonus)),
position = 'dodge', stat = 'identity') +
theme (axis.text.x=element_text(angle=45, vjust=1.5, hjust=1.5, size = rel (1.2)))

Add text to plot with facetted bar chart

My question is related to this question. I want "2014" in the 4-year facet. I tried to repeat but my code doesn't give what I want.
Annotating text on individual facet in ggplot2
This is my data
structure(list(Rot = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("2-year",
"3-year", "4-year"), class = "factor"), Rot.Herb = structure(c(3L,
3L, 4L, 4L, 13L, 13L, 14L, 14L, 5L, 5L, 6L, 6L, 9L, 9L, 10L,
10L, 15L, 15L, 16L, 16L, 1L, 1L, 2L, 2L, 7L, 7L, 8L, 8L, 11L,
11L, 12L, 12L, 17L, 17L, 18L, 18L), .Label = c("A4-conv", "A4-low",
"C2-conv", "C2-low", "C3-conv", "C3-low", "C4-conv", "C4-low",
"O3-conv", "O3-low", "O4-conv", "O4-low", "S2-conv", "S2-low",
"S3-conv", "S3-low", "S4-conv", "S4-low"), class = "factor"),
variable = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("Diversity",
"Evenness"), class = "factor"), N = c(4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4), value = c(0.78537789925, 0.613408315,
1.305194686, 0.79519430975, 0.4481728555, 0.30608817425,
1.20978861475, 0.8580643725, 0.92387324875, 0.630166121,
0.945954185, 0.561172324, 1.43952456275, 0.8616864655, 1.23679146725,
0.831737624, 1.033474108, 0.80689293925, 0.9910142125, 0.79342098075,
1.175512223, 0.6293940245, 0.981614832, 0.62342189825, 1.351710013,
0.805075937, 1.6598348325, 0.7983622545, 1.01606920875, 0.5751418795,
1.0500365255, 0.56408326225, 1.07162937725, 0.6756859865,
0.45699816625, 0.44444147325), sd = c(0.354077266902404,
0.208934910331856, 0.169501822767995, 0.0774319459391732,
0.737366460962239, 0.40697977697835, 0.494107033311986, 0.11906912863268,
0.491492768082854, 0.34236657107712, 0.219739438843007, 0.205905593411204,
0.319301583035043, 0.0696484379979274, 0.0563293598951725,
0.0978700910274188, 0.446850757364563, 0.175073468716825,
0.426859848850874, 0.180469101499932, 0.526842123835502,
0.200470277385505, 0.574885944755375, 0.27189545397305, 0.39621771945215,
0.150798258847229, 0.275863362594154, 0.111178397407429,
0.254811233135664, 0.158920851982914, 0.198698241334475,
0.0730606635175717, 0.717706309307313, 0.453776579066358,
0.574276936403411, 0.513758415496589), se = c(0.177038633451202,
0.104467455165928, 0.0847509113839974, 0.0387159729695866,
0.368683230481119, 0.203489888489175, 0.247053516655993,
0.0595345643163399, 0.245746384041427, 0.17118328553856,
0.109869719421504, 0.102952796705602, 0.159650791517521,
0.0348242189989637, 0.0281646799475863, 0.0489350455137094,
0.223425378682282, 0.0875367343584126, 0.213429924425437,
0.090234550749966, 0.263421061917751, 0.100235138692753,
0.287442972377688, 0.135947726986525, 0.198108859726075,
0.0753991294236146, 0.137931681297077, 0.0555891987037145,
0.127405616567832, 0.0794604259914568, 0.0993491206672376,
0.0365303317587859, 0.358853154653656, 0.226888289533179,
0.287138468201705, 0.256879207748294), ci = c(0.563415944919255,
0.332462066715199, 0.26971522480343, 0.123211505132525, 1.1733145846647,
0.647595643784969, 0.786234551289211, 0.189465554245211,
0.782074671929471, 0.544781614588516, 0.349654482635521,
0.327641747494367, 0.508080071600555, 0.110826207087643,
0.089632581638694, 0.155733154793995, 0.71103927089404, 0.278580956835532,
0.679229274424713, 0.287166612643164, 0.838323385234058,
0.318992946792351, 0.914771825423139, 0.432646341459985,
0.630470808679215, 0.23995368085579, 0.438960169525453, 0.176909640028318,
0.40546153371869, 0.252878539112781, 0.316173242000635, 0.116255819336536,
1.14203089616693, 0.722059798737006, 0.91380275723334, 0.817504285602766
)), .Names = c("Rot", "Rot.Herb", "variable", "N", "value",
"sd", "se", "ci"), row.names = c(NA, -36L), class = "data.frame")
and the code to graph
p <- ggplot(Shannon.long2, aes(x=Rot.Herb, y=value, fill=factor(variable)))+
geom_bar(stat="identity", position="dodge")+
scale_fill_brewer(palette = "Set1")+
theme_bw() +
theme(panel.grid.major=element_blank()) +
facet_grid(~Rot, scales = "free_x", space="free_x")+
theme(legend.title=element_blank(),legend.text=element_text(size=20),legend.position="top")+
geom_errorbar(aes(ymin=value-se, ymax=value+se), size=0.5, width=.25,position=position_dodge(.9))+
xlab("\nTreatment") +
theme(axis.title = element_text(size=24,face="bold", vjust=4), axis.text.x = element_text(size=20,angle = 90, hjust = 1)) +
ylab("Shannon's H' and E'") +
theme(axis.title = element_text(size=24,face="bold", vjust=2), axis.text.y = element_text(size=20, color="black"))+
theme(strip.text.x = element_text(colour = "black", size = 20), strip.background = element_rect(fill = "white"))
produced graph (please don't mind the "2014" on the y-axis).
New code to annotate 2014, with help from eipi10
ann_text <- data.frame(x = "S4-conv",y = 1.75,lab = "2014", Rot.Herb=NA,
value=NA, variable=NA,
N=NA, sd=NA, se=NA, ci=NA,
Rot = factor("4-year",levels = c("2-year","3-year","4-year")))
I got an error saying Error: Discrete value supplied to continuous scale after I run p + geom_text(data = ann_text,label = "2014"). Please see what have been wrong with my code and data format. Thanks.
It turns out the issue is that when you include value=NA in ann_text it gets interpreted as logical (rather than numeric, which is its mode in Shannon.long2), causing the error because ggplot expects a numeric variable rather than a categorical one. Set value=NA_real_ (in addition to NA, R has class-specific missing value constants; see ?NA for more info) in ann_text to ensure value is interpreted as numeric and resolve the error. Or set value to any number, e.g., value=0.
In the example below, I've removed all of the theme and lab statements to shorten the code down to the essentials:
p = ggplot(Shannon.long2, aes(x=Rot.Herb, y=value, fill=factor(variable))) +
geom_bar(stat="identity", position="dodge") +
geom_errorbar(aes(ymin=value-se, ymax=value+se), size=0.5, width=.25,position=position_dodge(.9)) +
facet_grid(~Rot, scales = "free_x", space="free_x")
ann_text <- data.frame(x = "S4-conv", y = 1.75, lab = "2014", Rot.Herb=NA,
value=NA_real_, variable=NA)
p + geom_text(data = ann_text, aes(label=lab, x, y))
Note that you also need to feed x and y values to geom_text to provide the label location.
Another option would be to just use the same x and y variable names as in your original data frame, since ggplot already knows these names and has scaled the graph based on them. Now the only missing column we need to add is variable:
ann_text <- data.frame(Rot.Herb = "S4-conv", value = 1.75, lab = "2014", variable=NA)
p + geom_text(data = ann_text, aes(label=lab, Rot.Herb, value))

visualize associations between two groups of data

Each datapoint has a pairing of A and B, and there are multiple entries in A and multiple entries in B, i.e. multiple syndromes and multiple diagnoses, although for each datapoint there is one single syndrome-diagnosis pair.
Examples, suggestions, or ideas much appreciated
here's what the data is like. And I want to see connections between values of A and B (how many GG's are linked to TTs etc). Both are nominal datatypes.
ID,A ,B
1,GG,TT
2,AA,SS
3,BB,XX
4,DD,SS
5,DD,TT
6,CC,XX
7,HH,ZZ
8,AA,TT
9,CC,RR
10,DD,ZZ
11,AA,XX
12,AA,TT
13,DD,SS
14,DD,XX
15,AA,YY
16,CC,ZZ
17,FF,SS
18,FF,XX
19,BB,VV
20,GG,VV
21,GG,SS
22,AA,RR
23,AA,TT
24,AA,SS
25,CC,VV
26,CC,TT
27,FF,RR
28,GG,UU
29,CC,TT
30,BB,ZZ
31,II,TT
32,FF,RR
33,BB,SS
34,GG,YY
35,FF,RR
36,BB,VV
37,II,RR
38,CC,YY
39,FF,VV
40,AA,XX
41,AA,ZZ
42,GG,VV
43,BB,UU
44,II,UU
45,II,SS
46,DD,SS
47,AA,UU
48,BB,VV
49,GG,TT
50,BB,TT
Since your data is bipartite, I would suggest plotting points in the first factor on one side, points in the other factor on the other, with lines between them, like so:
The code I used to generate this was:
## Make up data.
data <- data.frame(X1=sample(state.region, 10),
X2=sample(state.region, 10))
## Set up plot window.
plot(0, xlim=c(0,1), ylim=c(0,1),
type="n", axes=FALSE, xlab="", ylab="")
# Map a factor onto evenly spaced positions in [0, 1]: the first level goes to
# 0 and the last level to 1. NA entries stay NA.
#
# f: a factor.
# Returns a numeric vector the same length as `f`.
factor.to.int <- function(f) {
  # With a single level the original denominator was 0, which produced NaN
  # positions; clamp it to 1 so a one-level factor maps to 0.
  n_gaps <- max(nlevels(f) - 1, 1)
  (as.integer(f) - 1) / n_gaps
}
segments(factor.to.int(data$X1), 0, factor.to.int(data$X2), 1,
col=data$X1)
axis(1, at = seq(0, 1, by = 1 / (length(levels(data$X1)) - 1)),
labels = levels(data$X1))
axis(3, at = seq(0, 1, by = 1 / (length(levels(data$X2)) - 1)),
labels = levels(data$X2))
This is what I do. A darker colour indicates a more important combination of A and B.
dataset <- data.frame(A = sample(LETTERS[1:5], 200, prob = runif(5), replace = TRUE), B = sample(LETTERS[1:5], 200, prob = runif(5), replace = TRUE))
Counts <- as.data.frame(with(dataset, table(A, B)))
library(ggplot2)
ggplot(Counts, aes(x = A, y = B, fill = Freq)) + geom_tile() + scale_fill_gradient(low = "white", high = "black")
Or if you prefer lines
library(ggplot2)
dataset <- data.frame(A = sample(letters[1:5], 200, prob = runif(5), replace = TRUE), B = sample(letters[1:5], 200, prob = runif(5), replace = TRUE))
Counts <- as.data.frame(with(dataset, table(A, B)))
Counts$X <- 0
Counts$Xend <- 1
Counts$Y <- as.numeric(Counts$A)
Counts$Yend <- as.numeric(Counts$B)
ggplot(Counts, aes(x = X, xend = Xend, y = Y, yend = Yend, size = Freq)) +
geom_segment() + scale_x_continuous(breaks = 0:1, labels = c("A", "B")) +
scale_y_continuous(breaks = 1:5, labels = letters[1:5])
This third option adds labels to the data points using geom_text().
library(ggplot2)
dataset <- data.frame(
A = sample(letters[1:5], 200, prob = runif(5), replace = TRUE),
B = sample(LETTERS[20:26], 200, prob = runif(7), replace = TRUE)
)
Counts <- as.data.frame(with(dataset, table(A, B)))
Counts$X <- 0
Counts$Xend <- 1
Counts$Y <- as.numeric(Counts$A)
Counts$Yend <- as.numeric(Counts$B)
ggplot(Counts, aes(x = X, xend = Xend, y = Y, yend = Yend)) +
geom_segment(aes(size = Freq)) +
scale_x_continuous(breaks = 0:1, labels = c("A", "B")) +
scale_y_continuous(breaks = -1) +
geom_text(aes(x = X, y = Y, label = A), colour = "red", size = 7, hjust = 1, vjust = 1) +
geom_text(aes(x = Xend, y = Yend, label = B), colour = "red", size = 7, hjust = 0, vjust = 0)
Maybe mosaicplot:
X <- structure(list(
ID = 1:50,
A = structure(c(6L, 1L, 2L, 4L, 4L, 3L, 7L, 1L, 3L, 4L, 1L, 1L, 4L, 4L, 1L, 3L, 5L, 5L, 2L, 6L, 6L, 1L, 1L, 1L, 3L, 3L, 5L, 6L, 3L, 2L, 8L, 5L, 2L, 6L, 5L, 2L, 8L, 3L, 5L, 1L, 1L, 6L, 2L, 8L, 8L, 4L, 1L, 2L, 6L, 2L), .Label = c("AA","BB", "CC", "DD", "FF", "GG", "HH", "II"), class = "factor"),
B = structure(c(3L, 2L, 6L, 2L, 3L, 6L, 8L, 3L, 1L, 8L, 6L, 3L, 2L, 6L, 7L, 8L, 2L, 6L, 5L, 5L, 2L, 1L, 3L, 2L, 5L, 3L, 1L, 4L, 3L, 8L, 3L, 1L, 2L, 7L, 1L, 5L, 1L, 7L, 5L, 6L, 8L, 5L, 4L, 4L, 2L, 2L, 4L, 5L, 3L, 3L), .Label = c("RR", "SS", "TT", "UU", "VV", "XX", "YY", "ZZ"), class = "factor")
), .Names = c("ID", "A", "B"), class = "data.frame", row.names = c(NA, -50L)
)
mosaicplot(with(X,table(A,B)))
For your example dataset:
Thanks! I think that the connectivity between elements in each class is best visualized by the link graph examples given by both Jonathon and Thierry. Thierry's 2nd which shows the magnitude is definitely where i will start.
update
Thanks everyone for your ideas and tips!
I came across the bipartite package, which has functions to visualize this kind of data. I think it's a clean visualization of the relationships I am trying to show.
did:
library(bipartite)
dataset <- data.frame(
A = sample(letters[1:5], 200, prob = runif(5), replace = TRUE),
B = sample(LETTERS[20:26], 200, prob = runif(7), replace = TRUE)
)
datamat <- as.matrix(table(dataset$A, dataset$B))
visweb(datamat, text = "interaction", textsize = .8)
giving:
visweb result
couldnt put image in as a new user :(

Resources