Adding lines connecting means to ggplot (Raincloud Plots) - r

I have a ggplot to which I want to add a line connecting the means. However, I keep getting an Error message:
"geom_path: Each group consists of only one
observation. Do you need to adjust the group
aesthetic?"
I tried solutions suggested on here, but these seemingly stopped working years ago. Hence, I opened a new post.
#some packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(here, readr, cowplot, tidyr, ggplot2, dplyr)
#some functions from https://github.com/RainCloudPlots/RainCloudPlots
source("R_rainclouds.R")
source("summarySE.R")
source("simulateData.R")
#some data
df3 <- structure(list(participant = c(1L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 1L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
1L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L), condition = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("RT_Cau", "RT_Moro",
"RT_Asi"), class = "factor"), RT = c(1.44248448543333, 2.73934002973517,
1.89927013090706, 1.32510448686595, 2.44101598069973, 2.84290772015973,
1.19419819108836, 2.12124958877175, 1.14707311860052, 2.72286767178203,
1.15914495363538, 1.5340050993702, 1.62616192435053, 1.32694796283192,
1.2720800304128, 0.99275928310549, 1.04329096409593, 1.43288644582691,
1.60302970699442, 1.3393626055176, 1.24088162033185, 2.42448868318791,
1.6398716779282, 1.53816275909702, 1.51033130413559, 3.226993255043,
2.1915727996463, 1.39240057519678, 3.0538809712989, 2.52658416881183,
1.16366335020089, 2.33377114484134, 1.39357978132538, 2.691606623485,
1.21999657945028, 1.72195011524003, 1.38834235226937, 1.44350802586345,
1.29563539425317, 0.909762618509679, 1.13583585924538, 1.58240957515452,
1.82142351906117, 1.3644415734435, 1.32141664778601, 2.23277562688125,
1.5773976029336, 1.43200172590417, 1.68991681725, 2.9617422858462,
1.60886625604519, 1.38647850513866, 3.46156610375971, 2.96950698342897,
1.17905107770577, 2.36256332626113, 1.31254065801458, 3.204902618708,
1.21067325368702, 1.80371515914087, 1.57816183853565, 1.40761655308155,
1.27304559913463, 1.07621914272144, 1.04203150853998, 1.58958820979388,
1.79859778873147, 1.19249820050996, 1.4116357628608, 2.15806795062162,
1.70597872926531, 1.66135756110131)), row.names = c(NA, -72L), class = "data.frame")
#make a summary of the data
df4 <- summarySE(df3, measurevar = "RT", groupvars = c("condition"))
#a working plot that shows dots, boxplot, distribution, and mean+SE
#I want to have lines connecting the mean dots.
ggplot(df3,aes(x=condition,y=RT,fill=condition,col=condition))+
geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .6,adjust =4)+
geom_point(aes(x = as.numeric(condition)-.15, y = RT, colour = condition),position = position_jitter(width = .05), size = .25, shape = 20)+
geom_boxplot(aes(x = condition, y = RT, fill = condition),outlier.shape = NA, alpha = .5, width = .1, colour = "black") +
geom_point(data = df4, aes(x = as.numeric(condition)+.1, y = RT_mean, group = condition, colour = condition), shape = 18) +
geom_errorbar(data = df4, aes(x = as.numeric(condition)+.1, y = RT_mean, group = condition, colour = condition, ymin = RT_mean-se, ymax = RT_mean+se), width = .05) +
ylab('RT')+
scale_fill_brewer(palette = "Dark2")+scale_colour_brewer(palette = "Dark2")+
guides(fill = FALSE, col = FALSE) +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA)
) # +
# geom_line(data = df4, aes(x = as.numeric(condition)+.1, #y = RT_mean, group = condition, colour = condition), #linetype = 3)
#The commented-out part is my attempt to connect the means with lines, which produces the described error.
#If you do not want to load the package from github, here is the raw code of the summarySE function:
# summarySE function
# Summarise one measurement column per group.
#
# Arguments:
#   data          data frame holding the measurement and grouping columns.
#   measurevar    name (string) of the column to summarise.
#   groupvars     character vector of grouping column names, handed to
#                 plyr::ddply().
#   na.rm         if TRUE, NAs are ignored by mean/median/sd and are not
#                 counted in N.
#   conf.interval confidence level used for the `ci` column (default .95).
#   .drop         forwarded unchanged to plyr::ddply().
#
# Returns a data frame with one row per group containing the grouping
# columns plus N, <measurevar>_mean, <measurevar>_median, sd, se (standard
# error of the mean) and ci (half-width of the confidence interval).
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = FALSE,
                      conf.interval = .95, .drop = TRUE) {
  # plyr is only ever called through `plyr::` below, so it must be installed
  # but does not need to be attached. Attaching it with library() from inside
  # a function would silently mask dplyr verbs in the caller's session.
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("summarySE() requires the 'plyr' package.", call. = FALSE)
  }

  # Version of length() which can handle NAs: if na.rm is TRUE, don't count them
  length2 <- function(x, na.rm = FALSE) {
    if (na.rm) {
      sum(!is.na(x))
    } else {
      length(x)
    }
  }

  # This does the summary. For each group's data frame, return a vector with
  # N, mean, median, and sd of the measurement column.
  datac <- plyr::ddply(data, groupvars, .drop = .drop,
    .fun = function(xx, col) {
      c(N = length2(xx[[col]], na.rm = na.rm),
        mean = mean(xx[[col]], na.rm = na.rm),
        median = median(xx[[col]], na.rm = na.rm),
        sd = sd(xx[[col]], na.rm = na.rm)
      )
    },
    measurevar
  )

  # Rename the "mean" and "median" columns after the measured variable
  datac <- plyr::rename(datac, c("mean" = paste0(measurevar, "_mean")))
  datac <- plyr::rename(datac, c("median" = paste0(measurevar, "_median")))

  datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean

  # Confidence interval multiplier for standard error.
  # Calculate t-statistic for confidence interval:
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
  datac$ci <- datac$se * ciMult

  datac
}
The final plot should have lines connecting the means, similar to Plot 11 of https://wellcomeopenresearch.org/articles/4-63/v2.
Thanks already for your help!

This can be useful:
#Code
# Raincloud plot of RT per condition (half violin, jittered raw points,
# boxplot, mean +/- SE from df4) with a line connecting the condition means.
ggplot(df3, aes(x = condition, y = RT, fill = condition, col = condition)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0),
                   alpha = .6, adjust = 4) +
  geom_point(aes(x = as.numeric(condition) - .15, y = RT,
                 colour = condition),
             position = position_jitter(width = .05), size = .25, shape = 20) +
  geom_boxplot(aes(x = condition, y = RT, fill = condition),
               outlier.shape = NA, alpha = .5,
               width = .1, colour = "black") +
  geom_point(data = df4, aes(x = as.numeric(condition) + .1,
                             y = RT_mean,
                             group = condition, colour = condition), shape = 18) +
  geom_errorbar(data = df4, aes(x = as.numeric(condition) + .1, y = RT_mean,
                                group = condition, colour = condition,
                                ymin = RT_mean - se, ymax = RT_mean + se),
                width = .05) +
  ylab('RT') +
  scale_fill_brewer(palette = "Dark2") + scale_colour_brewer(palette = "Dark2") +
  # "none" is the non-deprecated way to switch a legend off
  guides(fill = "none", colour = "none") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "transparent", colour = NA),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  # group = 1 puts all condition means into a single group, so geom_line()
  # has several points to connect; grouping by condition leaves one
  # observation per group, which is what triggers the geom_path error.
  geom_line(data = df4, aes(x = as.numeric(condition) + .1,
                            y = RT_mean, group = 1))
Output:

Related

Standard column width in facetted and grouped ggplot bar plot

I've made a bar chart using ggplot with grouped data, and facetted with facet_grid. The column widths are inconsistent, so I want to make them all the same. I've read this can be done with preserve="single", but it seems to mess up the position dodging. Any idea how to prevent this from happening?
Here is a small sample of the data:
data <- structure(list(grp2 = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L,
7L), .Label = c("CSF1", "CSF2", "PC", "NC", "GPC", "GNC", "standard"
), class = "factor"), label2 = structure(c(7L, 8L, 9L, 7L, 8L,
9L, 7L, 15L, 15L, 15L, 15L, 15L, 7L, 8L, 9L, 7L, 8L, 9L, 7L,
15L, 15L, 15L, 15L, 15L), .Label = c("CSF1_raw", "CSF1_supernatant",
"CSF1_pellet", "CSF2_raw", "CSF2_supernatant", "CSF2_pellet",
"PC_raw", "PC_supernatant", "PC_pellet", "NC_raw", "NC_supernatant",
"NC_pellet", "GPC", "GNC", "standard", "NC"), class = "factor"),
mda_label = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 1L, 1L, 1L, 1L
), .Label = c("none", "mda_20", "mda_200"), class = "factor"),
conc = c(`7` = 0, `8` = 0, `9` = 0.324886127298521, `55` = 4.14765656994934,
`56` = 1.16840050032707, `57` = 8.33529714053568, `76` = 10.6220645144775,
`77` = 48.9241552191721, `78` = 4.51513315624087, `79` = 1.03887911533275,
`80` = 0.0445944796011582, `81` = 0.00484116548901831, `89` = 0,
`90` = 0, `91` = 0.322922569348207, `137` = 6.38488684568018,
`138` = 1.68909814271646, `139` = 7.61828609738757, `158` = 15.3082130743032,
`159` = 41.3127531345335, `160` = 4.64193087683391, `161` = 0.411672491030815,
`162` = 0.0568193835425769, `163` = 0.00439419098560105)), row.names = c(NA,
-24L), class = c("tbl_df", "tbl", "data.frame"))
Here's the initial plot:
ggplot(data, aes(x=label2, y=conc, colour=mda_label, fill=mda_label)) +
facet_grid(. ~ grp2, scales="free_x", space="free") +
stat_summary(fun = mean, geom = "bar", position = position_dodge()) +
stat_summary(fun.data = mean_se, geom = "errorbar", colour="black", width=0.5,
position = position_dodge(width=0.9)) +
geom_point(position = position_dodge(width=0.9), pch=21, colour="black") +
scale_y_continuous(trans='pseudo_log',
labels = scales::number_format(accuracy=0.01)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
But when I try to standardise the column widths with preserve="single", it gets messed up:
ggplot(data, aes(x=label2, y=conc, colour=mda_label, fill=mda_label)) +
facet_grid(. ~ grp2, scales="free_x", space="free") +
stat_summary(fun = mean, geom = "bar", position = position_dodge(preserve="single")) +
stat_summary(fun.data = mean_se, geom = "errorbar", colour="black", width=0.5,
position = position_dodge(width=0.9, preserve="single")) +
geom_point(position = position_dodge(width=0.9, preserve="single"), pch=21, colour="black") +
scale_y_continuous(trans='pseudo_log',
labels = scales::number_format(accuracy=0.01)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Since you're using data that has 0 values, you could add 0 values for the other 'mda_label' levels in the "standard" grp2/label2 category.
data <- rbind(data, data.frame(grp2 = c("standard", "standard"),
label2 = c("standard", "standard"),
mda_label = c("mda_20", "mda_200"),
conc = c(0, 0)))
Also you never actually make the bar plot
data %>%
ggplot(aes(label2, conc, fill = mda_label)) +
geom_col(position = position_dodge(width = 1)) +
facet_grid(. ~ grp2, scales = "free", space = "free")

Change color for specific data in ggplot2

I have 15 measurement points and i defined "renkler" color palette for them. I want to change the color of 2 (red: DEF-2 and DEF-13 points in the ps_no column) in these 15.
My codes are
library(ggplot2)
library(reshape)
dat <- read.delim("a.txt")
dat$Date <- as.Date(dat$Date,"%d/%m/%Y")
# order
dat$parameter <- factor(dat$parameter, levels = c("DEF-2", "DEF-13"))
dat$ps_no <- factor(dat$ps_no, levels = c("DEF-2", "PS.584", "PS.585", "PS.586", "PS.603", "PS.630", "DEF-13", "PS.600", "PS.667", "PS.690", "PS.714", "PS.734", "PS.754", "PS.811", "PS.813"))
# create own color palette
library(RColorBrewer)
renkler = c(brewer.pal(name="Set2", n = 7), brewer.pal(name="Set2", n = 8))
# Setup plot without facets
p <- ggplot(data = dat, aes(x = Date, y = value)) +
geom_line(aes(color = ps_no)) +
geom_point(aes(color = ps_no)) +
scale_color_manual(values = renkler) + # oluşturduğumuz paleti yüklemek için
scale_x_date(date_breaks = "1 months",date_labels = "%Y-%m",
limits = as.Date.character(c("01/12/2017","31/12/2018"),
format = "%d/%m/%Y")) +
ylab("[mm/year]") +
xlab("") +
facet_grid(parameter ~ .) +
theme_bw()
p + theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
)
and the data output with dput(dat):
structure(list(parameter = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("DEF-13",
"DEF-2"), class = "factor"), ps_no = structure(c(3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 8L, 8L, 8L, 2L,
2L, 2L, 6L, 6L, 6L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L,
12L, 12L, 12L, 13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 1L,
1L, 1L), .Label = c("DEF-13", "DEF-2", "PS.584", "PS.585", "PS.586",
"PS.600", "PS.603", "PS.630", "PS.667", "PS.690", "PS.714", "PS.734",
"PS.754", "PS.811", "PS.813"), class = "factor"), Date = structure(c(17534,
17546, 17870, 17882, 17534, 17546, 17870, 17882, 17534, 17546,
17870, 17882, 17534, 17546, 17882, 17534, 17546, 17882, 17536,
17557, 17879, 17534, 17546, 17882, 17534, 17546, 17882, 17534,
17546, 17882, 17534, 17546, 17882, 17534, 17546, 17882, 17534,
17546, 17882, 17534, 17546, 17882, 17534, 17546, 17882, 17536,
17549, 17886), class = "Date"), value = c(0, 1.23684, -12.15729097,
-11.4102363, 0, 2.45200798, 1.12950398, -2.76779102, 0, 0.924571,
-7.1917482, -6.2764626, 0, -4.0725265, 0.4847485, 0, 0.290382,
-6.098794, 0, 0.813289109, -0.426076522, 0, 1.7502, -5.139665,
0, -29.67012, -14.956098, 0, 12.8852143, 7.4377433, 0, 1.404183,
-12.426633, 0, -24.09551, -7.619493, 0, -4.194441, -16.258703,
0, -0.835691, -10.504454, 0, 1.311699, 6.30102, 0, -1.49366556,
-1.835284539)), row.names = c(NA, -48L), class = "data.frame")
I also need to change the legend title (ps_no) and the texts on the right side of the plots (DEF-2 and DEF-13).
Thank you.
Edit:
I filter the data which I want to show different color with using filter command. After filter command, I add a command line for geom_line and another command line for geom_point. It is working in the plot. But this is not the answer literally because the colors in the legend do not change.
So this is the new version of the code:
library(ggplot2)
library(reshape)
dat <- read.delim("aroundDEF.txt")
dat$Date <- as.Date(dat$Date,"%d/%m/%Y")
# order
dat$parameter <- factor(dat$parameter, levels = c("DEF-2", "DEF-13"))
dat$ps_no <- factor(dat$ps_no, levels = c("DEF-2", "PS.584", "PS.585", "PS.586", "PS.603", "PS.630", "DEF-13", "PS.600", "PS.667", "PS.690", "PS.714", "PS.734", "PS.754", "PS.811", "PS.813"))
# create own color palette
library(RColorBrewer)
renkler = c(brewer.pal(name="Set2", n = 7), brewer.pal(name="Set2", n = 8))
# filter dataframe to get data to be highlighted; this must exist before the
# layers below refer to it. dplyr::filter() is called with its namespace so
# the snippet does not depend on dplyr being attached.
highlight_df <- dplyr::filter(dat, ps_no == "DEF-2" | ps_no == "DEF-13")
# Setup plot without facets
p <- ggplot(data = dat, aes(x = Date, y = value)) +
  geom_line(aes(color = ps_no)) +
  # the constant colour outside aes() overrides the palette for the two
  # highlighted series; aes(color = ps_no) is kept so each series is still
  # drawn as its own line
  geom_line(data = highlight_df, aes(color = ps_no), color = '#da0018') +
  geom_point(aes(color = ps_no)) +
  geom_point(data = highlight_df, aes(color = ps_no), color = '#da0018') +
  scale_color_manual(values = renkler) +
  scale_x_date(date_breaks = "1 months", date_labels = "%Y-%m",
               limits = as.Date(c("01/12/2017", "31/12/2018"),
                                format = "%d/%m/%Y")) +
  ylab("[mm/year]") +
  xlab("") +
  # relabel the facet strips on the right-hand side
  facet_grid(parameter ~ .,
             labeller = as_labeller(c("DEF-2" = "DEF-2 and around",
                                      "DEF-13" = "DEF-13 and around"))) +
  theme_bw()
p + theme(
  axis.text.x = element_text(angle = 45, hjust = 1),
  panel.grid.major.x = element_blank(),
  panel.grid.minor.x = element_blank()
)
In short, still I need an answer...
After renkler variable:
renkler[1]= "#DA0018"
renkler[7]= "#DA0018"
For the legend title:
scale_color_manual(values = renkler, name="new name")

How to add marginal rugs above bars of a bar chart with ggplot2

Is it possible to add marginal rug lines above bars? Using the data set below, how can you add 4 rug lines above Brazil, 8 above Canada, etc.
ctryfees <- feesctry %>% group_by(country) %>% summarise(total = sum(fees))
library(ggplot2)
library(ggthemes)
ggplot(ctryfees, aes(x = country, y = total)) +
geom_bar(stat = "identity") + theme_tufte() +
ggtitle("Fees Paid Law Firms per Country\nNumber of Firms Paid\n") +
labs(x = "", y = "") +
scale_y_continuous(label = dollar.format) +
geom_rug(data = feesctry, mapping = aes(x = country, y = firms), sides = "top")
The code does not work after the scale_y_continuous line as it throws this error: Error: Discrete value supplied to continuous scale
> dput(feesctry)
structure(list(country = structure(c(1L, 1L, 1L, 1L, 2L, 3L,
4L, 4L, 5L, 5L, 6L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 3L, 3L,
3L, 3L), .Label = c("Brazil", "Canada", "China", "France", "Germany",
"UK"), class = "factor"), firms = structure(c(1L, 2L, 3L, 4L,
5L, 13L, 18L, 19L, 20L, 21L, 22L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
23L, 24L, 14L, 15L, 16L, 17L), .Label = c("brazil1", "brazil2",
"brazil3", "brazil4", "can1", "can2", "can3", "can4", "can5",
"can6", "can7", "can8", "china1", "china2", "china3", "china4",
"china5", "france1", "france2", "german1", "german2", "uk1",
"uk2", "uk3"), class = "factor"), fees = c(80000, 80000, 80000,
80000, 1e+05, 5e+05, 2e+05, 2e+05, 1e+05, 1e+05, 5e+05, 1e+05,
1e+05, 1e+05, 1e+05, 1e+05, 1e+05, 1e+05, 5e+05, 5e+05, 5e+05,
5e+05, 5e+05, 5e+05)), .Names = c("country", "firms", "fees"), row.names = c(NA,
-24L), class = "data.frame")
From
p <-
ggplot(ctryfees, aes(x = country, y = total)) +
geom_bar(stat = "identity") + theme_tufte() +
ggtitle("Fees Paid Law Firms per Country\nNumber of Firms Paid\n") +
labs(x = "", y = "") +
scale_y_continuous(label = dollar_format())
you could try
p + geom_rug(data = transform(feesctry, id = as.numeric(country)),
mapping = aes(x = ave(id, id, FUN = function(x)
x + scale(seq_along(x), scale = 50)),
y = 1),
sides = "top")
or just
p + geom_rug(data = feesctry,
mapping = aes(x = jitter(as.numeric(country)),
y = 1),
sides = "top")

ggplot2 - geom_ribbon bug?

This code throws an error and I can't figure out why...
library( plyr )
library( ggplot2 )
library( grid )
library( proto )
# the master dataframe
myDF = structure(list(Agg52WkPrceRange = c(2L, 2L, 2L, 2L, 2L, 2L, 3L,
5L, 3L, 5L, 3L, 5L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 3L, 4L, 3L, 4L, 4L, 4L, 4L), OfResidualPntReturn52CWk = c(0.201477324,
0.22350293, 0.248388728, 0.173871456, 0.201090654, 0.170666183,
0.18681883, 0.178840521, 0.159744891, 0.129811042, 0.13209741,
0.114989407, 0.128347625, 0.100945992, 0.057017002, 0.081123718,
0.018900252, 0.021784814, 0.081931816, 0.059067844, 0.095879746,
0.038977508, 0.078895248, 0.051344317, 0.077515295, 0.011776214,
0.099216033, 0.054714439, 0.022879951, -0.079558277, -0.050889584,
-0.006934821, -0.003407085, 0.032545474, -0.003387139, 0.030418511,
0.053942523, 0.051398537, 0.073482355, 0.087963039, 0.079555591,
-0.040490418, -0.130754663, -0.125826649, -0.141766316, -0.150708718,
-0.171906882, -0.174623614, -0.212945405, -0.174480554), IndependentVariableBinned = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 3L, 10L, 3L, 10L, 4L, 10L, 4L, 2L, 4L, 4L,
4L, 5L, 2L, 2L, 2L, 3L, 3L, 5L, 5L, 5L, 5L, 6L, 3L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 8L, 9L, 9L, 9L, 9L,
10L, 10L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10"), class = "factor")), .Names = c("Agg52WkPrceRange",
"OfResidualPntReturn52CWk", "IndependentVariableBinned"), row.names = 28653:28702, class = "data.frame")
# secondary data frame
meansByIndependentVariableBin = ddply( myDF , .( IndependentVariableBinned ) , function( df ) mean( df[[ "OfResidualPntReturn52CWk" ]] ) )
# construct the plot
thePlot = ggplot( myDF , aes_string( x = "IndependentVariableBinned" , y = "OfResidualPntReturn52CWk" ) )
thePlot = thePlot + geom_point( data = meansByIndependentVariableBin , aes( x = IndependentVariableBinned , y = V1 ) )
thePlot = thePlot + geom_line( data = meansByIndependentVariableBin , aes( x = IndependentVariableBinned , y = V1 , group = 1 ) )
thePlot = thePlot + geom_ribbon( data = meansByIndependentVariableBin , aes( group = 1 , x = IndependentVariableBinned , ymin = V1 - 1 , ymax = V1 + 1 ) )
# print - error!
print( thePlot )
I've tried with/without group=1. The error is:
Error in eval(expr, envir, enclos) :
object 'OfRelStrength52CWk' not found
but not sure how that is relevant?? I must be missing something obvious. Take away the last geom (ribbon) and it plots just fine!
There is no bug in geom_ribbon. Your error is because you are defining y = OfResidualPntReturn52CWk in your ggplot call as a result of which geom_ribbon is looking for it. Since you are passing a different data frame to geom_ribbon, there is confusion and hence an error. From your plotting call, although you are using y = OfResidualPntReturn52CWk in your ggplot call, there is no layer where you are calling it, and hence it is immaterial to the plot.
Here is how to do it correctly (if I am understanding what you intend to do in this plot)
MIVB = meansByIndependentVariableBin
thePlot = ggplot(myDF , aes(x = IndependentVariableBinned)) +
geom_point(aes(y = OfResidualPntReturn52CWk)) +
geom_point(data = MIVB, aes(y = V1), colour = 'red') +
geom_line(data = MIVB , aes(y = V1, group = 1), colour = 'red') +
geom_ribbon(data = MIVB, aes(group = 1, ymin = V1 - 1 , ymax = V1 + 1),
alpha = 0.2)
Here is the output it produces
Here is another way to do it, without computing the means in advance. Also I have used mean +- standard errors in the ribbon as I find the choice of +- 1 to be arbitrary
# Convert the binned factor to its numeric level codes so the x axis is
# continuous and the summary line/ribbon can be drawn across bins.
myDF$IndependentVariableBinned <- as.numeric(myDF$IndependentVariableBinned)
# Raw points in black; per-bin mean (points + line) in red; ribbon spans
# mean +/- one standard error, all computed by the summary stat.
# `fun` replaces the deprecated `fun.y` argument.
thePlot <- ggplot(myDF, aes(x = IndependentVariableBinned,
                            y = OfResidualPntReturn52CWk)) +
  geom_point() +
  geom_point(stat = 'summary', fun = 'mean', colour = 'red') +
  geom_line(stat = 'summary', fun = 'mean', colour = 'red') +
  geom_ribbon(stat = 'summary', fun.data = 'mean_se', alpha = 0.2)
This produces
@Ramnath is spot on. Your initial call to ggplot is not needed, as all of the layers you are plotting come from the summarized data.frame made by ddply(). You can also simplify your call to ddply() by using the summarize function:
meansByIndependentVariableBin2 = ddply( myDF , .( IndependentVariableBinned )
, summarize, means = mean(OfResidualPntReturn52CWk) )
I would then plot your graph as such:
ggplot(meansByIndependentVariableBin2, aes(x = as.numeric(IndependentVariableBinned), y = means)) +
geom_ribbon(aes(ymin = (means - 1), ymax = (means + 1)), alpha = .4) +
geom_point() +
geom_line()
Is that what you had in mind? I added an alpha to the ribbon layer so we can see the lines and points clearly.

visualize associations between two groups of data

Each datapoint has a pairing of A and B, and there are multiple distinct values in A and multiple distinct values in B, i.e. multiple syndromes and multiple diagnoses, although each datapoint has exactly one syndrome-diagnosis pair.
Examples, suggestions, or ideas much appreciated
here's what the data is like. And I want to see connections between values of A and B (how many GG's are linked to TTs etc). Both are nominal datatypes.
ID,A ,B
1,GG,TT
2,AA,SS
3,BB,XX
4,DD,SS
5,DD,TT
6,CC,XX
7,HH,ZZ
8,AA,TT
9,CC,RR
10,DD,ZZ
11,AA,XX
12,AA,TT
13,DD,SS
14,DD,XX
15,AA,YY
16,CC,ZZ
17,FF,SS
18,FF,XX
19,BB,VV
20,GG,VV
21,GG,SS
22,AA,RR
23,AA,TT
24,AA,SS
25,CC,VV
26,CC,TT
27,FF,RR
28,GG,UU
29,CC,TT
30,BB,ZZ
31,II,TT
32,FF,RR
33,BB,SS
34,GG,YY
35,FF,RR
36,BB,VV
37,II,RR
38,CC,YY
39,FF,VV
40,AA,XX
41,AA,ZZ
42,GG,VV
43,BB,UU
44,II,UU
45,II,SS
46,DD,SS
47,AA,UU
48,BB,VV
49,GG,TT
50,BB,TT
Since your data is bipartite, I would suggest plotting points in the first factor on one side, points in the other factor on the other, with lines between them, like so:
The code I used to generate this was:
## Make up data.
data <- data.frame(X1=sample(state.region, 10),
X2=sample(state.region, 10))
## Set up plot window.
plot(0, xlim=c(0,1), ylim=c(0,1),
type="n", axes=FALSE, xlab="", ylab="")
factor.to.int <- function(f) {
  # Spread the level codes evenly over [0, 1]:
  # first level -> 0, last level -> 1.
  zero_based_code <- as.integer(f) - 1
  last_code <- nlevels(f) - 1
  zero_based_code / last_code
}
segments(factor.to.int(data$X1), 0, factor.to.int(data$X2), 1,
col=data$X1)
axis(1, at = seq(0, 1, by = 1 / (length(levels(data$X1)) - 1)),
labels = levels(data$X1))
axis(3, at = seq(0, 1, by = 1 / (length(levels(data$X2)) - 1)),
labels = levels(data$X2))
This is what I do. A darker colour indicates a more important combination of A and B.
dataset <- data.frame(A = sample(LETTERS[1:5], 200, prob = runif(5), replace = TRUE), B = sample(LETTERS[1:5], 200, prob = runif(5), replace = TRUE))
Counts <- as.data.frame(with(dataset, table(A, B)))
library(ggplot2)
ggplot(Counts, aes(x = A, y = B, fill = Freq)) + geom_tile() + scale_fill_gradient(low = "white", high = "black")
Or if you prefer lines
library(ggplot2)
dataset <- data.frame(A = sample(letters[1:5], 200, prob = runif(5), replace = TRUE), B = sample(letters[1:5], 200, prob = runif(5), replace = TRUE))
Counts <- as.data.frame(with(dataset, table(A, B)))
Counts$X <- 0
Counts$Xend <- 1
Counts$Y <- as.numeric(Counts$A)
Counts$Yend <- as.numeric(Counts$B)
ggplot(Counts, aes(x = X, xend = Xend, y = Y, yend = Yend, size = Freq)) +
geom_segment() + scale_x_continuous(breaks = 0:1, labels = c("A", "B")) +
scale_y_continuous(breaks = 1:5, labels = letters[1:5])
This third option adds labels to the data points using geom_text().
library(ggplot2)
dataset <- data.frame(
A = sample(letters[1:5], 200, prob = runif(5), replace = TRUE),
B = sample(LETTERS[20:26], 200, prob = runif(7), replace = TRUE)
)
Counts <- as.data.frame(with(dataset, table(A, B)))
Counts$X <- 0
Counts$Xend <- 1
Counts$Y <- as.numeric(Counts$A)
Counts$Yend <- as.numeric(Counts$B)
ggplot(Counts, aes(x = X, xend = Xend, y = Y, yend = Yend)) +
geom_segment(aes(size = Freq)) +
scale_x_continuous(breaks = 0:1, labels = c("A", "B")) +
scale_y_continuous(breaks = -1) +
geom_text(aes(x = X, y = Y, label = A), colour = "red", size = 7, hjust = 1, vjust = 1) +
geom_text(aes(x = Xend, y = Yend, label = B), colour = "red", size = 7, hjust = 0, vjust = 0)
Maybe mosaicplot:
X <- structure(list(
ID = 1:50,
A = structure(c(6L, 1L, 2L, 4L, 4L, 3L, 7L, 1L, 3L, 4L, 1L, 1L, 4L, 4L, 1L, 3L, 5L, 5L, 2L, 6L, 6L, 1L, 1L, 1L, 3L, 3L, 5L, 6L, 3L, 2L, 8L, 5L, 2L, 6L, 5L, 2L, 8L, 3L, 5L, 1L, 1L, 6L, 2L, 8L, 8L, 4L, 1L, 2L, 6L, 2L), .Label = c("AA","BB", "CC", "DD", "FF", "GG", "HH", "II"), class = "factor"),
B = structure(c(3L, 2L, 6L, 2L, 3L, 6L, 8L, 3L, 1L, 8L, 6L, 3L, 2L, 6L, 7L, 8L, 2L, 6L, 5L, 5L, 2L, 1L, 3L, 2L, 5L, 3L, 1L, 4L, 3L, 8L, 3L, 1L, 2L, 7L, 1L, 5L, 1L, 7L, 5L, 6L, 8L, 5L, 4L, 4L, 2L, 2L, 4L, 5L, 3L, 3L), .Label = c("RR", "SS", "TT", "UU", "VV", "XX", "YY", "ZZ"), class = "factor")
), .Names = c("ID", "A", "B"), class = "data.frame", row.names = c(NA, -50L)
)
mosaicplot(with(X,table(A,B)))
For your example dataset:
Thanks! I think that the connectivity between elements in each class is best visualized by the link graph examples given by both Jonathon and Thierry. Thierry's 2nd which shows the magnitude is definitely where i will start.
update
Thanks everyone for your ideas and tips!
I came across the bipartite package, which has functions to visualize this kind of data. I think it's a clean visualization of the relationships I am trying to show.
did:
library(bipartite)
dataset <- data.frame(
A = sample(letters[1:5], 200, prob = runif(5), replace = TRUE),
B = sample(LETTERS[20:26], 200, prob = runif(7), replace = TRUE)
)
datamat <- as.matrix(table(dataset$A, dataset$B))
visweb(datamat, text = "interaction", textsize = .8)
giving:
visweb result
couldnt put image in as a new user :(

Resources