Ggmap-geompoint, how to make grouping? - r

Suppose I have this dataframe
latitude longitude category
42.39905 -72.93871 A
42.39905 -73.93871 B
43.37471 -73.36336 A
43.37471 -74.36336 B
44.28322 -74.31423 B
What I would like to do is group the coordinates by their integer (rounded) values. Then, for each group, I would draw a bubble whose size is a function of the number of points in that group.
The colour should diverge from A to B, depending on how many more A than B the group contains. So far, I've been doing this:
map = get_map(location="jk",zoom=6,source="stamen")
#Plot the point
ggmap(map)+
geom_point(data=zipmap,
aes(x=round(longitude),y=round(latitude),colour=category))+
scale_color_brewer(type='div')
But as you would expect, the colour is not diverging, and the bubble size is not implemented. How could I achieve this? I can't use scale_x_continuous, as it is already used somewhere inside ggmap.

Here is one direction to try.
dput(df)
structure(list(latitude = c(42.39905, 42.39905, 43.37471, 43.37471,
44.28322), longitude = c(-73, -74, -73, -74, -74), category = structure(c(1L,
2L, 1L, 2L, 2L), .Label = c("A", "B"), class = "factor"), latround = structure(c(1L,
1L, 2L, 2L, 3L), .Label = c("42", "43", "44"), class = "factor"),
longround = structure(c(2L, 1L, 2L, 1L, 1L), .Label = c("-74",
"-73"), class = "factor")), .Names = c("latitude", "longitude",
"category", "latround", "longround"), row.names = c(NA, -5L), class = "data.frame")
# Attach dplyr for the pipe-free grouping verbs used below.
library(dplyr)

# Store the coordinates rounded to whole degrees as factor columns.
df[["latround"]] <- factor(round(df[["latitude"]]))
df[["longround"]] <- factor(round(df[["longitude"]]))

# One row per rounded latitude; `catnumber` is the number of rows in that band.
df2 <- summarise(group_by(df, latround), catnumber = n())
latround catnumber
1 42 2
2 43 2
3 44 1
library(ggmap)
You don't specify what the location "jk" is, so from here I only outline an approach to plotting.
# Collapse the points into one bubble per rounded (lat, lon) cell.
# `n` is the number of points in the cell (drives bubble size) and `share_a`
# is the fraction of those points in category "A" (drives the colour).
# The coordinates stay numeric so they can be placed on the map axes.
bubbles <- df %>%
  mutate(lat = round(latitude), lon = round(longitude)) %>%
  group_by(lat, lon) %>%
  summarise(n = n(), share_a = mean(category == "A")) %>%
  ungroup()

# "jk" is the placeholder location taken from the question; substitute a real
# place name or lon/lat pair before running.
map <- get_map(location = "jk", zoom = 6, source = "stamen")

# Plot the bubbles. `data` must be named because the first positional argument
# of geom_point() is `mapping`, and anything that varies with the data
# (size, colour) has to live inside aes().
ggmap(map) +
  geom_point(
    data = bubbles,
    aes(x = lon, y = lat, size = n, colour = share_a)
  ) +
  # bubble area proportional to the count
  scale_size_area(max_size = 12) +
  # continuous diverging scale centred on an even A/B split
  scale_colour_gradient2(
    low = "blue", mid = "lightyellow", high = "red",
    midpoint = 0.5, limits = c(0, 1)
  )

Related

connect points within position_dodged factor x-axis in ggplot2

I'm trying to add significance annotations to an errorbar plot with a factor x-axis and dodged groups within each level of the x-axis. It is a similar but NOT identical use case to this
My base errorbar plot is:
library(ggplot2)
library(dplyr)
pres_prob_pd = structure(list(x = structure(c(1, 1, 1, 2, 2, 2, 3, 3, 3), labels = c(`1` = 1,
`2` = 2, `3` = 3)), predicted = c(0.571584427222816, 0.712630712634987,
0.156061969566517, 0.0162388386564817, 0.0371877245103279, 0.0165022541901018,
0.131528946944238, 0.35927812866896, 0.0708662221985375), std.error = c(0.355802875027348,
0.471253661425626, 0.457109887762665, 0.352871728451576, 0.442646879181155,
0.425913568532558, 0.376552208691762, 0.48178172708116, 0.451758041335245
), conf.low = c(0.399141779923204, 0.496138837620712, 0.0701919316506831,
0.00819832576725402, 0.0159620304815404, 0.00722904089045731,
0.0675129352870401, 0.17905347369819, 0.030504893442457), conf.high = c(0.728233665534388,
0.861980236164486, 0.311759350126477, 0.031911364587827, 0.0842227723261319,
0.0372248587668487, 0.240584344249407, 0.590437963881823, 0.156035177669385
), group = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain",
"neutral", "uncertain"), class = "factor"), group_col = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain", "neutral",
"uncertain"), class = "factor"), language = structure(c(2L, 2L,
2L, 1L, 1L, 1L, 3L, 3L, 3L), .Label = c("english", "dutch", "german"
), class = "factor"), top = c(0.861980236164486, 0.861980236164486,
0.861980236164486, 0.0842227723261319, 0.0842227723261319, 0.0842227723261319,
0.590437963881823, 0.590437963881823, 0.590437963881823)), row.names = c(NA,
-9L), groups = structure(list(language = structure(1:3, .Label = c("english",
"dutch", "german"), class = "factor"), .rows = structure(list(
4:6, 1:3, 7:9), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Shared dodge so the point and error-bar layers are offset by the same amount.
pd <- position_dodge(width = 0.75)

# Base plot: predicted value per language, one colour/shape per group,
# with error bars running from conf.low to conf.high.
p <- ggplot(
  data = pres_prob_pd,
  mapping = aes(x = language, y = predicted, colour = group, shape = group)
) +
  geom_point(size = 2, position = pd) +
  geom_errorbar(
    mapping = aes(ymin = conf.low, ymax = conf.high),
    position = pd,
    width = 0.125
  )
p
What I want to do is annotate the plot such that the contrasts between group within each level of language are annotated for significance. I've plotted points representing the relevant contrasts and (toy) sig. annotations as follows:
# Bump function: shifts each element of `x` upward by a steadily growing
# offset, so that successive annotations are stacked instead of overlapping.
#
# x      numeric vector of baseline y positions.
# start  offset added to the first element (default 0.025).
# step   additional offset added for every subsequent element (default 0.075).
#
# Returns a numeric vector the same length as `x`, where element k equals
# x[k] + start + (k - 1) * step. An empty `x` yields an empty numeric vector.
f <- function(x, start = 0.025, step = 0.075) {
  # vectorised: no need to grow a result with c() inside a loop
  x + start + step * (seq_along(x) - 1)
}
# Create the contrasts: every unordered pair of the three group names
# (columns X1 and X2, as referenced further down), labelled cont_1..cont_3.
combs = data.frame(gtools::combinations(3, 2, v=c("certain", "neutral", "uncertain"), set=F, repeats.allowed=F)) %>%
mutate(contrast=c("cont_1","cont_2","cont_3"))
# Repeat the contrast table once per language, attach the per-language columns
# language:top from pres_prob_pd (de-duplicated), and group by language so the
# next step is evaluated separately within each language.
combs = rbind(combs %>% mutate(language = 'english'),
combs %>% mutate(language='dutch'),
combs %>% mutate(language = "german")) %>%
left_join(select(pres_prob_pd,language:top)%>%distinct(),by='language') %>%
group_by(language)
# Long transform and y position: y_pos = f(top) is computed per language
# group; X1/X2 are then stacked into key column `long` and value column
# `probability`, language is turned into a factor with the level order
# english, dutch, german, and rows are sorted by language and contrast.
# NOTE(review): gather()/spread() are presumably tidyr's; no library(tidyr)
# call is visible in this snippet - confirm it is attached.
combs_long = mutate(combs,y_pos=f(top)) %>% gather(long, probability, X1:X2, factor_key=TRUE) %>% mutate(language=factor(language,levels=c("english","dutch","german"))) %>%
arrange(language,contrast)
# Back to wide: one row per contrast again, with X1 and X2 as columns.
combs_wide =combs_long %>% spread(long,probability)
# Toy significance labels: the same three labels repeated three times.
combs_wide$p = rep(c('***',"*","ns"),3)
# Overlay the contrast points and their significance labels on the base plot.
p +
  # one point per contrast end, coloured/shaped by the group it refers to
  geom_point(
    data = combs_long,
    mapping = aes(
      x = language,
      y = y_pos,
      colour = probability,
      shape = probability
    ),
    position = pd,
    size = 2,
    inherit.aes = TRUE
  ) +
  # the label for each contrast, drawn slightly above its points
  geom_text(
    data = combs_wide,
    mapping = aes(x = language, y = y_pos + 0.025, label = p, group = X1),
    position = position_dodge(width = 0.75),
    colour = "black",
    inherit.aes = FALSE
  )
What I am failing to achieve is plotting a line connecting each of the contrasts of group within each level of language, as is standard when annotating significant group-wise differences. Any help much appreciated!

ggraph edges are connecting wrong?

I am working on generating a hierarchical edge plot where the edge's color/transparency/thickness varies by the column (pvalue) in my connect dataframe, however the color/transparency/thickness of the edges in the plot I generated don't always map to the values in column (pvalue). For example, subgroup1 and subgroup4 should have the strongest thickest connection (pvalue is E-280), when in fact they don't, rather the connection between subgroup3 and subgroup4 looks to be strongest.
This data generates a reproducible example:
> dput(vertices)
structure(list(name = structure(c(3L, 1L, 2L, 4L, 5L, 6L, 7L), .Label = c("gp1",
"gp2", "origin", "subgroup1", "subgroup2", "subgroup3", "subgroup4"
), class = "factor"), id = c(NA, NA, NA, 1L, 2L, 3L, 4L), angle = c(NA,
NA, NA, 0, -90, 0, -90), hjust = c(NA, NA, NA, 1, 1, 1, 1)), row.names = c(NA,
-7L), class = "data.frame")
> dput(hierarchy)
structure(list(from = structure(c(3L, 3L, 1L, 1L, 2L, 2L), .Label = c("gp1",
"gp2", "origin"), class = "factor"), to = structure(1:6, .Label = c("gp1",
"gp2", "subgroup1", "subgroup2", "subgroup3", "subgroup4"), class = "factor")), class = "data.frame", row.names = c(NA,
-6L))
> dput(connect)
structure(list(from = structure(c(1L, 1L, 2L, 3L, 1L, 2L, 3L,
1L), .Label = c("subgroup1", "subgroup2", "subgroup3"), class = "factor"),
to = structure(c(1L, 2L, 2L, 1L, 3L, 3L, 3L, 3L), .Label = c("subgroup2",
"subgroup3", "subgroup4"), class = "factor"), pvalue = c(1.68e-204,
1.59e-121, 9.32e-73, 9.32e-73, 1.59e-21, 9.32e-50, 9.32e-40,
9.32e-280)), class = "data.frame", row.names = c(NA, -8L))
and this is the code I used to make this example plot:
# Row positions in `vertices` of each connection's two end points, plus the
# value that will later drive the edge colour / alpha / width.
from <- match(connect$from, vertices$name)
to <- match(connect$to, vertices$name)
col <- connect$pvalue

# Label information: angle, horizontal adjustment and a possible flip.

# Leaves are the vertices that never occur as a parent in `hierarchy$from`.
myleaves <- which(!(vertices$name %in% hierarchy$from))
nleaves <- length(myleaves)

# Number the leaves 1..nleaves; every non-leaf vertex keeps NA.
# seq_len() replaces the redundant seq(1:nleaves), which is also malformed
# when there are no leaves.
vertices$id <- NA_integer_
vertices$id[myleaves] <- seq_len(nleaves)

# Angle of each leaf label: 90 degrees minus the leaf's share of 360 degrees.
vertices$angle <- 90 - 360 * vertices$id / nleaves

# Alignment of labels: right (1) or left (0).
# NOTE(review): the threshold 41 looks carried over from a larger example; with
# only `nleaves` leaves every id is below it, so all labels get hjust = 1 -
# confirm whether a value relative to nleaves was intended.
vertices$hjust <- ifelse(vertices$id < 41, 1, 0)

# Flip labels whose angle is below -90 by 180 degrees so they stay readable.
vertices$angle <- ifelse(
  vertices$angle < -90,
  vertices$angle + 180,
  vertices$angle
)
# Build the graph object from the parent/child table and the vertex attributes.
mygraph <- graph_from_data_frame(d = hierarchy, vertices = vertices)

# Circular dendrogram: leaf points, bundled connections mapped to `col`,
# rotated leaf labels, and log-transformed colour / alpha / width edge scales.
ggraph(mygraph, layout = "dendrogram", circular = TRUE) +
  geom_node_point(
    aes(x = x * 1.05, y = y * 1.05, filter = leaf),
    size = 2,
    alpha = 0.8
  ) +
  geom_conn_bundle(
    data = get_con(from = from, to = to, col = col),
    mapping = aes(colour = col, alpha = col, width = col)
  ) +
  geom_node_text(
    aes(
      x = x * 1.1,
      y = y * 1.1,
      filter = leaf,
      label = name,
      angle = angle,
      hjust = hjust
    ),
    size = 3.5,
    alpha = 0.6
  ) +
  scale_edge_color_continuous(trans = "log", low = "red", high = "yellow") +
  scale_edge_alpha_continuous(trans = "log", range = c(1, 0.1)) +
  scale_edge_width_continuous(trans = "log", range = c(4, 1)) +
  theme_void()
I think there is wrong mapping somewhere but I can't figure out where. Thank you so much for your input!
I believe there is a bug in this library. Rearranging the input data by the column of choice (pvalue in my case) in an ascending order helped but did not solve the issue.
connect_new <- arrange(connect, pvalue)
and I found the solution in a github issue submitted by another user. The subgroups within each group need to be ordered alphabetically in the hierarchy and vertices file. In addition, in the connect dataframe, the subgroups need to be ordered following the same order in the hierarchy and vertices file. Thanks to zhuxr11

How to Plot line chart using R for time-series analysis

I am trying to plot a line chart in R showing the number of tweets against the date and time at which they were posted.
library(ggplot2)
df1 <- structure(list(Date = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Label = c("2020-03-12",
"2020-03-13"), class = "factor"), Time = structure(c(1L, 1L, 2L,
3L, 4L, 5L), .Label = c("00:00:00Z", "00:00:01Z", "00:10:04Z",
"00:25:12Z", "01:00:02Z"), class = "factor"), Text = structure(c(5L,
3L, 6L, 4L, 2L, 1L), .Label = c("The images of demonstrations and gathering", "Premium policy get activate by company abc",
"Launches of rocket", "Premium policy get activate by company abc",
"Technology makes trend", "The images of demonstrations and gatherings",
"Weather forecasting by xyz"), class = "factor")), class = "data.frame", row.names = c(NA,
-6L))
ggplot(df1, aes(x = Date, y = text(count)) + geom_line(aes(color = variable), size = 1)
I tried the above code to plot the desired result but got an error. The dataset is given in CSV format, like this:
Date Time Text
2020-03-12 00:00:00Z The images of demonstrations and gatherings
2020-03-12 00:00:00Z Premium policy get activate by company abc
2020-03-12 00:00:01Z Weather forecasting by xyz
2020-03-12 00:10:04Z Technology makes trend
2020-03-12 00:25:12Z Launches of rocket
2020-03-12 01:00:02Z Government launch new policy to different sector improvement
I have a dataset covering nearly 15 days and want to plot a line chart of the number of tweets (one tweet per row of the Text column) to see how the volume of tweets changes across dates and times.
df1 <- structure(list(Date = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Label = c("3/12/2020",
"3/13/2020"), class = "factor"), Time = structure(c(1L, 1L, 2L,
3L, 4L, 5L), .Label = c("00:00:00Z", "00:00:01Z", "00:10:04Z",
"00:25:12Z", "01:00:02Z"), class = "factor"), Text = structure(c(5L,
3L, 6L, 4L, 2L, 1L), .Label = c("Government launch new policy to different sector",
"Launches of rocket", "Premium policy get activate by company abc",
"Technology makes trend", "The images of demonstrations and gatherings",
"Weather forecasting by xyz"), class = "factor"), X = structure(c(1L,
1L, 1L, 1L, 1L, 2L), .Label = c("", "improvement"), class = "factor")), class = "data.frame", row.names = c(NA,
-6L))
Creating the dataset df1 as above and then running the following gives you the required plot by hour:
library(tidyverse)
library(lubridate)
# Parse the time and date columns, count rows per hour of the parsed time,
# and draw the counts as a single connected line with a point per hour.
df1 %>%
  mutate(
    Time = hms(Time),
    Date = mdy(Date),
    hour = hour(Time)
  ) %>%
  count(hour) %>%
  ggplot(mapping = aes(x = hour, y = n, group = 1)) +
  geom_line() +
  geom_point()
Is this what you are after?
library(dplyr)
library(lubridate)
library(stringr)
library(ggplot2)
Answer with your data
To demonstrate data wrangling.
# your data;
df1 <- structure(list(Date = structure(c(1L, 1L, 2L, 1L, 1L, 1L),
.Label = c("2020-03-12","2020-03-13"),
class = "factor"),
Time = structure(c(1L, 1L, 2L,3L, 4L, 5L),
.Label = c("00:00:00Z", "00:00:01Z", "00:10:04Z","00:25:12Z", "01:00:02Z"),
class = "factor"),
Text = structure(c(5L,3L, 6L, 4L, 2L, 1L),
.Label = c("The images of demonstrations and gathering", "Premium policy get activate by company abc",
"Launches of rocket", "Premium policy get activate by company abc",
"Technology makes trend", "The images of demonstrations and gatherings", "Weather forecasting by xyz"), class = "factor")),
class = "data.frame", row.names = c(NA,-6L))
# Data wrangling: build one row per hour with the number of tweets in it.
df2 <- df1 %>%
  # convert every column from factor to character
  mutate_all(as.character) %>%
  # strip the trailing "Z" from the Time values
  mutate(Time = str_remove(Time, "Z$")) %>%
  # join date and time text and parse it with lubridate::ymd_hms
  mutate(dt = ymd_hms(paste(Date, Time))) %>%
  # move each time stamp up to the end of its hour
  mutate(dt = ceiling_date(dt, unit = "hour")) %>%
  group_by(dt) %>%
  summarise(nr_tweets = n())

# Plot: tweets per hour as a line, with one x-axis break per day (day/month).
p1 <- ggplot(data = df2, mapping = aes(x = dt, y = nr_tweets)) +
  geom_line() +
  scale_x_datetime(date_breaks = "1 day", date_labels = "%d/%m") +
  ggtitle("Data from question `df1`")
Answer with made up large dataset
# Made-up data: 10,000 time stamps sampled with replacement from every second
# between 2020-05-01 and 2020-05-15, each paired with a random letter as text.
tib <- tibble(
  dt = sample(
    seq(ISOdate(2020, 5, 1), ISOdate(2020, 5, 15), by = "sec"),
    10000,
    replace = TRUE
  ),
  text = sample(c(letters[1:26], LETTERS[1:26]), 10000, replace = TRUE)
)

# Round every time stamp to the nearest hour and count the tweets per hour.
tib1 <- tib %>%
  mutate(dt = round_date(dt, unit = "hour")) %>%
  group_by(dt) %>%
  summarise(nr_tweets = n())

# Line chart of tweets per hour, with one x-axis break per day (day/month).
p2 <- ggplot(tib1, aes(dt, nr_tweets)) +
  geom_line() +
  scale_x_datetime(date_breaks = "1 day", date_labels = "%d/%m") +
  ggtitle("Result using `tib` data made up to answer the question")

# Stacking two ggplots with `/` is provided by patchwork, which the snippet
# never attached; without it `p1 / p2` fails.
library(patchwork)
p1 / p2
Created on 2020-05-13 by the reprex package (v0.3.0)

Adding observation count in two-factor boxplot

I have already seen another similar question in : Add number of observations per group in ggplot2 boxplot
But that solution doesn't work for my plot, because my boxplots are grouped by two factors.
This is what I tried:
Here is my data:
> dput(Simp_Shan_Baseline_Grp[1:5,1:20])
structure(list(Datasets = structure(1:5, .Label = c("30001.10001.Visit.1.Baseline.Day.0.1h..",
"30001.10002.Visit.1.Baseline.Day.0.1h..", "30001.10003.Visit.1.Baseline.Day.0.1h..",
"30001.10004.Visit.1.Baseline.Day.0.1h..", "30001.10005.Visit.1.Baseline.Day.0.1h..",
"30001.10006.Visit.1.Baseline.Day.0.1h..", "30004.10001.Visit.1.Baseline.Day.0.1h..",
"30004.10002.Visit.1.Baseline.Day.0.1h..", "30004.10003.Visit.1.Baseline.Day.0.1h..",
"30004.10004.Visit.1.Baseline.Day.0.1h..", "30004.10006.Visit.1.Baseline.Day.0.1h..",
"30004.10007.Visit.1.Baseline.Day.0.1h..", "30004.10008.Visit.1.Baseline.Day.0.1h..",
"30005.10001.Visit.1.Baseline.Day.0.1h..", "30005.10002.Visit.1.Baseline",
"30005.10003.Visit.1.Baseline.Day.0.1h..", "30005.10004.Visit.1.Baseline.Day.0.1h..",
"30005.10005.Visit.1.Baseline.Day.0.1h..", "30005.10006.Visit.1.Baseline.Day.0.1h..",
"30005.10007.Visit.1.Baseline.Day.0.1h..", "30005.10008.Visit.1.Baseline.Day.0.1h..",
"30005.10009.Visit.1.Baseline.Day.0.1h..", "30006.10001.Visit.1.Baseline",
"30006.10002.Visit.1.Baseline", "30007.10001.Visit.1.Baseline.Day.0.1h..",
"30007.10002.Visit.1.Baseline.Day.0.1h..", "30008.10001.Visit.1.Baseline.Day.0.1h..",
"30008.10002.Visit.1.Baseline.Day.0.1h..", "30008.10003.Visit.1.Baseline",
"30008.10004.Visit.1.Baseline", "30008.10005.Visit.1.Baseline.Day.0.1h..",
"30008.10006.Visit.1.Baseline.Day.0.1h..", "30008.10007.Visit.1.Baseline.Day.0.1h..",
"30008.10008.Visit.1.Baseline.Day.0.1h..", "30009.10001.Visit.1.Baseline.Day.0.1h..",
"30009.10002.Visit.1.Baseline.Day.0.1h..", "30009.10003.Visit.1.Baseline.Day.0.1h..",
"30009.10004.Visit.1.Baseline.Day.0.1h..", "30009.10005.Visit.1.Baseline.Day.0.1h..",
"30009.10007.Visit.1.Baseline.Day.0.1h..", "30010.10001.Visit.1.Baseline.Day.0.1h..",
"30010.10002.Visit.1.Baseline.Day.0.1h..", "32005.10001.Visit.1.Baseline.Day.0.1h..",
"33001.10001.Visit.1.Baseline.Day.0.1h..", "33001.10002.Visit.1.Baseline.Day.0.1h..",
"33001.10003.Visit.1.Baseline.Day.0.1h..", "33001.10004.Visit.1.Baseline.Day.0.1h..",
"33001.10005.Visit.1.Baseline.Day.0.1h..", "33001.10006.Visit.1.Baseline.Day.0.1h..",
"33002.10001.Visit.1.Baseline.Day.0.1h..", "33002.10002.Visit.1.Baseline.Day.0.1h..",
"33002.10003.Visit.1.Baseline.Day.0.1h..", "33002.10004.Visit.1.Baseline.Day.0.1h..",
"33004.10001.Visit.1.Baseline.Day.0.1h..", "33005.10001.Visit.1.Baseline",
"33005.10002.Visit.1.Baseline.Day.0.1h..", "33005.10004.Visit.1.Baseline.Day.0.1h..",
"33006.10001.Visit.1.Baseline.Day.0.1h..", "33006.10002.Visit.1.Baseline.Day.0.1h..",
"33006.10003.Visit.1.Baseline.Day.0.1h..", "33006.10005.Unscheduled.Visit.F.Day.0.8h..",
"33006.10006.Visit.1.Baseline.Day.0.1h..", "33009.10001.Visit.1.Baseline.Day.0.1h..",
"33009.10002.Visit.1.Baseline.Day.0.1h..", "33009.10003.Visit.1.Baseline.Day.0.1h..",
"33009.10004.Visit.1.Baseline.Day.0.1h..", "33009.10005.Visit.1.Baseline.Day.0.1h..",
"34003.10001.Visit.1.Baseline.Day.0.1h..", "34003.10002.Visit.1.Baseline.Day.0.1h..",
"34003.10003.Visit.1.Baseline.Day.0.1h..", "34003.10004.Visit.1.Baseline.Day.0.1h..",
"34003.10005.Visit.1.Baseline.Day.0.1h..", "34003.10006.Visit.1.Baseline.Day.0.1h..",
"34003.10007.Visit.1.Baseline.Day.0.1h..", "34003.10008.Visit.1.Baseline.Day.0.1h..",
"34004.10001.Visit.1.Baseline.Day.0.1h..", "34004.10002.Visit.1.Baseline.Day.0.1h..",
"34004.10003.Visit.1.Baseline.Day.0.1h..", "34004.10004.Visit.1.Baseline.Day.0.1h..",
"34004.10005.Visit.1.Baseline", "35104.10001.Visit.1.Baseline.Day.0.1h..",
"35106.10001.Unscheduled.Visit.R.Day.0.7h..", "35107.10001.Visit.1.Baseline.Day.0.1h..",
"35801.10001.Visit.1.Baseline", "35802.10002.Visit.1.Baseline.Day.0.1h..",
"35802.10003.Visit.1.Baseline.Day.0.1h..", "36001.10001.Visit.1.Baseline.Day.0.1h..",
"36001.10002.Visit.1.Baseline.Day.0.1h..", "36004.10003.Visit.1.Baseline.Day.0.1h..",
"36004.10004.Visit.1.Baseline.Day.0.1h..", "36004.10005.Visit.1.Baseline.Day.0.1h..",
"36004.10006.Visit.1.Baseline.Day.0.1h..", "36005.10001.Visit.1.Baseline.Day.0.1h..",
"36007.10001.Visit.1.Baseline.Day.0.1h..", "36008.10001.Visit.1.Baseline.Day.0.1h..",
"36008.10005.Visit.1.Baseline.Day.0.1h..", "36008.10007.Visit.1.Baseline.Day.0.1h..",
"36008.10012.Visit.1.Baseline.Day.0.1h..", "36008.10017.Visit.1.Baseline.Day.0.1h..",
"36008.10018.Visit.1.Baseline.Day.0.1h..", "36008.10020.Visit.1.Baseline.Day.0.1h..",
"36008.10021.Visit.1.Baseline.Day.0.1h..", "36008.10022.Visit.1.Baseline.Day.0.1h..",
"36009.10001.Visit.1.Baseline.Day.0.1h..", "36009.10002.Visit.1.Baseline.Day.0.1h..",
"36009.10003.Visit.1.Baseline.Day.0.1h..", "36009.10004.Visit.1.Baseline.Day.0.1h..",
"36009.10005.Visit.1.Baseline.Day.0.1h..", "36009.10006.Visit.1.Baseline.Day.0.1h..",
"36010.10001.Visit.1.Baseline.Day.0.1h..", "36010.10002.Visit.1.Baseline.Day.0.1h..",
"36010.10003.Visit.1.Baseline.Day.0.1h..", "38501.10001.Visit.1.Baseline.Day.0.1h..",
"38501.10002.Visit.1.Baseline.Day.0.1h..", "38501.10003.Visit.1.Baseline.Day.0.1h..",
"38505.10001.Visit.1.Baseline.Day.0.1h..", "38505.10002.Visit.1.Baseline.Day.0.1h..",
"38506.10001.Visit.1.Baseline.Day.0.1h..", "38506.10002.Visit.1.Baseline.Day.0.1h..",
"38506.10003.Visit.1.Baseline.Day.0.1h..", "38506.10004.Visit.1.Baseline.Day.0.1h..",
"38601.10001.Visit.1.Baseline.Day.0.1h..", "38601.10003.Visit.1.Baseline.Day.0.1h..",
"38601.10004.Visit.1.Baseline.Day.0.1h..", "38601.10006.Visit.1.Baseline.Day.0.1h..",
"38601.10007.Visit.1.Baseline", "38602.10001.Visit.1.Baseline.Day.0.1h..",
"38602.10002.Visit.1.Baseline.Day.0.1h..", "38603.10002.Visit.1.Baseline.Day.0.1h..",
"38603.10003.Visit.1.Baseline.Day.0.1h..", "39001.10001.Visit.1.Baseline",
"39001.10002.Visit.1.Baseline", "39001.10003.Visit.1.Baseline",
"39001.10004.Visit.1.Baseline.Day.0.1h..", "39001.10005.Visit.1.Baseline",
"39001.10006.Visit.1.Baseline.Day.0.1h..", "39001.10007.Visit.1.Baseline",
"39001.10008.Visit.1.Baseline", "39001.10009.Visit.1.Baseline",
"39001.10010.Visit.1.Baseline.Day.0.1h..", "39002.10001.Visit.1.Baseline.Day.0.1h..",
"39003.10001.Visit.1.Baseline", "39004.10001.Visit.1.Baseline.Day.0.1h..",
"39004.10002.Visit.1.Baseline.Day.0.1h..", "39004.10003.Visit.1.Baseline.Day.0.1h..",
"39005.10001.Visit.1.Baseline.Day.0.1h..", "39005.10002.Visit.1.Baseline.Day.0.1h..",
"39006.10001.Visit.1.Baseline.Day.0.1h..", "39006.10002.Visit.1.Baseline.Day.0.1h..",
"39006.10003.Visit.1.Baseline.Day.0.1h..", "39006.10004.Visit.1.Baseline.Day.0.1h..",
"39006.10005.Visit.1.Baseline.Day.0.1h..", "39006.10006.Visit.1.Baseline.Day.0.1h..",
"39007.10001.Visit.1.Baseline.Day.0.1h..", "39007.10002.Visit.1.Baseline.Day.0.1h..",
"39007.10003.Visit.1.Baseline.Day.0.1h..", "39010.10001.Visit.1.Baseline.Day.0.1h..",
"40001.10002.Visit.1.Baseline.Day.0.1h..", "40001.10003.Visit.1.Baseline.Day.0.1h..",
"40001.10005.Visit.1.Baseline", "40001.10006.Visit.1.Baseline",
"40001.10007.Visit.1.Baseline", "40001.10011.Visit.1.Baseline.Day.0.1h..",
"40001.10013.Visit.1.Baseline", "40001.10014.Visit.1.Baseline",
"40001.10015.Visit.1.Baseline.Day.0.1h..", "40001.10016.Visit.1.Baseline.Day.0.1h..",
"40001.10017.Visit.1.Baseline.Day.0.1h..", "40001.10019.Visit.1.Baseline.Day.0.1h..",
"40002.10001.Visit.1.Baseline.Day.0.1h..", "40002.10002.Visit.1.Baseline.Day.0.1h..",
"40002.10003.Visit.1.Baseline", "40002.10004.Visit.1.Baseline.Day.0.1h..",
"40002.10005.Visit.1.Baseline", "40002.10006.Visit.1.Baseline",
"40002.10007.Visit.1.Baseline.Day.0.1h..", "40002.10008.Visit.1.Baseline",
"40002.10009.Visit.1.Baseline.Day.0.1h..", "40002.10010.Visit.1.Baseline",
"40002.10012.Visit.1.Baseline", "40002.10013.Visit.1.Baseline.Day.0.1h..",
"40002.10014.Visit.1.Baseline", "40002.10015.Visit.1.Baseline.Day.0.1h..",
"40002.10016.Visit.1.Baseline", "40002.10017.Visit.1.Baseline",
"40003.10001.Visit.1.Baseline.Day.0.1h..", "40003.10002.Visit.1.Baseline.Day.0.1h..",
"40003.10003.Visit.1.Baseline.Day.0.1h..", "40003.10004.Visit.1.Baseline.Day.0.1h..",
"40003.10005.Visit.1.Baseline.Day.0.1h..", "40003.10006.Visit.1.Baseline",
"40003.10007.Visit.1.Baseline.Day.0.1h..", "40003.10008.Visit.1.Baseline.Day.0.1h..",
"40003.10009.Visit.1.Baseline", "40003.10010.Visit.1.Baseline.Day.0.1h..",
"40003.10011.Visit.1.Baseline.Day.0.1h..", "40003.10012.Visit.1.Baseline.Day.0.1h..",
"40003.10013.Visit.1.Baseline", "40003.10014.Visit.1.Baseline.Day.0.1h..",
"40003.10015.Visit.1.Baseline.Day.0.1h..", "40003.10016.Visit.1.Baseline.Day.0.1h..",
"41001.10001.Visit.1.Baseline.Day.0.1h..", "41001.10002.Visit.1.Baseline.Day.0.1h..",
"41001.10003.Visit.1.Baseline.Day.0.1h..", "41002.10001.Visit.1.Baseline.Day.0.1h..",
"41004.10001.Visit.1.Baseline.Day.0.1h..", "42001.10001.Visit.1.Baseline.Day.0.1h..",
"42001.10002.Visit.1.Baseline.Day.0.1h..", "42001.10004.Visit.1.Baseline.Day.0.1h..",
"42001.10005.Visit.1.Baseline.Day.0.1h..", "42001.10006.Visit.1.Baseline.Day.0.1h..",
"42001.10007.Visit.1.Baseline.Day.0.1h..", "42001.10008.Visit.1.Baseline.Day.0.1h..",
"42002.10001.Visit.1.Baseline.Day.0.1h..", "42002.10002.Visit.1.Baseline.Day.0.1h..",
"42002.10003.Visit.1.Baseline", "42002.10004.Visit.1.Baseline.Day.0.1h..",
"42003.10001.Visit.1.Baseline.Day.0.1h..", "42003.10002.Visit.1.Baseline.Day.0.1h..",
"42003.10004.Visit.1.Baseline.Day.0.1h..", "42004.10001.Visit.1.Baseline",
"42004.10002.Visit.1.Baseline", "42004.10003.Visit.1.Baseline.Day.0.1h..",
"42004.10004.Visit.1.Baseline.Day.0.1h..", "42005.10001.Visit.1.Baseline.Day.0.1h..",
"42005.10002.Visit.1.Baseline.Day.0.1h..", "42005.10003.Unscheduled.Visit.R.Day.0.7h..",
"42005.10004.Visit.1.Baseline.Day.0.1h..", "42005.10005.Visit.1.Baseline.Day.0.1h..",
"42005.10006.Visit.1.Baseline", "42005.10007.Visit.1.Baseline.Day.0.1h..",
"42005.10008.Visit.1.Baseline.Day.0.1h..", "43001.10001.Visit.1.Baseline.Day.0.1h..",
"43002.10001.Visit.1.Baseline.Day.0.1h..", "43002.10002.Visit.1.Baseline.Day.0.1h..",
"43003.10001.Visit.1.Baseline", "44003.10001.Visit.1.Baseline",
"44005.10002.Visit.1.Baseline.Day.0.1h..", "44005.10003.Visit.1.Baseline.Day.0.1h..",
"44008.10006.Visit.1.Baseline.Day.0.1h..", "44008.10009.Visit.1.Baseline.Day.0.1h..",
"44008.10011.Visit.1.Baseline", "44008.10013.Visit.1.Baseline",
"45004.10001.Visit.1.Baseline.Day.0.1h..", "45004.10003.Visit.1.Baseline",
"46001.10001.Visit.1.Baseline.Day.0.1h..", "46001.10002.Visit.1.Baseline.Day.0.1h..",
"46001.10003.Visit.1.Baseline.Day.0.1h..", "46001.10004.Visit.1.Baseline.Day.0.1h..",
"46001.10005.Visit.1.Baseline.Day.0.1h..", "46002.10001.Visit.1.Baseline.Day.0.1h..",
"46002.10003.Visit.1.Baseline.Day.0.1h..", "46004.10001.Visit.1.Baseline.Day.0.1h..",
"46005.10001.Visit.1.Baseline.Day.0.1h..", "46005.10003.Visit.1.Baseline.Day.0.1h..",
"48002.10001.Visit.1.Baseline.Day.0.1h..", "48002.10002.Visit.1.Baseline.Day.0.1h..",
"48003.10001.Visit.1.Baseline.Day.0.1h..", "48003.10002.Visit.1.Baseline.Day.0.1h..",
"48003.10003.Visit.1.Baseline.Day.0.1h..", "48003.10004.Visit.1.Baseline.Day.0.1h..",
"48004.10001.Visit.1.Baseline.Day.0.1h..", "48004.10003.Visit.1.Baseline.Day.0.1h..",
"48004.10005.Visit.1.Baseline", "48004.10006.Visit.1.Baseline.Day.0.1h..",
"48004.10007.Visit.1.Baseline.Day.0.1h..", "48004.10008.Visit.1.Baseline.Day.0.1h..",
"48004.10009.Visit.1.Baseline.Day.0.1h..", "48004.10011.Visit.1.Baseline.Day.0.1h..",
"48004.10012.Visit.1.Baseline.Day.0.1h..", "48004.10014.Visit.1.Baseline.Day.0.1h..",
"48004.10017.Visit.1.Baseline.Day.0.1h..", "48004.10018.Visit.1.Baseline.Day.0.1h..",
"48004.10019.Visit.1.Baseline.Day.0.1h..", "48004.10020.Visit.1.Baseline.Day.0.1h..",
"48004.10021.Visit.1.Baseline.Day.0.1h..", "48004.10022.Visit.1.Baseline.Day.0.1h..",
"48004.10023.Visit.1.Baseline.Day.0.1h..", "48008.10001.Visit.1.Baseline.Day.0.1h..",
"48008.10002.Visit.1.Baseline.Day.0.1h..", "48011.10001.Visit.1.Baseline.Day.0.1h..",
"48011.10002.Visit.1.Baseline", "48012.10002.Visit.1.Baseline.Day.0.1h..",
"48012.10004.Visit.1.Baseline.Day.0.1h..", "48012.10005.Visit.1.Baseline.Day.0.1h..",
"49001.10001.Unscheduled.Visit.R.Day.0.7h..", "49001.10002.Visit.1.Baseline.Day.0.1h..",
"49006.10002.Visit.1.Baseline.Day.0.1h..", "49006.10003.Visit.1.Baseline.Day.0.1h..",
"49006.10006.Visit.1.Baseline", "49006.10007.Visit.1.Baseline.Day.0.1h..",
"49006.10008.Visit.1.Baseline.Day.0.1h..", "49006.10009.Visit.1.Baseline.Day.0.1h..",
"49008.10001.Visit.1.Baseline.Day.0.1h..", "49008.10002.Visit.1.Baseline.Day.0.1h..",
"49008.10003.Visit.1.Baseline.Day.0.1h..", "49011.10001.Visit.1.Baseline.Day.0.1h..",
"49011.10002.Visit.1.Baseline.Day.0.1h..", "49011.10003.Visit.1.Baseline.Day.0.1h..",
"49012.10001.Visit.1.Baseline.Day.0.1h..", "49016.10002.Visit.1.Baseline.Day.0.1h..",
"70001.10001.Visit.1.Baseline.Day.0.1h..", "70001.10002.Visit.1.Baseline.Day.0.1h..",
"70001.10003.Visit.1.Baseline.Day.0.1h..", "70001.10004.Visit.1.Baseline.Day.0.1h..",
"70001.10005.Visit.1.Baseline.Day.0.1h..", "70001.10006.Visit.1.Baseline.Day.0.1h..",
"70001.10007.Visit.1.Baseline.Day.0.1h..", "70001.10008.Visit.1.Baseline.Day.0.1h..",
"70003.10001.Visit.1.Baseline.Day.0.1h..", "70003.10002.Visit.1.Baseline.Day.0.1h..",
"70003.10003.Visit.1.Baseline.Day.0.1h..", "70003.10004.Visit.1.Baseline.Day.0.1h..",
"70003.10005.Visit.1.Baseline.Day.0.1h..", "70003.10006.Visit.1.Baseline.Day.0.1h..",
"90002.10001.Visit.1.Baseline.Day.0.1h..", "90003.10001.Visit.1.Baseline.Day.0.1h..",
"90003.10002.Visit.1.Baseline.Day.0.1h..", "90003.10003.Visit.1.Baseline.Day.0.1h..",
"90003.10004.Visit.1.Baseline.Day.0.1h..", "90005.10001.Visit.1.Baseline.Day.0.1h..",
"90005.10002.Visit.1.Baseline.Day.0.1h.."), class = "factor"),
Simp = c(0.562967424, 0.771395613, 0.720549673, 0.520301987,
0.498477511), Day = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "D0", class = "factor"),
Visit = structure(c(2L, 2L, 2L, 2L, 2L), .Label = c("U",
"V1"), class = "factor"), Group = structure(c(1L, 1L, 1L,
1L, 1L), .Label = "1_", class = "factor"), Timepoints = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "1_D0", class = "factor"), Total.Sequence = c(16038L,
24250L, 13939L, 28722L, 19665L), specnumber = c(49L, 33L,
29L, 20L, 20L), Shan = c(1.237756795, 1.670299627, 1.617010117,
0.985164005, 0.960982468), TREATMENT = structure(c(2L, 1L,
1L, 1L, 1L), .Label = c("FIDAXOMICIN", "VANCOMYCIN"), class = "factor"),
SUBJID = c(3e+09, 3e+09, 3e+09, 3e+09, 3e+09), ARM = structure(c(2L,
1L, 1L, 1L, 1L), .Label = c("FIDAXOMICIN", "VANCOMYCIN"), class = "factor"),
TRT01PN = c(2L, 1L, 1L, 1L, 1L), SAFFL = structure(c(2L,
2L, 2L, 2L, 2L), .Label = c("N", "Y"), class = "factor"),
MFASFL = structure(c(2L, 2L, 2L, 2L, 2L), .Label = c("N",
"Y"), class = "factor"), MEASFL = structure(c(2L, 2L, 2L,
1L, 2L), .Label = c("N", "Y"), class = "factor"), SEX = structure(c(1L,
1L, 2L, 1L, 1L), .Label = c("F", "M"), class = "factor"),
AGE = c(86L, 76L, 60L, 83L, 85L), CRFL1 = structure(c(1L,
1L, 1L, 2L, 1L), .Label = c("Y", "N"), class = c("ordered",
"factor")), CRFL1_VF_YN = structure(c(4L, 1L, 2L, 1L, 2L), .Label = c("FIDAXOMICIN_N",
"FIDAXOMICIN_Y", "VANCOMYCIN_N", "VANCOMYCIN_Y"), class = "factor")), .Names = c("Datasets",
"Simp", "Day", "Visit", "Group", "Timepoints", "Total.Sequence",
"specnumber", "Shan", "TREATMENT", "SUBJID", "ARM", "TRT01PN",
"SAFFL", "MFASFL", "MEASFL", "SEX", "AGE", "CRFL1", "CRFL1_VF_YN"
), row.names = c(NA, 5L), class = "data.frame")
> head(Simp_Shan_Baseline_Grp[1:5,1:20])
Datasets Simp Day Visit Group
1 30001.10001.Visit.1.Baseline.Day.0.1h.. 0.5629674 D0 V1 1_
2 30001.10002.Visit.1.Baseline.Day.0.1h.. 0.7713956 D0 V1 1_
3 30001.10003.Visit.1.Baseline.Day.0.1h.. 0.7205497 D0 V1 1_
4 30001.10004.Visit.1.Baseline.Day.0.1h.. 0.5203020 D0 V1 1_
5 30001.10005.Visit.1.Baseline.Day.0.1h.. 0.4984775 D0 V1 1_
Timepoints Total.Sequence specnumber Shan TREATMENT SUBJID
1 1_D0 16038 49 1.2377568 VANCOMYCIN 3e+09
2 1_D0 24250 33 1.6702996 FIDAXOMICIN 3e+09
3 1_D0 13939 29 1.6170101 FIDAXOMICIN 3e+09
4 1_D0 28722 20 0.9851640 FIDAXOMICIN 3e+09
5 1_D0 19665 20 0.9609825 FIDAXOMICIN 3e+09
ARM TRT01PN SAFFL MFASFL MEASFL SEX AGE CRFL1 CRFL1_VF_YN
1 VANCOMYCIN 2 Y Y Y F 86 Y VANCOMYCIN_Y
2 FIDAXOMICIN 1 Y Y Y F 76 Y FIDAXOMICIN_N
3 FIDAXOMICIN 1 Y Y Y M 60 Y FIDAXOMICIN_Y
4 FIDAXOMICIN 1 Y Y N F 83 N FIDAXOMICIN_N
5 FIDAXOMICIN 1 Y Y Y F 85 Y FIDAXOMICIN_Y
Now the boxplot I tried:
ggplot(data = Simp_Shan_Baseline_Grp, aes(x=CRFL1, y=Shan)) + geom_boxplot(aes(fill=TREATMENT)) + stat_summary(fun.data = give.n, geom = "text", fun.y = median,
As you can see, a count has only been added for each of the two x-axis levels (Y/N), but I need a count for each box.
Update: yes, I have tried moving the aesthetics into the ggplot() call, and that solves the problem. I am now using the code ggplot(data = Simp_Shan_Baseline_Grp, aes(x=CRFL1, y=Shan, colour = factor(TREATMENT))) + geom_boxplot() + stat_summary(fun.data = give.n, geom = "text", fun.y = median, position = position_dodge(width = 0.75)).
Now the resulting plot looks like:
[image] However, I can't manage to get boxes with a fill colour, as in my previous image.
Ideally I would like to have a fill colour and black text for each box.
Can anybody please help me?
Thank you,
Mitra
You could use the following code
# load lib
library(ggplot2)
library(dplyr)
# define UDF give.n
# Summary function for stat_summary(fun.data = ...): returns the position and
# text of a label showing how many observations a group contains.
#
# x       numeric vector of the y values of one group.
# y_mult  multiplier applied to the group's median to position the label
#         (default 1.05; experiment with it to find the perfect position).
#
# Returns a named numeric vector c(y = median(x) * y_mult, label = length(x)).
give.n <- function(x, y_mult = 1.05) {
  c(y = median(x) * y_mult, label = length(x))
}
## The five-row sample from the question gives a very limited plot, so the
## data are padded out a little before plotting.
## Keep only the three columns the plot needs and stack the result on itself.
Simp_Shan_Baseline_Grp2 <- Simp_Shan_Baseline_Grp %>%
  select(CRFL1, Shan, TREATMENT) %>%
  {
    rbind(., .)
  }
## Overwrite CRFL1 with alternating Y/N values and switch rows 3 and 8 to the
## other treatment.
Simp_Shan_Baseline_Grp2$CRFL1 <- factor(rep(c("Y", "N"), times = 5))
Simp_Shan_Baseline_Grp2$TREATMENT[c(3, 8)] <- "VANCOMYCIN"
# Put `fill` in the top-level aes() so that both the boxes and the count
# labels are split by TREATMENT, and dodge the labels by the same width as the
# boxes so each label lines up with its box.
# `fun.data` alone supplies the label's y position and text, so the separate
# `fun.y` argument (deprecated since ggplot2 3.3.0) is not passed.
ggplot(
  data = Simp_Shan_Baseline_Grp2,
  aes(x = CRFL1, y = Shan, fill = TREATMENT)
) +
  geom_boxplot() +
  stat_summary(
    fun.data = give.n,
    geom = "text",
    position = position_dodge(width = 0.75)
  )
This yields the following plot:

The lattice equivalent of geom_tile when displaying text

I am interested in knowing if there is a lattice alternative to geom_tile() in ggplot2 when I want to display factor levels/map fill to text. Example data frame (df) follows...
Gene Sample Mutation
A1 2 Missense
A2 2 WT
A1 3 Missense
A2 3 Missense
With ggplot2 this is trivial
# The data frame must be passed as `data`; the first positional argument of
# qplot() is `x`.
qplot(x = Sample, y = Gene, data = df, fill = Mutation, geom = "tile")
what would the lattice equivalent of this be? (I am interested in this because axis alignment in ggplot2 between plots is convoluted and cumbersome currently).
df <- structure(list(Gene = structure(c(1L, 2L, 1L, 2L), .Label = c("A1", "A2"), class = "factor"),
Sample = structure(c(1L, 1L, 2L, 2L ), .Label = c("2", "3"), class = "factor"),
Mutation = structure(c(1L, 2L, 1L, 1L), .Label = c("Missense", "WT"), class = "factor")), .Names = c("Gene", "Sample", "Mutation"), row.names = c(NA, -4L), class = "data.frame")
Check out the levelplot() function in lattice, for example
library("lattice")
# Make sure Sample is a factor, then draw Mutation over the Gene x Sample grid.
df[["Sample"]] <- factor(df[["Sample"]])
levelplot(x = Mutation ~ Gene * Sample, data = df)
You'll need to work out the colour scale key yourself though.

Resources