Related
I am trying to plot a bubble chart with multiple data variables on the same chart. I will do my best to describe how I would like the final output to look, along with my attempts based on reading online and on some questions posted on the forum.
I am just getting familiar with ggplot, but if there is a solution with another package, I am open to it.
C1 within circles represents color 1, C2 within squares represents color 2, C3 within triangles represents color 3.
I would think these will be 8 independent panels since the y-axis is different for each one.
Really appreciate the help.
The final output I would prefer is bubbles that have different sizes and colors. I think individual panels of 8 x 3 would be ideal because the y axis for each panel can be changed. But I cannot figure out how to structure the data to allow for creating 8 x 3 panels with individual y axes and the symbol size. Thanks. The closest I came across: R ggplot bubble chart localised bubbles display without in single chart
Preferred output:
library (tidyverse)
library (reshape2)
library(ggplot2)
# Sample data (dput output): a 15-row data.frame with a factor column `Name`
# (15 levels) and nine numeric columns: col1..col6 plus total_col1_2,
# total_col3_4 and total_col5_6.
data.tb <- structure(list(Name = structure(c(7L, 8L, 9L, 1L, 10L, 2L, 11L,
3L, 12L, 13L, 4L, 14L, 5L, 15L, 6L), .Label = c("avg_row3", "avg_row4",
"avg_row5", "avg_row6_7", "avg_row8", "avg_row9", "row1", "row2",
"row3", "row4", "row5", "row6", "row7", "row8", "row9"), class = "factor"),
col1 = c(6333, 8847, 1495292, 169, 28994.1, 3.3, 12857.6,
1.5, 107154, 230344, 38.15, 837364, 132.8, 1226140, 176.74
), col2 = c(20347, 40594, 6229886, 153.5, 122769.8, 3, 44653.4,
1.1, 362972, 944725, 32.21, 3488736, 118.16, 5108506, 158.06
), total_col1_2 = c(23301, 49441, 7725178, 156.3, 151763.9,
3.1, 57511, 1.2, 470126, 1175069, 33.28, 4326100, 120.78,
6334646, 161.4), col3 = c(3313, 4668, 751824.1, 161.1, 14689.2,
3.2, 6784.2, 1.5, 107154, 230344, 72.3, 421021, 162.49, 616496,
204.37), col4 = c(10220, 20940, 3053539.5, 145.8, 60675.8,
2.9, 23034, 1.1, 362972, 944725, 62.45, 1709982, 144.11,
2503902, 182.02), total_col3_4 = c(13533, 25608, 3805363.6,
148.6, 75365, 2.9, 29818.2, 1.2, 470126, 1175069, 64.25,
2131004, 147.46, 3120398, 186.1), col5 = c(3020, 4179, 743468.1,
177.9, 14304.9, 3.4, 6073.5, 1.5, 0, 0, 0, 416342, 99.63,
609644, 145.88), col6 = c(10127, 19654, 3176346.3, 161.6,
62094, 3.2, 21619.4, 1.1, 0, 0, 0, 1778754, 90.5, 2604604,
132.52), total_col5_6 = c(13147, 23833, 3919814, 164.5, 76398.9,
3.2, 27692.9, 1.2, 0, 0, 0, 2195096, 92.1, 3214248, 134.87
)), class = "data.frame", row.names = c(NA, -15L))
# Reshape to long format: one row per (Name, variable) pair.
data_long.tb <- melt(data.tb, id.vars = c("Name"))

# Tag every measurement with the column triple it came from
# (two columns plus their total form one group).
data_long.tb <- mutate(
  data_long.tb,
  group_num = case_when(
    variable %in% c("col1", "col2", "total_col1_2") ~ "group1",
    variable %in% c("col3", "col4", "total_col3_4") ~ "group2",
    variable %in% c("col5", "col6", "total_col5_6") ~ "group3"
  )
)
My attempts:
# Attempt 1: all variables on a single panel, bubble size and colour by value/variable.
theme_set(theme_bw()) # pre-set the bw theme.
ggplot(data = data_long.tb, mapping = aes(x = variable, y = value)) +
  geom_jitter(mapping = aes(colour = variable, size = value))

# Attempt 2: same plot, split into one panel per column group.
theme_set(theme_bw()) # pre-set the bw theme.
ggplot(data = data_long.tb, mapping = aes(x = variable, y = value)) +
  geom_jitter(mapping = aes(colour = variable, size = value)) +
  facet_wrap(~ group_num)
Not sure this is quite it but hopefully closer.
library(tidyverse)
# Bubble grid: one row per Name, one column position per measured variable.
df %>%
  pivot_longer(cols = -Name) %>%            # long format for ggplot
  # Keep the data's row order for Name, reversed so the first row is at the bottom.
  mutate(Name = fct_rev(fct_inorder(Name))) %>%
  group_by(Name) %>%
  mutate(x_pos = row_number()) %>%          # position counts up within each Name
  ungroup() %>%
  # The source column decides the marker shape (and, below, the colour).
  mutate(
    shape = case_when(
      name %in% c("col1", "col2", "total_col1_2") ~ "circle",
      name %in% c("col3", "col4", "total_col3_4") ~ "square",
      TRUE ~ "triangle"
    )
  ) %>%
  ggplot(aes(x = x_pos, y = Name, size = value, shape = shape, colour = shape)) +
  geom_point() +
  scale_size_area() +
  scale_colour_manual(
    values = c("circle" = "red", "square" = "forestgreen", "triangle" = "purple")
  ) +
  scale_shape_manual(
    values = c("circle" = 19, "square" = 15, "triangle" = 17)
  ) +
  theme_minimal()
Sample data
# Sample data (dput output): a 15-row tibble with a character column `Name`
# and nine numeric columns (col1..col6 plus the three total_* columns).
df <- structure(list(Name = c("row1", "row2", "row3", "avg_row3", "row4",
"avg_row4", "row5", "avg_row5", "row6", "row7", "avg_row6_7",
"row8", "avg_row8", "row9", "avg_row9"), col1 = c(6333, 8847,
1495292, 169, 28994.1, 3.3, 12857.6, 1.5, 107154, 230344, 38.15,
837364, 132.8, 1226140, 176.74), col2 = c(20347, 40594, 6229886,
153.5, 122769.8, 3, 44653.4, 1.1, 362972, 944725, 32.21, 3488736,
118.16, 5108506, 158.06), total_col1_2 = c(23301, 49441, 7725178,
156.3, 151763.9, 3.1, 57511, 1.2, 470126, 1175069, 33.28, 4326100,
120.78, 6334646, 161.4), col3 = c(3313, 4668, 751824.1, 161.1,
14689.2, 3.2, 6784.2, 1.5, 107154, 230344, 72.3, 421021, 162.49,
616496, 204.37), col4 = c(10220, 20940, 3053539.5, 145.8, 60675.8,
2.9, 23034, 1.1, 362972, 944725, 62.45, 1709982, 144.11, 2503902,
182.02), total_col3_4 = c(13533, 25608, 3805363.6, 148.6, 75365,
2.9, 29818.2, 1.2, 470126, 1175069, 64.25, 2131004, 147.46, 3120398,
186.1), col5 = c(3020, 4179, 743468.1, 177.9, 14304.9, 3.4, 6073.5,
1.5, 0, 0, 0, 416342, 99.63, 609644, 145.88), col6 = c(10127,
19654, 3176346.3, 161.6, 62094, 3.2, 21619.4, 1.1, 0, 0, 0, 1778754,
90.5, 2604604, 132.52), total_col5_6 = c(13147, 23833, 3919814,
164.5, 76398.9, 3.2, 27692.9, 1.2, 0, 0, 0, 2195096, 92.1, 3214248,
134.87)), row.names = c(NA, -15L), class = c("tbl_df", "tbl",
"data.frame"))
I'm trying to colour a r-plotly boxplot with custom values, but it remains in the default blue colour.
For example - see the code in the official tutorial: https://plotly.com/r/box-plots/#box-plot-with-precomputed-quartiles. When I add the code for coloring, nothing happens with the colours:
# Box plot from precomputed statistics; `color` / `colors` are the
# arguments the question is about.
fig <- plot_ly(
  y = list(1, 2, 3, 4, 5, 6, 7, 8, 9),
  type = "box",
  q1 = list(1, 2, 3),
  median = list(4, 5, 6),
  q3 = list(7, 8, 9),
  lowerfence = list(-1, 0, 1),
  upperfence = list(5, 6, 7),
  mean = list(2.2, 2.8, 3.2),
  sd = list(0.2, 0.4, 0.6),
  notchspan = list(0.2, 0.4, 0.6),
  color = list(1, 1, 2),
  colors = list("red2", "grey")
)
fig
What am I doing wrong?
The example has several issues, so I don't know if I understood the aim of the OP correctly. In the following, I removed the dummy ydata and replaced it with ID characters, used vectors instead of lists (except for y) and a formula with ~ for the color mapping:
library("plotly")
# Three precomputed boxes identified by "A", "B", "C"; colour is mapped
# through a formula so that boxes sharing a group label share a colour.
fig <- plot_ly(
  type = "box",
  y = c("A", "B", "C"),
  # box geometry
  q1 = c(1, 2, 3), median = c(4, 5, 6), q3 = c(7, 8, 9),
  lowerfence = c(-1, 0, 1), upperfence = c(5, 6, 7),
  # extra summary statistics
  mean = c(2.2, 2.8, 3.2), sd = c(0.2, 0.4, 0.6),
  notchspan = c(0.2, 0.4, 0.6),
  # colour grouping and palette
  color = ~ c("a", "a", "c"),
  colors = c("red2", "grey")
)
fig
For the mean and sd values, I left the original dummy values as in the original example.
I want to display a scatter plot of points from a csv table with ggplot2. The trick is that I'd like each point, or cross, to have a different colour according to their row number in the csv file (using RColorBrewer's spectral colours).
The dataset (dat) looks like this:
modu mnc eff
1 0.3080473 0 0.4420544
2 0.3110355 4 0.4633741
3 0.3334024 9 0.4653061
So I'd like row 1 to be very blue, row two to be a little less, row three to be kind of green, etc.
Here's my code so far:
library(ggplot2)
library(RColorBrewer)
# Sample data (dput output). `dat` has 30 rows and `dat2` has 5 rows;
# both have the numeric columns modu, mnc and eff.
dat <- structure(list(modu = c(0.30947265625, 0.3094921875, 0.32958984375,
0.33974609375, 0.33767578125, 0.3243359375, 0.33513671875, 0.3076171875,
0.3203125, 0.3205078125, 0.3220703125, 0.28994140625, 0.31181640625,
0.352421875, 0.31978515625, 0.29642578125, 0.34982421875, 0.3289453125,
0.30802734375, 0.31185546875, 0.3472265625, 0.303828125, 0.32279296875,
0.3165234375, 0.311328125, 0.33640625, 0.3140234375, 0.33515625,
0.34314453125, 0.33869140625), mnc = c(15, 9, 6, 0, 10, 12, 14,
9, 5, 11, 0, 15, 0, 2, 14, 13, 14, 17, 11, 12, 13, 6, 4, 0, 13,
7, 10, 12, 7, 13), eff = c(0.492448979591836, 0.49687074829932,
0.49421768707483, 0.478571428571428, 0.493537414965986, 0.493809523809524,
0.49891156462585, 0.499319727891156, 0.495102040816327, 0.492285714285714,
0.482312925170068, 0.498911564625851, 0.479931972789116, 0.492857142857143,
0.495238095238095, 0.49891156462585, 0.49530612244898, 0.495850340136055,
0.50156462585034, 0.496, 0.492897959183673, 0.487959183673469,
0.495605442176871, 0.47795918367347, 0.501360544217687, 0.497850340136054,
0.493496598639456, 0.493741496598639, 0.496734693877551, 0.499659863945578
)), .Names = c("modu", "mnc", "eff"), row.names = c(NA, 30L), class = "data.frame")
dat2 <- structure(list(modu = c(0.26541015625, 0.282734375, 0.28541015625,
0.29216796875, 0.293671875), mnc = c(0.16, 0.28, 0.28, 0.28,
0.28), eff = c(0.503877551020408, 0.504149659863946, 0.504625850340136,
0.505714285714286, 0.508503401360544)), .Names = c("modu", "mnc",
"eff"), row.names = c(NA, 5L), class = "data.frame")
# Rescale `mnc` by a factor of 50 in both data sets.
# `modu` and `eff` are used unscaled, so they need no assignment.
dat$mnc <- dat$mnc * 50
dat2$mnc <- dat2$mnc * 50
# Build one stacked data frame holding every pair of columns of `dat`:
# for each pair, the two chosen columns come first (renamed "x" and "y"),
# and `var` labels the pair as "<first>/<second>".
res <- do.call(
  rbind,
  combn(1:3, 2, function(ii) {
    cbind(
      setNames(dat[, c(ii, setdiff(1:3, ii))], c("x", "y")),
      var = paste(names(dat)[ii], collapse = "/")
    )
  }, simplify = FALSE)
)

# One free-scaled panel per column pair, points drawn as crosses.
ggplot(res, aes(x = x, y = y)) +
  geom_point(shape = 4) +
  facet_wrap(~ var, scales = "free")
How should I go about doing this?
Thanks!
# Same pairwise layout as in the question, with an extra `row` column that
# records each observation's row number in `dat`; that number drives the colour.
res <- do.call(
  rbind,
  combn(1:3, 2, function(ii) {
    cbind(
      # seq_len() stays empty for a zero-row `dat` (seq(0) would give c(1, 0)).
      row = seq_len(nrow(dat)),
      setNames(dat[, c(ii, setdiff(1:3, ii))], c("x", "y")),
      var = paste(names(dat)[ii], collapse = "/")
    )
  }, simplify = FALSE)
)

# Colour runs along the reversed 10-colour "Spectral" palette by row number.
ggplot(res, aes(x = x, y = y, color = row)) +
  geom_point(shape = 4) +
  scale_color_gradientn(colours = rev(brewer.pal(10, "Spectral"))) +
  facet_wrap(~ var, scales = "free")
To draw arrows in ggplot, I use geom_segment and arrow=arrow().
I would like the arrow head size to match the segment width (or size).
However, arrow does not recognize variables directly from the data argument in ggplot, so one must specify the data.frame containing the variable using the $ operator. This causes a disconnect between the values used for plotting the line and those used for plotting the arrow head (the largest arrow head can end up on the thinnest segment).
Example:
# Sample data (dput output): 11 flows. `Process` is a two-level factor
# ("First", "Second"); the remaining columns are numeric source/sink
# coordinates, an offset, and the standardised flux `Std.Flux`.
d <- structure(list(Process = structure(c(2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L), .Label = c("First", "Second"), class = "factor"),
x.sink = c(1, 3, 1, 2, 2, 3, 3, 2, 2, 2, 2), y.sink = c(1,
1, 1, 2, 2, 1, 1, 1, 1, 2, 2), x.source = c(2, 2, 2, 2, 2,
2, 2, 1, 1, 1, 3), y.source = c(2, 2, 2, 1, 1, 1, 1, 1, 1,
2, 1), offset = c(1, 1, 1, -1, -1, -1, -1, -1, -1, 1, -1),
Std.Flux = c(0.179487179487179, 0.170940170940171, 0.944444444444444,
0.0854700854700855, 0.726495726495726, 0.128205128205128,
0.213675213675214, 0.213675213675214, 0.128205128205128,
0.106837606837607, 1)), .Names = c("Process", "x.sink", "y.sink",
"x.source", "y.source", "offset", "Std.Flux"), class = "data.frame", row.names = c(NA,
-11L))
# Draw each flow in `d` as a segment from its source to its sink.
# Colour is mapped to Process and segment size to Std.Flux; both end points
# are shifted by as.numeric(Process)/10 so the two processes do not overlap.
p <- qplot(data=d,
#alpha=I(0.4),
colour=Process,
size=Std.Flux,
xlim=c(0,4),
ylim=c(0,3),
x=x.source+as.numeric(Process)/10,
y=y.source+as.numeric(Process)/10,
xend=x.sink+as.numeric(Process)/10,
yend=y.sink+as.numeric(Process)/10,
geom="segment",
# The arrow-head length is taken straight from d$Std.Flux rather than
# through the aesthetic mapping used for the segment size above.
arrow = arrow(type="closed",
length = unit(d$Std.Flux,"cm")))
print(p)
Any suggestions?
Here's one way:
require(ggplot2)
df <- mtcars
# Anchor y value shared by both arrow layers.
arrow_pos <- data.frame(y = 250)

ggplot(data = df, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_col(width = .4, fill = "darkblue") +
  # A slightly larger black arrow goes underneath to act as an outline ...
  geom_segment(
    data = arrow_pos,
    mapping = aes(x = 1.526, xend = 1.01, y = y + 90.02, yend = y + 0.25),
    arrow = arrow(length = unit(4.2, "mm")),
    lwd = 2,
    color = "black"
  ) +
  # ... for the thinner gold arrow drawn on top of it.
  geom_segment(
    data = arrow_pos,
    mapping = aes(x = 1.525, xend = 1.01, y = y + 90, yend = y + 0.25),
    arrow = arrow(length = unit(4, "mm")),
    lwd = 1,
    color = "gold2"
  ) +
  annotate(
    "text",
    x = 2.39, y = 360,
    label = 'This arrow points to the highest MPG.'
  ) +
  scale_y_continuous(limits = c(0, 400)) +
  labs(x = 'CYL', y = 'MPG')
Output:
Must have been fixed in the last 8 years :)
Here translated into a call to ggplot()
library(ggplot2)
# Flow segments from source to sink, offset per Process so they do not overlap.
# Arrow-head length is read directly from d$Std.Flux.
ggplot(data = d) +
  geom_segment(
    mapping = aes(
      colour = Process,
      size = Std.Flux,
      x = x.source + as.numeric(Process) / 10,
      y = y.source + as.numeric(Process) / 10,
      xend = x.sink + as.numeric(Process) / 10,
      yend = y.sink + as.numeric(Process) / 10
    ),
    arrow = arrow(
      type = "closed",
      length = unit(d$Std.Flux, "cm")
    )
  )
I have the following kind of data for a human family:
# Family members, their two parents (NA when unknown), plot coordinates,
# and the two variables that drive symbol size and fill.
indvidual <- c("John", "Kris", "Peter", "King", "Marry",
               "Renu", "Kim", "Ken", "Lu")
Parent1 <- c(NA, NA, "John", "John", "John", NA, "Peter", NA, NA)
Parent2 <- c(NA, NA, "Kris", "Kris", "Renu", NA, "Lu", NA, NA)
X <- c(2, 3, 2, 3, 4, 5, 1.5, 1, 1)
Y <- c(3, 3, 2, 2, 2, 3, 1, 3, 2)
pchsize <- c(4.5, 4.3, 9.2, 6.2, 3.2, 6.4, 2.1, 1.9, 8)
fillcol <- c(8.5, 8.3, 1.2, 3.2, 8.2, 2.4, 2.6, 6.1, 3.2)

myd <- data.frame(
  indvidual = indvidual,
  Parent1 = Parent1,
  Parent2 = Parent2,
  X = X,
  Y = Y,
  pchsize = pchsize,
  fillcol = fillcol
)
indvidual Parent1 Parent2 X Y pchsize fillcol
1 John <NA> <NA> 2.0 3 4.5 8.5
2 Kris <NA> <NA> 3.0 3 4.3 8.3
3 Peter John Kris 2.0 2 9.2 1.2
4 King John Kris 3.0 2 6.2 3.2
5 Marry John Renu 4.0 2 3.2 8.2
6 Renu <NA> <NA> 5.0 3 6.4 2.4
7 Kim Peter Lu 1.5 1 2.1 2.6
8 Ken <NA> <NA> 1.0 3 1.9 6.1
9 Lu <NA> <NA> 1.0 2 8.0 3.2
I want to plot something like the following: individual points are connected to their parents (preferably with a different line color for Parent1 and Parent2). Also, the pch size and pch fill are scaled to the other variables, pchsize and fillcol. The plot outline is:
Here is my progress in ggplot2:
require(ggplot2)
# Points drawn with the character "O": size mapped to pchsize, fill mapped
# to fillcol, and each point labelled with the person's name.
ggplot(data = myd, mapping = aes(x = X, y = Y, fill = fillcol)) +
  geom_point(mapping = aes(size = pchsize, fill = fillcol), shape = "O") +
  geom_text(mapping = aes(label = indvidual, vjust = 1.25))
Unsolved issues: drawing the connecting lines, and making the pch large while also applying a fill color at the same time.
Here is ggplot2 solution
library(ggplot2)
# Family members, their two parents (NA when unknown), plot coordinates,
# and the two variables that drive point size and colour.
individual <- c("John", "Kris", "Peter", "King", "Marry", "Renu", "Kim", "Ken", "Lu")
Parent1 <- c(NA, NA, "John", "John", "John", NA, "Peter", NA, NA)
Parent2 <- c(NA, NA, "Kris", "Kris", "Renu", NA, "Lu", NA, NA)
X <- c(2, 3, 2, 3, 4, 5, 1.5, 1, 1)
Y <- c(3, 3, 2, 2, 2, 3, 1, 3, 2)
pchsize <- c(4.5, 4.3, 9.2, 6.2, 3.2, 6.4, 2.1, 1.9, 8)
fillcol <- c(8.5, 8.3, 1.2, 3.2, 8.2, 2.4, 2.6, 6.1, 3.2)
myd <- data.frame(individual, Parent1, Parent2, X, Y, pchsize, fillcol)

# One row per parent -> child link. After the merge, X.x / Y.x hold the
# parent's coordinates and X.y / Y.y hold the child's.
SegmentParent1 <- merge(
  myd[, c("individual", "X", "Y")],
  myd[!is.na(myd$Parent1), c("Parent1", "X", "Y")],
  by.x = "individual", by.y = "Parent1")
# Filter on Parent2 itself, so a child that has a Parent1 but no Parent2
# (or the reverse) is still handled correctly.
SegmentParent2 <- merge(
  myd[, c("individual", "X", "Y")],
  myd[!is.na(myd$Parent2), c("Parent2", "X", "Y")],
  by.x = "individual", by.y = "Parent2")
Segments <- rbind(SegmentParent1, SegmentParent2)
# Pedigree plot: parent-child links first, then the people on top.
ggplot(data = myd, aes(X, Y)) +
  geom_segment(data = Segments, aes(x = X.x, xend = X.y, y = Y.x, yend = Y.y)) +
  geom_point(aes(size = pchsize, colour = fillcol)) +
  # The label column of `myd` is `individual`.
  geom_text(aes(label = individual), vjust = 0.5, colour = "red", fontface = 2) +
  # Axes carry no meaning here: drop titles and breaks, pad the panel a little.
  scale_x_continuous("", expand = c(0, 0.6), breaks = NULL) +
  scale_y_continuous("", expand = c(0, 0.4), breaks = NULL) +
  scale_size(range = c(20, 40)) +
  theme_bw()
Here is a solution just using plot(), text(), and arrows(). The for loop is a bit cluttered, but will work for larger data sets and it should be easy to play with the plot and arrows:
# Pedigree plot with base graphics only: plot(), arrows() and text().
# Both plot() calls share these limits so their coordinate systems agree.
x_lim <- c(min(myd$X) * .8, max(myd$X) * 1.2)
y_lim <- c(min(myd$Y) * .8, max(myd$Y) * 1.2)

# Invisible first pass: sets up the coordinate system without axes.
plot(myd$X, myd$Y, col = "white", type = "p", main = "", ylab = "", xlab = "",
     axes = FALSE, ylim = y_lim, xlim = x_lim)

# Children are the rows that have a first parent recorded.
child <- myd[!is.na(myd$Parent1), ]

# Row index of each child's parents in `myd` (NA when the parent is unknown).
p1 <- match(as.character(child$Parent1), as.character(myd$indvidual))
p2 <- match(as.character(child$Parent2), as.character(myd$indvidual))

# Columns: child X, child Y, parent X, parent Y.
DArrows <- cbind(child$X, child$Y, myd$X[p1], myd$Y[p1])
MArrows <- cbind(child$X, child$Y, myd$X[p2], myd$Y[p2])

has_children <- nrow(child) > 0

# Full-length links from each parent to the child.
if (has_children) {
  arrows(DArrows[, 3], DArrows[, 4], DArrows[, 1], DArrows[, 2],
         lwd = 2, col = "blue", length = 0.1)
  arrows(MArrows[, 3], MArrows[, 4], MArrows[, 1], MArrows[, 2],
         lwd = 2, col = "red", length = 0.1)
}

# Second pass on the same device: the people themselves, sized and filled.
par(new = TRUE)
plot(myd$X, myd$Y, type = "p", main = "", ylab = "", xlab = "", cex = myd$pchsize,
     axes = FALSE, pch = 21, ylim = y_lim, xlim = x_lim,
     bg = myd$fillcol, fg = "black")
text(1.12 * myd$X, .85 * myd$Y, myd$indvidual)

# Redraw each link from its midpoint to the child, on top of the points.
if (has_children) {
  arrows((DArrows[, 3] + DArrows[, 1]) / 2, (DArrows[, 4] + DArrows[, 2]) / 2,
         DArrows[, 1], DArrows[, 2], lwd = 2, col = "blue", length = 0.1)
  arrows((MArrows[, 3] + MArrows[, 1]) / 2, (MArrows[, 4] + MArrows[, 2]) / 2,
         MArrows[, 1], MArrows[, 2], lwd = 2, col = "red", length = 0.1)
}
One thing that jumped out at me was to treat this as a network - R has many packages for plotting these.
Here's a very simple solution:
First, I used your parent list to make a sociomatrix - you can generally input networks using edge lists as well - here I put 1 for the first parental relationship and 2 for the second.
# Sociomatrix: rows are parents, columns are children, in the order of the
# family vector. A 1 marks a first-parent link, a 2 a second-parent link.
psmat <- matrix(0, nrow = 9, ncol = 9)
psmat[1, 3:5] <- 1  # row 1 -> columns 3, 4, 5
psmat[2, 3:4] <- 2  # row 2 -> columns 3, 4
psmat[3, 7] <- 1    # row 3 -> column 7
psmat[6, 5] <- 2    # row 6 -> column 5
psmat[9, 7] <- 2    # row 9 -> column 7
Then, using the network package I just hit:
require(network)
# Plot the sociomatrix as a network: vertices placed at (X, Y), sized by
# pchsize and coloured by fillcol; edge colours are taken from psmat.
plot(
  network(psmat),
  coord = cbind(X, Y),
  vertex.cex = pchsize,
  vertex.col = fillcol,
  label = indvidual,
  edge.col = psmat
)
This isn't terribly pretty in itself, but I think gives you all the basic elements you wanted.
For the colors, I believe the decimal places are just rounded - I wasn't sure what to do with those.
I know I've seen people plot networks in ggplot, so that might give you a better result.
Edit:
So here's a really messy way of turning your data into a network object directly - someone else might be able to fix it. Additionally, I add an edge attribute (named 'P' for parental status) and give the first set a value of 1 and the second set a value of 2. This can be used when plotting to set the colors.
# Build the network straight from the parent vectors.
# Sizes come from the data rather than a hard-coded 9, so the same code
# works for a family of any size.
n_ind <- length(indvidual)
child_id <- seq_len(n_ind)

# Edge lists as (parent index, child index); rows with no parent are dropped.
P1 <- match(Parent1, indvidual)
e1 <- cbind(P1, child_id)[!is.na(P1), , drop = FALSE]
P2 <- match(Parent2, indvidual)
e2 <- cbind(P2, child_id)[!is.na(P2), , drop = FALSE]

en1 <- network.initialize(n_ind)
# First-parent edges get edge attribute P = 1 ...
add.edges(en1, e1[, 1], e1[, 2])
set.edge.attribute(en1, "P", 1)
# ... second-parent edges are added afterwards with P = 2.
add.edges(en1, e2[, 1], e2[, 2], names.eval = "P", vals.eval = 2)

# Edge colour is driven by the P attribute.
plot(en1, coord = cbind(X, Y), vertex.cex = pchsize,
     vertex.col = fillcol, label = indvidual, edge.col = "P")
An alternative solution using igraph
library(igraph)
# Edge list in long form: every person appears twice as `orig`, once
# against Parent1 and once against Parent2 (the 9 names are recycled
# over the 18 rows of `mm`).
mm <- data.frame(
  dest = c(as.character(myd$Parent1), as.character(myd$Parent2))
)
mm$orig <- myd$individual

# Drop rows with no parent recorded, then build the graph from the
# remaining (dest, orig) pairs.
g <- graph.edgelist(as.matrix(mm[!is.na(mm$dest), ]))

# Index `myd` by the first column (as character) so rows can be looked up
# by vertex name.
rownames(myd) <- as.character(myd[, 1])

# Layout: columns 4 and 5 of `myd`, in vertex order.
l <- as.matrix(myd[V(g)$name, 4:5])

# NOTE(review): colour and size both read column 6 of `myd`; column 7 may
# have been intended for the colour - confirm.
plot(
  g,
  layout = l,
  vertex.color = myd[V(g)$name, 6],
  vertex.size = myd[V(g)$name, 6]
)
Just play around a bit with the colors and sizes!