How to annotate geom_segment arrows in ggplot - r

I have a dataframe:
df_sites <- structure(list(x = c(1.04092250164696, -0.383065216420003, 0.396244810279309,
0.970078841220606, 1.70624019153651, 3.16514402752826, 0.683787687531189,
0.00206174639359557, 0.885459199930364, 0.990634067372794, 0.228548628266029,
5.12827669944002, 0.0950586619539368, -0.275846514997531, 1.5525132408558,
-1.29950430377717, -0.990922674400145, 0.185830660119637, 0.00602127943634668,
-1.02247155743703, -0.251974618425098, 1.87788540164332, 1.28325669941297,
1.02150538568984, -0.865622294371786, -1.96452990510675, -0.524866180755096,
2.17941326700128, -1.34324588367972, -1.81439562296687, -1.13470999575871,
-0.493658775981049, -0.296149601541577, 0.447503914837335, -0.269452469430389,
0.0127337699647291, -1.04287439571777, -0.613105026144241, -1.3890917214799,
-1.90860630718699, -1.16104734632228, -0.584089855574213, -1.2278237710839,
-0.937664406699838, 1.09181991754655, -0.565406792755387, -0.58204838078486,
0.842304932110318), y = c(-3.45147995394394, 2.29349807839102,
0.174644402446899, 3.8468101986443, 2.6412842200453, -0.0665028396276639,
2.05491741522117, 0.165875878990559, -0.25539122973085, 1.74130285620058,
0.396659954165391, -1.65827015730937, 1.17736501075071, -3.72087159136532,
1.89896109873428, 1.68766224921712, -2.92368548480463, -2.42481488216442,
2.20648524060166, -0.486513106980203, 2.05729614246768, 2.51807338395106,
1.9974880289267, -2.67208900165781, -0.749156762561599, 1.93100782500476,
-4.15965374769117, 3.64156647300722, -2.7010471123406, 0.198076035987165,
1.62736086278764, -1.03740092888219, -3.89989372202828, -0.213429351502094,
-0.408170753360095, -1.61011027424538, -0.213306102694109, -0.154504840231308,
0.118730504697768, 1.91054431185776, 0.255125262080179, 0.612701198243207,
-1.21511378377373, 3.29282161162431, 2.50675599190964, -3.80136136529774,
-1.28545510252701, 3.02158440057367), Sites = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "M", class = "factor")), row.names = c("M.T1.R1.S1.16S.S50",
"M.T1.R1.S2.16S.S62", "M.T1.R1.S3.16S.S74", "M.T1.R2.S1.16S.S86",
"M.T1.R2.S2.16S.S3", "M.T1.R2.S3.16S.S15", "M.T1.R3.S1.16S.S27",
"M.T1.R3.S2.16S.S39", "M.T1.R3.S3.16S.S51", "M.T1.R4.S1.16S.S63",
"M.T1.R4.S2.16S.S75", "M.T1.R4.S3.16S.S87", "M.T2.R1.S1.16S.S53",
"M.T2.R1.S2.16S.S65", "M.T2.R1.S3.16S.S77", "M.T2.R2.S1.16S.S89",
"M.T2.R2.S2.16S.S6", "M.T2.R2.S3.16S.S18", "M.T2.R3.S1.16S.S30",
"M.T2.R3.S2.16S.S42", "M.T2.R3.S3.16S.S54", "M.T2.R4.S1.16S.S66",
"M.T2.R4.S2.16S.S78", "M.T2.R4.S3.16S.S90", "M.T3.R1.S1.16S.S56",
"M.T3.R1.S2.16S.S68", "M.T3.R1.S3.16S.S80", "M.T3.R2.S1.16S.S92",
"M.T3.R2.S2.16S.S9", "M.T3.R2.S3.16S.S21", "M.T3.R3.S1.16S.S33",
"M.T3.R3.S2.16S.S45", "M.T3.R3.S3.16S.S57", "M.T3.R4.S1.16S.S69",
"M.T3.R4.S2.16S.S81", "M.T3.R4.S3.16S.S93", "M.T4.R1.S1.16S.S59",
"M.T4.R1.S2.16S.S71", "M.T4.R1.S3.16S.S83", "M.T4.R2.S1.16S.S95",
"M.T4.R2.S2.16S.S12", "M.T4.R2.S3.16S.S24", "M.T4.R3.S1.16S.S36",
"M.T4.R3.S2.16S.S48", "M.T4.R3.S3.16S.S60", "M.T4.R4.S1.16S.S72",
"M.T4.R4.S2.16S.S193", "M.T4.R4.S3.16S.S203"), class = "data.frame")
which I plot as
# Base layer: one large diamond per site, coloured by the Sites factor.
p <- ggplot() +
  geom_point(
    data = df_sites,
    mapping = aes(x, y, colour = Sites),
    shape = "diamond",
    size = 5
  )
# Arrow tips; each row name is the variable that the arrow stands for.
df_arrows <- data.frame(
  x = c(-0.0506556191949347, -0.248732307259684, 0.75),
  y = c(-0.669658874134264, -0.45802558549515, -0.110871926510315),
  row.names = c("`POX-C`", "Protein", "yield")
)
# Draw one arrow per row, from the origin to its (x, y) tip.
p +
  geom_segment(
    data = df_arrows,
    mapping = aes(x = 0, y = 0, xend = x, yend = y),
    arrow = arrow(length = unit(0.2, "cm"))
  )
I would like to add annotation to these arrows. How do I do it?

We can use geom_text and the data contained in df_arrows:
library(dplyr) # supplies %>% and mutate()
# Add the arrows to the stored plot so the labels can be layered on top.
p <- p +
  geom_segment(
    data = df_arrows,
    mapping = aes(x = 0, y = 0, xend = x, yend = y),
    arrow = arrow(length = unit(0.2, "cm"))
  )
# Copy the row names into a `labs` column so they can be mapped to `label`;
# each label is placed at the tip of its arrow.
p +
  geom_text(
    data = df_arrows %>% mutate(labs = row.names(.)),
    mapping = aes(x = x, y = y, label = labs)
  )
If you want the plot to be a little easier on the eyes and avoid plotting over things, you can try the geom_text_repel function from the ggrepel package.

Related

How to add gradient color to a surface3d in R when Z axis is between 0 and 1

I've seen many posts (here, here, here, and here) on how to add color gradient on the Z axis (but none on "z" values that range from 0 to 1). The only thing is that when I do this, I end up with only two colors if my data on the Z axis is between 0 and 1.
Here is an example:
I would like to have a figure where the Z axis shows a red color when it's near 0 and yellow when it's near 1.
The other problem is that I have a bunch of NA's in the Z axis because I'm defining the surface for only the x and y values that correspond to the points. Usually, people use "outer(x,y,f)" to compute the surface. I don't have an equation where I can just plug the numbers.
Is there a way that I can do this?
df3d = structure(list(phi = c(0.714779631270897, 0.687691682891498,
0.596648688803568, 0.573930669753368, 0.742367142156744, 0.647098819439728,
0.695488766544905, 0.728284245613654, 0.688278993976676, 0.692076206940355,
0.721356887106184, 0.551532807978921, 0.54294513452377, 0.529948458419129,
0.583705941140962, 0.556086109758564, 0.721770088612814, 0.711284095827769,
0.573741332655988, 0.527342613188125, 0.762709309318822, 0.740228675759072,
0.539713252759555, 0.696487636519962, 0.709494568163841, 0.537216639879562,
0.551801008711386, 0.545341937291782, 0.584139265723182, 0.64967079561165,
0.562544215947123, 0.716870075612315, 0.523337825235807, 0.588702763971338,
0.744644767844755, 0.551489639273234, 0.617165392352849, 0.556723007149084,
0.66554863194508, 0.570156474465965, 0.59324644850682, 0.552326531317577,
0.607405070778153, 0.765602115588822, 0.532910404322836, 0.749202895901834,
0.638084894011913, 0.594508381800896, 0.745877525852658, 0.742265176757939,
0.55200104972317, 0.598724220429779, 0.704160605412078, 0.709273655686999,
0.57882815350951, 0.80558646355475, 0.739236441867173, 0.556469513099474,
0.560730917777703, 0.715514054617767, 0.562095774851614, 0.540152840905987,
0.561824376055385, 0.595049050758879, 0.544700858333275, 0.54379044778355,
0.735023707587803, 0.75761987117526, 0.529370104304623, 0.756142990929929,
0.580486562475464, 0.555099817471069, 0.537232767721754, 0.68405457472067,
0.572070245916932, 0.73826438688156, 0.776877621879421, 0.5417182204358,
0.757617713719944, 0.536922997394714, 0.695880672257972, 0.570816629701256,
0.551885077056955, 0.697426644089613, 0.700677930911186, 0.722074526398648,
0.547841598427244, 0.744115961419341, 0.568163711481982, 0.631039420851915,
0.52569185852275, 0.655488455712025, 0.715875702650255, 0.562828009151803,
0.565017441865273, 0.554557230119741, 0.641911755728664, 0.549787832704858,
0.551682550480448, 0.522229525069209), sp = structure(c(4L, 4L,
1L, 1L, 2L, 2L, 2L, 2L, 4L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 4L, 4L,
1L, 1L, 2L, 2L, 1L, 3L, 2L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 1L, 1L,
2L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 4L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
1L, 4L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("fortis", "fuliginosa", "magnirostris", "scandens"
), class = "factor"), pc1 = c(0.175880701440334, 0.00718708371839084,
0.141108047117647, -0.0241407292755287, -0.362347619490667, -0.278187055817663,
-0.322472422874688, -0.342113759548994, -0.0480003258625404,
0.471768235224601, -0.324560745197095, 0.0893840127998557, 0.392067958177292,
0.333197422567793, 0.143274241985899, 0.39728316736576, 0.107258309440993,
0.172013966873444, 0.198033002646736, 0.0233433518931576, -0.379151278648175,
-0.360331402784382, 0.0815105012533928, 0.4916774405792, -0.325531606767521,
0.0464793855349116, 0.128993599551295, 0.0393187306328187, 0.116498023384732,
0.0585444918583008, 0.0519773823187942, 0.117485670789894, 0.141592582273004,
0.0866016090395172, -0.353101745830432, 0.0903683502030376, -0.0766571214760896,
0.0189849871337894, 0.0284234379094188, -0.074411018513597, -0.125981989564305,
-0.04066896524291, 0.0513708917900996, -0.384362095569569, 0.133461942504857,
-0.32950028028642, -0.0970510208736005, 0.169708833257483, -0.363153793934809,
-0.358442393985438, 0.0823660510982192, 0.14891498101178, 0.0874718551667044,
-0.286609834093365, 0.247017305539772, -0.42431120384093, -0.323957076921413,
0.120304498088591, 0.0372009683336541, -0.334862217128121, 0.0850391283675992,
0.426550700956589, 0.053540404847934, 0.114569082118706, 0.145035302093536,
0.462956587489796, -0.352558028645024, -0.370105398490897, 0.249974349261361,
-0.374913268845847, 0.209780781689884, 0.313250151589845, 0.46260008422501,
-0.304611304484123, 0.11736172451962, -0.35863773173462, -0.391035427221015,
0.219372693586083, -0.373985839773145, 0.28640321397829, -0.319643095574694,
0.0125879234209831, 0.182454650537706, -0.0307250825972499, -0.32490678343306,
-0.341204851832981, 0.314073748792412, -0.364615463916348, -0.0644240574912661,
-0.267640246495039, 0.10370599000585, -0.288131406123636, 0.0357411052061282,
0.295614964446489, -0.0145385512772513, -0.0451979384514853,
0.190115107687624, 0.159441037623466, 0.0550870424124392, 0.0582226744080579
), pc2 = c(-1.01095247497725, -1.03868939268555, 0.217310975677827,
0.0285247896165632, 0.0526206694724207, 0.029933782968998, 0.0777356682984891,
0.178400497047045, -0.895131692154304, 0.209867904648101, -0.0527418216237663,
0.00827859255924409, 0.112996963663788, -0.0395108234571918,
0.173676295351724, 0.203897905654255, -0.936940800121312, -1.04245666692378,
0.171077913138838, -0.164692367490732, 0.0227473300072106, 0.108660664812142,
-0.0570692402038391, 0.219114322364657, -0.00559526046181254,
-0.0904496365732674, 0.0329879550738144, -0.0513100262471313,
0.157624496486177, -0.430836781866961, 0.0336830138484876, -0.997472053889813,
-0.151743057518861, 0.153748243948929, -0.0290891308461303, 0.00866038555153437,
0.131519041243216, -0.0113322871452352, -0.487378228261218, -0.0178833351102055,
0.0262770136476736, -0.0671756888678338, 0.190653963041647, 0.0874833382301275,
-0.0729306295513451, -0.114781088459982, 0.176113469790657, 0.229289749785351,
0.023115521362388, 0.0124139031005011, 0.00629127323542669, 0.229545586035766,
-0.643425633985522, -0.119025249254049, 0.222273563398108, 0.0949392931025451,
-0.103328613004053, 0.0497069994557915, 0.0169108098226666, 0.0176907608810171,
0.0525638095222423, 0.0991718002465503, 0.031701514651561, 0.194031271868605,
0.00563908525013029, 0.144806228737922, 0.145921630779316, 0.164295633824383,
-0.0579825386055256, 0.104068297238545, 0.204915386707032, 0.153880371324229,
0.0676594796683301, 0.183052585806673, 0.113255499327757, 0.107866805397445,
0.142039558115177, 0.0274014273919194, 0.133609276043029, 0.023767214013592,
0.0322573857202049, 0.0409388634816843, 0.0643799435826686, -0.850272489901295,
0.0430623373727956, 0.0213513249227984, 0.112589167129505, 0.0764778027855769,
-0.0187866951639582, 0.0514999426382286, -0.141852017637047,
0.132798155087113, -0.811488800456735, 0.18297353727076, 0.00129211340539928,
-0.0604306388888919, 0.39467615944551, 0.0406033888777663, -0.0115831761153328,
-0.190035979057187)), .Names = c("phi", "sp", "pc1", "pc2"), row.names = c("phi[1245,12]",
"phi[1058,12]", "phi[594,12]", "phi[1999,12]", "phi[1546,12]",
"phi[353,12]", "phi[312,12]", "phi[21,12]", "phi[1371,12]", "phi[1874,12]",
"phi[384,12]", "phi[124,12]", "phi[2085,12]", "phi[163,12]",
"phi[221,12]", "phi[1321,12]", "phi[1767,12]", "phi[1883,12]",
"phi[490,12]", "phi[225,12]", "phi[1719,12]", "phi[1613,12]",
"phi[268,12]", "phi[2132,12]", "phi[1458,12]", "phi[1173,12]",
"phi[1335,12]", "phi[1357,12]", "phi[388,12]", "phi[985,12]",
"phi[184,12]", "phi[945,12]", "phi[2143,12]", "phi[1273,12]",
"phi[1738,12]", "phi[2081,12]", "phi[822,12]", "phi[1236,12]",
"phi[2044,12]", "phi[2018,12]", "phi[1065,12]", "phi[314,12]",
"phi[943,12]", "phi[514,12]", "phi[448,12]", "phi[1535,12]",
"phi[1798,12]", "phi[960,12]", "phi[22,12]", "phi[128,12]", "phi[190,12]",
"phi[2037,12]", "phi[772,12]", "phi[1553,12]", "phi[417,12]",
"phi[1659,12]", "phi[1529,12]", "phi[1369,12]", "phi[2075,12]",
"phi[1722,12]", "phi[712,12]", "phi[80,12]", "phi[1050,12]",
"phi[1877,12]", "phi[1195,12]", "phi[1138,12]", "phi[1549,12]",
"phi[1886,12]", "phi[90,12]", "phi[1990,12]", "phi[423,12]",
"phi[783,12]", "phi[165,12]", "phi[1975,12]", "phi[951,12]",
"phi[1681,12]", "phi[1647,12]", "phi[1286,12]", "phi[1666,12]",
"phi[1029,12]", "phi[1989,12]", "phi[668,12]", "phi[1859,12]",
"phi[763,12]", "phi[879,12]", "phi[1639,12]", "phi[839,12]",
"phi[1366,12]", "phi[731,12]", "phi[34,12]", "phi[250,12]", "phi[25,12]",
"phi[457,12]", "phi[465,12]", "phi[1010,12]", "phi[1388,12]",
"phi[2055,12]", "phi[917,12]", "phi[188,12]", "phi[130,12]"), class = "data.frame")
# First attempt: scatter the points in 3D and overlay one interpolated,
# height-coloured surface per species.
library(scatterplot3d) #http://www.statmethods.net/graphs/scatterplot.html
library(rgl)
library(akima)
# Species names used below to subset df3d, one surface per name.
sp= c("fortis","fuliginosa","magnirostris","scandens")
open3d()
par3d(windowRect = c(10, 10, 600, 600))
# Points: pc1/pc2 on the horizontal axes, phi on the vertical axis,
# one fixed colour per level of df3d$sp.
plot3d(x = df3d$pc1,
y = df3d$pc2,
z = df3d$phi,
col=c("#FF3030","#9ACD31", "#1D90FF", "#FF8001")[(as.factor(df3d$sp))],
xlab = "PC1",
ylab = "PC2",
zlab = "Fitness",
type = "p",
# size = round(as.numeric(df3d$z.mean)),
main = "yo")
for(j in 1:length(sp)){
# Rows belonging to the j-th species.
df3d.sp = df3d[df3d$sp == sp[j],]
# A species with exactly one row is skipped; no surface is drawn for it.
if(nrow(df3d.sp) == 1){next} else{
# Interpolate phi over (pc1, pc2); s$x, s$y and s$z are used below as the
# surface coordinates and heights.
s = interp(df3d.sp$pc1,
df3d.sp$pc2,
df3d.sp$phi,
duplicate="strip")
# Heights doubled before being turned into colour indices.
z = s$z*2
# zlim spans all of df3d$phi, not just this species.
zlim <- range(df3d$phi)
# Width of the phi range plus one.
# NOTE(review): with phi between 0 and 1 this is below 2, so the lookup
# table built on the next line probably holds a single colour - confirm.
zlen <- zlim[2] - zlim[1] + 1
colorlut <- heat.colors(zlen) # height color lookup table
# NOTE(review): the index z-zlim[1]+1 is fractional here; confirm how it is
# truncated and whether it can point past the end of colorlut.
col <- colorlut[ z-zlim[1]+1 ] # assign colors to heights for each point
surface3d(s$x,s$y,s$z,color=col, alpha = 0.5)
}
}
The best I could do is something like this:
# Second attempt: bin the interpolated heights into nb.div classes with cut()
# and colour each grid cell by its class.
for(j in 1:length(sp)){
# Rows belonging to the j-th species.
df3d.sp = df3d[df3d$sp == sp[j],]
# A species with exactly one row is skipped.
if(nrow(df3d.sp) == 1){next} else{
s = interp(df3d.sp$pc1,
df3d.sp$pc2,
df3d.sp$phi,
duplicate="strip")
# Palette generator running from yellow to red.
rbPal <- colorRampPalette(c('yellow','red'))
# Number of height classes (and of colours).
nb.div = 10
# Bin s$z into nb.div intervals and lay the bin labels out with the same
# dimensions as s$z.
# NOTE(review): cut() derives the intervals from the values of s$z itself,
# so they are not anchored to [0, 1] - confirm this is intended.
data.col =as.data.frame(matrix(as.factor(cut(s$z,breaks = nb.div)),
dim(s$z)[1],dim(s$z)[2]))
# Turn the per-column bin labels back into numbers for use as colour indices.
# NOTE(review): the labels pass through matrix() and as.data.frame() first,
# so these codes may no longer follow the original bin order - confirm.
col.index=matrix(as.numeric(unlist(data.col)),
dim(s$z)[1],dim(s$z)[2])
# Look up one colour per grid cell and reshape to the dimensions of s$z.
Col <- rbPal(nb.div)[col.index]
col= matrix(Col,dim(s$z)[1],dim(s$z)[2])
surface3d(s$x,s$y,s$z,color=col, alpha = 0.5)
}
}
The problem is that the colors are not going from red to yellow (0->1). They are randomly associated:
Also, the colors are not constrained to be between 0 and 1.
How could I do this?
I've just tried a new code and it seems to work, but not with the data that I have.
# Same colouring recipe as above, applied to simulated data instead of df3d.
library(scatterplot3d)
library(rgl)
library(akima)
# 100 random points; no seed is set, so each run draws different values.
x = rnorm(100)
y = rnorm(100)
z1 = -x^2-y^2+x^3
# expit squeezes any real number into (0, 1); logit is its inverse and is
# defined here but not used in this snippet.
expit <- function(x) 1/(1+exp(-x))
logit <- function(x) log(x/(1-x))
# Heights between 0 and 1.
z = expit(z1+1)
plot3d(x = x,
y = y,
z = z,
col="red",
xlab = "PC1",
ylab = "PC2",
zlab = "Fitness",
type = "p",
# size = round(as.numeric(df3d$z.mean)),
main = "yo")
# Interpolate z over (x, y); s$x, s$y and s$z feed surface3d() below.
s = interp(x,
y,
z,
duplicate="strip")
# Palette generator running from red to yellow.
rbPal <- colorRampPalette(c('red','yellow'))
# Number of height classes (and of colours).
nb.div = 10
# Bin s$z into nb.div intervals, laid out with the dimensions of s$z.
data.col = as.data.frame(matrix(as.factor(cut(s$z, breaks = nb.div)),
dim(s$z)[1],dim(s$z)[2]))
# Numeric codes of the bin labels, used as indices into the palette.
col.index = matrix(as.numeric(unlist(data.col)),
dim(s$z)[1],dim(s$z)[2])
# One colour per grid cell, reshaped to the dimensions of s$z.
Col <- rbPal(nb.div)[col.index]
col= matrix(Col, dim(s$z)[1], dim(s$z)[2])
surface3d(s$x,s$y,s$z,color=col, alpha = 1)
Why would that one work?
I found the answer. I needed to order the cut values and then remap the values of the range with the colors. Not elegant, but working...
library(plyr) # mapvalues()
# Bin the interpolated heights once, as an ordered factor, so that the bin
# labels come out in increasing order of z. Uses s, nb.div and rbPal from the
# code above.
z.bins <- cut(s$z,
              breaks = nb.div,
              include.lowest = TRUE,
              right = TRUE,
              ordered_result = TRUE)
# Bin labels from lowest to highest; label i is paired with colour i below.
# (The variable is still called `range`, as before; it does not stop calls to
# the base function range() from working.)
range <- levels(z.bins)
# One colour per bin, computed once instead of once per column.
bin.colours <- rbPal(nb.div)
# Lay the bin labels out column by column with the same dimensions as s$z.
data.col <- as.data.frame(matrix(as.factor(z.bins),
                                 dim(s$z)[1],
                                 dim(s$z)[2],
                                 byrow = FALSE))
# Replace every bin label by its colour, one column at a time. Columns that do
# not contain every bin are expected, hence warn_missing = FALSE.
for (i in seq_len(ncol(data.col))) {
  data.col[, i] <- mapvalues(data.col[, i],
                             from = range,
                             to = bin.colours,
                             warn_missing = FALSE)
}

Combine bar plot and stat_smooth() line from different data sets in ggplot2

I'm trying to overlay a stat_smooth() line from one dataset over a bar plot of another. Both csv files draw from the same dataset, but I had to make a new one for the bar plot because I had to add a few columns (including error bars) that wouldn't make sense in the big csv. So, I have code for the bar plot, and code for the line made using stat_smooth, but can't figure out how to combine them. I just want a graph with the line on top of the bars. Here's the code for the bar plot:
# Bar plot of Confidence per Complexity level with +/- SE error bars.
# Assumes ggplot2 is attached; the percent labeller comes from the scales
# package and is referenced explicitly so it does not need to be attached.
e <- read.csv("Retro Complex.csv", header = TRUE, sep = ",")
# Keep only the rows whose Accuracy is not 0.
e <- subset(e, Accuracy != 0)
# Treat Complexity as discrete so each level gets its own bar.
e$Complexity <- factor(e$Complexity)
# Error-bar extent: Confidence plus/minus one SE.
limits <- aes(ymax = Confidence + SE, ymin = Confidence - SE)
# Refer to the column by name inside aes(); `e$Complexity` bypasses the data
# argument and breaks as soon as another data set is layered on.
p <- ggplot(e, aes(Complexity, Confidence))
p +
  geom_bar(position = "dodge", stat = "identity") +
  geom_errorbar(limits, position = "dodge", width = 0.25) +
  coord_cartesian(ylim = c(0, 1)) +
  scale_y_continuous(labels = scales::percent) +
  ggtitle("Retro")
And here's for the line
# Binomial GLM fit of risk against Complexity.Sample, drawn without a
# confidence band. stat_smooth() inherits both the data and the x/y mapping
# from ggplot(), so they are not repeated in the layer.
ggplot(retroacc, aes(x = Complexity.Sample, y = risk)) +
  stat_smooth(method = "glm",
              method.args = list(family = "binomial"),
              se = FALSE) +
  ylim(0, 1)
Here's what they both look like:
Stat_smooth() line:
Barplot:
Sample Data
For the bar plot:
structure(list(Complexity = structure(1:5, .Label = c("1", "2",
"3", "4", "5"), class = "factor"), Accuracy = c(1L, 1L, 1L, 1L,
1L), Risk = c(0.69297164, 0.695793434, 0.695891571, 0.746606335,
0.748717949), SE = c(0.003621776, 0.004254081, 0.00669456, 0.008114764,
0.021963804), Proportion = c(0.823475656, 0.809299751, 0.863727821,
0.94724695, 0.882352941), SEAcc = c(0.002716612, 0.003267882,
0.004639995, 0.004059001, 0.015325003)), .Names = c("Complexity",
"Accuracy", "Confidence", "SE", "Proportion", "SEAcc"), row.names = c(1L,
3L, 5L, 7L, 9L), class = "data.frame")
For the line:
structure(list(risk = c(0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), acc = c(0L, 1L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
Uniqueness = c(0.405166959, 0.407414244, 0.285123931, 0.248994487,
0.259019778, 0.334552913, 0.300580793, 0.354632526, 0.309841996,
0.331460876, 0.289981111, 0.362405881, 0.37389863, 0.253672193,
0.342903451, 0.294459829, 0.387447291, 0.519657612, 0.278964406
), Average.Similarity = c(0.406700667, 0.409547355, 0.275663862,
0.240909144, 0.251796956, 0.31827466, 0.240574971, 0.349093002,
0.34253811, 0.348084627, 0.290495997, 0.318312198, 0.404143605,
0.290789337, 0.293259599, 0.320214236, 0.382449298, 0.506295194,
0.335167223), Complexity.Sample = c(8521L, 11407L, 3963L,
2536L, 2327L, 3724L, 4005L, 5845L, 5770L, 5246L, 3629L, 3994L,
4285L, 1503L, 8222L, 3683L, 5639L, 10288L, 3076L)), .Names = c("risk",
"acc", "Uniqueness", "Average.Similarity", "Complexity.Sample"
), class = "data.frame", row.names = c(NA, -19L))
So yeah, if any of you guys know how to combine these into one plot please let me know!!

Increasing size of circles in ggplot2 graphs [duplicate]

This question already has answers here:
How to increase size of the points in ggplot2, similar to cex in base plots?
(2 answers)
Closed 8 years ago.
I want to increase the scale of circles in ggplot2. I tried something like this aes(size=100*n) but it did not work for me.
df <-
structure(list(Logit = c(-2.9842723737754, 1.49511606166294,
-2.41756623714116, -2.96160412831003, -2.12996384688938, -1.61751836789074,
-0.454353048358851, 0.9284099250287, -0.144082412641708, -2.30422500981431,
-0.658367257547178, 0.082600042011989, -0.318343575566633, -0.717447827238429,
-1.0508122312565, -2.82559465551781, 0.361703788394458, -1.85086010050691,
-0.0916611209129359, -0.740116072703798, 0.0599317965466193,
-0.370764867295404, -0.703703748477917, -0.749040239408657, -2.7575899191217,
-2.51532401980067, 1.38177483433609, 1.47244781619757, -0.205002348239784,
0.135021333740761), PRes = c(-0.661648371860934, 1.63444424896772,
-0.30348016008728, -0.230651042355737, 1.07487559116003, -0.460143991337599,
-0.823052248365889, -0.999903730870253, -0.959022180953211, -0.321344960297977,
-1.40881799070885, -0.674754839222841, 0.239931843185434, -1.81660411888874,
0.830318780187542, -0.24702802619469, 0.692695708496924, -0.40412065378683,
-0.977640032689132, -0.715192962242284, -1.06270128658429, -0.856103053117159,
-0.731162073769824, 1.51334938767359, 4.02946801536109, 3.56902361409375,
0.505952430753934, 0.483660641952208, 1.13712619443209, 0.951889504154342
), n = c(7L, 38L, 1L, 1L, 11L, 1L, 1L, 4L, 1L, 1L, 3L, 9L, 2L,
8L, 2L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L)), .Names = c("Logit", "PRes", "n"), row.names = c(NA, -30L
), class = "data.frame")
library(ggplot2)
# Open circles whose size follows n, each labelled with its row name;
# black-and-white theme with the legend hidden.
ggplot(df, aes(Logit, PRes, label = rownames(df))) +
  geom_point(mapping = aes(size = n), shape = 1, colour = "black") +
  geom_text() +
  theme_bw() +
  theme(legend.position = "none")
Simply add a scale for size:
+ scale_size_continuous(range = c(10, 15))

How can I make the label not overlap the point in R?

I am using this code to plot the following figure:
# Outline of Oklahoma with the stations drawn as red points and labelled by
# name. The labels are centred on the points, which is the overlap the
# question is about.
m <- map_data("state", region = "Oklahoma")
ggplot() +
  geom_polygon(data = m, aes(x = long, y = lat, group = group),
               colour = "black", fill = "white") +
  # Stray empty arguments removed from this layer.
  geom_point(data = stations, aes(x = long, y = lat), colour = "red") +
  # `x=,long` left x unmapped; it is now `x = long`. `fill = NULL` mapped
  # nothing and was dropped. size = 1 stays inside aes() as in the original,
  # so it is still mapped through the size scale rather than used as a
  # literal text size.
  geom_text(data = stations, aes(x = long, y = lat, label = name, size = 1)) +
  xlab("Longitude") +
  ylab("Latitude") +
  coord_fixed()
How can I stop the text from overlapping the point markers? Thanks!
Data
dput(stations)
structure(list(coop = c(340017L, 340179L, 340256L, 340292L, 340548L,
340593L, 340908L, 341243L, 341504L, 341724L, 341828L, 342678L,
342912L, 342944L, 343497L, 343628L, 343821L, 343871L, 344055L,
344204L, 344235L, 344298L, 344573L, 344766L, 344861L, 345063L,
345509L, 345779L, 345855L, 346130L, 346139L, 346278L, 346629L,
346638L, 346670L, 346926L, 346935L, 347012L, 347254L, 348501L,
348677L, 349395L, 349422L, 349445L), lat = c(34.7864, 34.5903,
34.2208, 34.1714, 36.7683, 36.8125, 36.7236, 36.8003, 35.1756,
36.7747, 36.3225, 34.0003, 36.4194, 35.2164, 35.6267, 36.5914,
35.8161, 35.585, 36.0942, 34.9894, 35.0567, 36.8589, 36.7222,
36.9031, 35.8583, 34.6097, 34.8911, 35.505, 36.8833, 35.7781,
36.2283, 36.8914, 36.1217, 35.4253, 35.6239, 34.7253, 36.6692,
36.2886, 35.0539, 36.1175, 35.9369, 34.1747, 35.52, 35.4814),
long = c(-96.685, -99.3344, -95.615, -97.1294, -96.0261,
-100.5308, -102.4806, -99.6403, -98.5794, -98.3583, -95.5808,
-96.3686, -97.8747, -99.8628, -98.3225, -101.6181, -97.395,
-99.3953, -97.835, -99.0525, -96.3861, -101.2172, -97.7903,
-102.965, -97.9294, -98.4572, -99.5017, -96.9767, -94.8833,
-95.3339, -99.17, -97.0586, -98.315, -96.3033, -96.025, -97.2814,
-96.3472, -97.2897, -94.6264, -97.095, -94.9644, -97.9964,
-98.6986, -95.2039), elev = c(309.4, 420.6, 143.3, 268.2,
217.9, 751.3, 1259.7, 588.3, 451.4, 359.7, 179.2, 182.9,
379.5, 627.9, 487.7, 1008.9, 338.3, 554.7, 357.8, 474.3,
260.6, 912.9, 318.5, 1325.9, 320, 350.5, 486.2, 281.9, 245.4,
157.9, 576.1, 347.5, 370.3, 285, 197.2, 286.5, 254.5, 312.4,
134.1, 272.8, 259.1, 278, 493.2, 167.6), state = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "OK", class = "factor"),
name = structure(1:44, .Label = c("ADA", "ALTUS IRIG RSCH STN",
"ANTLERS", "ARDMORE", "BARTLESVILLE MUNI AP", "BEAVER", "BOISE CITY 2 E",
"BUFFALO 2 SSW", "CARNEGIE 5 NE", "CHEROKEE", "CLAREMORE 2 ENE",
"DURANT", "ENID", "ERICK", "GEARY", "GOODWELL RSCH STN",
"GUTHRIE 5S", "HAMMON 3 SSW", "HENNESSEY 4 ESE", "HOBART MUNI AP",
"HOLDENVILLE 2SSE", "HOOKER", "JEFFERSON", "KENTON", "KINGFISHER",
"LAWTON", "MANGUM", "MEEKER 5 W", "MIAMI", "MUSKOGEE", "MUTUAL",
"NEWKIRK 1NW", "OKEENE", "OKEMAH", "OKMULGEE WTR WKS", "PAULS VALLEY 4 WSW",
"PAWHUSKA", "PERRY", "POTEAU WTR WKS", "STILLWATER 2 W",
"TAHLEQUAH", "WAURIKA", "WEATHERFORD", "WEBBERS FALLS 5 WSW"
), class = "factor")), .Names = c("coop", "lat", "long",
"elev", "state", "name"), class = "data.frame", row.names = c(NA,
-44L))
The labels can still overlap each other, but they can be offset from the dots.
The command below adds some transparency, an offset to the text position, and makes it left justified.
# Only values that vary per row belong in aes(). The constant text size,
# justification and transparency are set on the layer, so they are used as
# given and do not create legends. Labels start 0.05 degrees of longitude to
# the right of each point (hjust = 0 left-justifies them there).
geom_text(data = stations,
          aes(x = long + .05, y = lat, label = name),
          size = 3, hjust = 0, alpha = .5)
To manually adjust the position of some labels in ggplot, you can add an "overlap" field containing a small offset to apply to each latitude:
# Stations whose label is nudged up or down to separate it from a neighbour.
overlapUP <- c("GEARY", "STILLWATER 2 W", "ADA", "LAWTON")
overlapDOWN <- c("JEFFERSON", "HENNESSEY 4 ESE", "GUTHRIE 5S", "WEATHERFORD")
# Latitude offset per station: 0 by default, +0.05 for the "up" names and
# -0.05 for the "down" names (the "down" assignment is applied last).
stations$overlap <- 0
nudge_up <- stations$name %in% overlapUP
nudge_down <- stations$name %in% overlapDOWN
stations$overlap[nudge_up] <- .05
stations$overlap[nudge_down] <- -.05
Then use:
# Per-row values (position, label) stay in aes(). The constant justification
# and transparency are set on the layer alongside size, so they are used as
# given and do not create legends. `overlap` shifts individual labels up or
# down in latitude.
geom_text(data = stations,
          aes(x = long + .05, y = lat + overlap, label = name),
          size = 3, hjust = 0, alpha = .5)

How to generate facetted ggplot graph where each facet has ordered data?

I want to sort my factors (Condition, Parameter and SubjectID) by MeanWeight and plot MeanWeight against SubjectID such that when faceted by Condition and Parameter, MeanWeight appears in descending order.
Here is my solution, which isn't giving me what I want:
dataSummary <- structure(list(SubjectID = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("s001",
"s002", "s003", "s004"), class = "factor"), Condition = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("1", "2", "3"), class = "factor"), Parameter = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L), .Label = c("(Intercept)", "PrevCorr1", "PrevFail1"), class = "factor"),
MeanWeight = c(-0.389685536725783, 0.200987679398502, -0.808114314421089,
-0.10196105040707, 0.0274188815763494, 0.359978984195839,
-0.554583879312783, 0.643791202050396, -0.145042221940287,
-0.0144598460145723, -0.225804028997856, -0.928152539784374,
0.134025102103562, -0.267448309989731, -1.19980109795115,
0.0587152632631923, 0.0050656268880826, -0.156537446664213
)), .Names = c("SubjectID", "Condition", "Parameter", "MeanWeight"
), row.names = c(NA, 18L), class = "data.frame")
## Row permutation that sorts by Condition, then Parameter, then SubjectID,
## and finally by decreasing MeanWeight (note the minus sign).
orderWeights <- order(dataSummary$Condition, dataSummary$Parameter, dataSummary$SubjectID, -dataSummary$MeanWeight)
## Set factors to the new order. I expect this to sort for each facet when plotting, but it doesn't seem to work.
## NOTE(review): each *Order vector below has one entry per row of
## dataSummary, so it repeats values; factor() expects `levels` to be unique.
## This may be why the approach fails - confirm.
conditionOrder <- dataSummary$Condition[orderWeights]
dataSummary$Condition <- factor(dataSummary$Condition, levels=conditionOrder)
paramOrder <- dataSummary$Parameter[orderWeights]
dataSummary$Parameter <- factor(dataSummary$Parameter, levels=paramOrder)
sbjOrder <- dataSummary$SubjectID[orderWeights]
dataSummary$SubjectID <- factor(dataSummary$SubjectID, levels=sbjOrder)
## Plot: one point per subject, joined to x = 0 by a grey segment, facetted
## by Parameter (rows) and Condition (columns) with a free y axis per row.
ggplot(dataSummary, aes(x = MeanWeight, y = SubjectID)) +
  scale_x_continuous(limits = c(-3, 3)) +
  # A vertical line is positioned with xintercept (not yintercept); it marks
  # x = 0, the value every segment below is anchored to.
  geom_vline(xintercept = 0.0, size = 0.1, colour = "#a9a9a9", linetype = "solid") +
  geom_segment(aes(yend = SubjectID), xend = 0, colour = "grey50") +
  geom_point(size = 2) +
  facet_grid(Parameter ~ Condition, scales = "free_y")
I tried a few other approaches, but they didn't work either:
dataSummary <- dataSummary[order(dataSummary$Condition, dataSummary$Parameter, dataSummary$SubjectID, -dataSummary$MeanWeight),]
or this one
dataSummary <- transform(dataSummary, SubjectID=reorder(Condition, Parameter, SubjectID, MeanWeight))
You can order your data and plot it. However, the labels no longer correspond to subject IDs, but to the reordered subjects. If that is not what you want, you cannot use faceting but have to plot the parts separately and use e.g. grid.arrange to combine the different plots.
# library() stops with an error if plyr is not installed; require() would
# only return FALSE and let the script fail later at ddply().
library(plyr)
## Ordered data
## Within each Condition x Parameter group, put the MeanWeight values in
## increasing order while leaving the SubjectID column untouched. After this
## the y-axis labels denote rank positions rather than the original subjects.
datOrder <- ddply(dataSummary, c("Condition", "Parameter"), function(x) {
  # Nothing to reorder in a group with at most one row.
  if (nrow(x) <= 1) return(x)
  x$MeanWeight <- x$MeanWeight[order(x$MeanWeight)]
  x
})
## Plot
ggplot(datOrder, aes(x = MeanWeight, y = SubjectID)) +
  scale_x_continuous(limits = c(-3, 3)) +
  # A vertical line is positioned with xintercept (not yintercept); it marks
  # x = 0, the value every segment below is anchored to.
  geom_vline(xintercept = 0.0, size = 0.1, colour = "#a9a9a9", linetype = "solid") +
  geom_segment(aes(yend = SubjectID), xend = 0, colour = "grey50") +
  geom_point(size = 2) +
  facet_grid(Parameter ~ Condition) +
  scale_y_discrete(name = "Ordered subjects")

Resources