Add points ggplot - r

Hi I have many data frame like this
id oldid yr mo dy lon lat
1 01206295 Aberfeldy 1885 3 22 -127.1 -31.78
2 05670001 05670005 1885 3 22 -4.38 49.15
3 06279 06279 1885 3 22 -123.5 37.5
4 106251 06323 1885 3 22 178.5 19.5
5 58FFF3618 58FFF3618 1885 3 22 -0.73 69.73
6 Achille.F Achille.F 1885 3 22 -35.62 -2.98
stored in different files (myfiles), and I am trying to plot the (lon, lat) points for each of them, with the colour chosen according to the id value. So far I am doing it like this:
# Draw one world map per file with that file's (lon, lat) points on top,
# filled according to the id value.
for (i in seq_along(myfiles)) {
  colnames(myfilesContent[[i]]) <-
    c("id", "oldid", "yr", "mo", "dy", "lon", "lat")
  # Go through as.character() so that a column stored as a factor is converted
  # to its printed values rather than to its internal integer codes.
  myfilesContent[[i]]$lon <- as.numeric(as.character(myfilesContent[[i]]$lon))
  myfilesContent[[i]]$lat <- as.numeric(as.character(myfilesContent[[i]]$lat))
  # The point layer must be part of the object stored in p; a bare
  # `p + geom_point(...)` is discarded and print(p) would show only the map.
  p <- ggplot() +
    geom_polygon(data = world_map, aes(x = long, y = lat, group = group)) +
    geom_point(
      data = myfilesContent[[i]],
      aes(x = lon, y = lat, fill = as.factor(id)),
      size = 4, shape = 21, show.legend = FALSE
    )
  print(p)
}
However, I am not sure whether an id that appears in several files will be assigned the same colour in every plot.
Many thanks

You can make sure the levels for all your id columns are the same. First, get a master list of all the IDs from all the data.frames
# Master list of every id that occurs in any of the data frames.
# as.character() makes this work whether the id column is stored as a factor
# or as plain strings (levels() would return NULL for the latter).
allids <- unique(unlist(
  lapply(myfilesContent, function(x) as.character(x[, 1]))
))
Then make sure all the ID columns share these levels
# Re-level the id column of every data frame against the shared master list.
# The result is assigned back to myfilesContent: lapply() works on copies, so
# an assignment made only inside the anonymous function would be lost.
myfilesContent <- lapply(myfilesContent, function(x) {
  x[, 1] <- factor(x[, 1], levels = allids)
  x
})
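If they have the same levels, they should get the same colors, provided the fill scale keeps unused levels (e.g. add scale_fill_discrete(drop = FALSE)); by default ggplot2 drops levels that do not occur in the plotted data and reassigns the colours.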

Related

Making multi-line plots in R using ggplot2

I would like to compile some data into a ggplot() line plot of different colors.
It's rainfall in various places over 100 days, and the data is quite different between locations which is giving me fits.
I've tried using different suggestions from this forum and they don't seem to be working well for this data. Sample data:
Time Location1 Location2 Location3
0 48 99.2966479761526 2
1 51 98.7287820735946 4
2 58 98.4803262236528 4.82842712474619
3 43 97.8941490454599 5.46410161513775
4 47 96.6091435402632 6
5 47 95.207282404881 6.47213595499958
6 41 94.8696538619697 6.89897948556636
7 34 94.6514389757067 7.29150262212918
8 40 93.7297335476615 7.65685424949238
9 57 93.2440731907263 8
My code thus far is
# Line plot of rainfall over time, one geom_line() layer per location column.
# NOTE(review): col="red" etc. are written inside aes(), so they are mapped as
# data labels rather than used as literal colours. scale_color_manual() then
# pairs its labels/values with those labels in the scale's own order
# (presumably alphabetical: "blue", "green", "red"), which would explain why
# Location1 does not come out red -- confirm against the rendered legend.
ggplot(Rain) +
geom_line(aes(x=Time,y=Location1,col="red")) +
geom_line(aes(x=Time,y=Location2,col="blue")) +
geom_line(aes(x=Time,y=Location3,col="green")) +
scale_color_manual(labels = c("Location 1","Location 2","Location 3"),
values = c("red","blue","green")) +
xlab("Time (Days)") + ylab("Rainfall (Inches)") + labs(color="Locations") +
ggtitle("Rainfall Over 100 Days In Three Locations")
So far it gives me everything that I want but for some reason the colors are wrong when I plot it, i.e. it plots location 1 in green while I told it red in my first geom_line.
library(tidyr)
library(ggplot2)
# Reshape to long format: one row per (Time, Place) pair, rainfall in Rain.
# pivot_longer() is the current replacement for the superseded gather().
df_long <- pivot_longer(
  df1,
  cols = -Time,
  names_to = "Place",
  values_to = "Rain"
)
# A single layer with colour mapped to Place draws one line per location and
# builds the legend automatically.
ggplot(df_long) +
  geom_line(aes(x = Time, y = Rain, color = Place))
Data:
# Sample data: rainfall at three locations for days 0-9, read from inline text.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
df1 <- read.table(text="Time Location1 Location2 Location3
0 48 99.2966479761526 2
1 51 98.7287820735946 4
2 58 98.4803262236528 4.82842712474619
3 43 97.8941490454599 5.46410161513775
4 47 96.6091435402632 6
5 47 95.207282404881 6.47213595499958
6 41 94.8696538619697 6.89897948556636
7 34 94.6514389757067 7.29150262212918
8 40 93.7297335476615 7.65685424949238
9 57 93.2440731907263 8",
header = TRUE, stringsAsFactors = FALSE)

R: ggplot2 - plots doesn't look right - vertical instead of diagonal lines

I'd like to create a plot for 2 categorical variables. Therefore I created two dummy sets but - although they contain the same items - they look totally different. Same happens with my real data as well.
I also tried to perform it with ordered columns, same result.
Please see attached my coding, the three plots (plot 2 is the best practice) + real data plot (but anonymized) to show you problem. Don't understand why there are those vertical lines.
Thank you in advance
# Two dummy data sets (dat1, dat2) holding the same four total_bill values,
# each plotted as total_bill by sex with one line per time.
library(ggplot2)
library(dplyr)
dat1 <- data.frame(
# NOTE(review): factor( is closed before levels=, so levels=c("Female","Male")
# is passed to data.frame() and becomes a column named "levels" instead of
# setting the factor levels of sex.
sex = factor(c("Male","Female","Male","Female")), levels=c("Female","Male"),
time = factor(c("Lunch","Dinner","Lunch","Dinner"), levels=c("Lunch","Dinner")),
total_bill = c(17.42, 16.81, 16.24, 13.53)
)
dat1
#plot1: shows horizontal lines although it should look like the plot 2
# NOTE(review): in dat1 every "Male" row is "Lunch" and every "Female" row is
# "Dinner", so each time group has both of its points at the same x position.
ggplot(data=dat1, aes(x=sex, y=total_bill, group=time, colour=time)) +
geom_line() +
geom_point() +
ggtitle("Plot 1")
#different approach for plot1
# NOTE(review): the result of arrange() is only printed, not assigned, so dat1
# itself is unchanged and the next plot uses the same data as plot 1.
arrange(dat1 , sex, time)
dat1
#has ordered columns like I wanted it to be
#still looks like plot1
ggplot(data=dat1, aes(x=sex, y=total_bill, group=time, colour=time)) +
geom_line() +
geom_point() +
ggtitle("Plot 1 ordered")
# dat2 pairs every sex with both Lunch and Dinner, unlike dat1.
dat2 <- data.frame(
sex = factor(c("Female","Female","Male","Male")),
time = factor(c("Lunch","Dinner","Lunch","Dinner"), levels=c("Lunch","Dinner")),
total_bill = c(13.53, 16.81, 16.24, 17.42)
)
dat2
#plot2: look like I'd like to have it this way
ggplot(data=dat2, aes(x=sex, y=total_bill, group=time, colour=time)) +
geom_line() +
geom_point() +
ggtitle("Plot 2")
Also an outline of the data plot which has those vertical lines in it
library(ggplot2)
library(dplyr)
mydata2
# ####Output (fictive data but same problem as with my real data, still vertical lines in it but should only have 2 lines like in plot2)
# group NM score
# 1 KG S 2537.94514
# 2 EG S 1766.39019
# 3 KG S 3970.91802
# 4 KG S 4089.14939
# 5 KG S 2795.42964
# 6 EG S 2286.60411
# 7 KG S 4027.22993
# 8 KG S 1030.18328
# 9 EG S 719.73679
# 10 EG S 724.93663
# 11 EG S 2929.03717
# 12 EG S 521.55736
# 13 KG S 1435.85625
# 14 EG S 1496.39471
# 15 EG S 3521.25827
# 16 KG S 2138.17928
# 17 EG S 1233.86267
# 18 KG S 591.33086
# 19 EG S 2171.97341
# 20 EG S 3871.92536
# 21 EG S 468.10133
# 22 KG S 2419.67419
# 23 KG S 1338.29305
# 24 KG S 1629.33862
# 25 EG S 560.39680
# 26 EG S 546.22468
# 27 KG S 3398.94647
# 28 KG S 1117.72716
# 29 EG S 2794.90527
# 30 EG S 3606.77693
# 31 KG S 3558.67156
# 32 KG S 196.64992
# 33 EG S 2174.69930
# 34 EG S 3444.10732
# 35 KG S 670.60907
# 36 EG S 3719.20997
# 37 KG S 65.76227
# 38 EG S 3420.12225
# 39 KG S 1405.83738
# 40 KG S 2859.33873
# 41 EG T 1296.75111
# 42 EG T 436.53580
# 43 KG T 213.09334
# 44 EG T 2073.70465
# 45 KG T 1679.98816
# 46 EG T 1599.26738
# 47 EG T 777.65179
# 48 EG T 1738.45395
# 49 KG T 3269.54120
# 50 EG T 3506.07302
# 51 EG T 1764.61915
# 52 EG T 493.47846
# 53 KG T 1729.02949
# 54 EG T 1454.57702
# 55 EG T 2577.32018
# 56 EG T 295.08653
# 57 EG T 3811.24064
# 58 KG T 2320.35879
# 59 EG T 1285.65291
# 60 KG T 3600.26095
# 61 EG T 3738.89452
# 62 KG T 3472.53512
# 63 KG T 1203.33462
# 64 EG T 1809.41229
# 65 EG T 3536.17972
# 66 EG T 2637.59869
# 67 KG T 1279.44567
# 68 KG T 1141.81247
# 69 KG T 3951.54206
# 70 KG T 1940.11505
# 71 KG T 192.74602
# 72 KG T 1235.81839
# 73 EG T 1907.09384
# 74 KG T 1772.86806
# 75 KG T 997.92437
# 76 KG T 217.81433
# 77 KG T 3595.69359
# 78 EG T 910.07955
# ####End of output
# Score by group, with one line, point shape and colour per NM value.
# NOTE(review): mydata2 holds many score values for each group/NM combination,
# so geom_line() presumably joins all points that share the same x position,
# which would produce the vertical lines. Summarising to one value per
# group/NM before plotting should give two lines -- TODO confirm.
# NOTE(review): the legend titles still read "Sex of participant" although the
# mapped variable is NM.
ggplot(data=mydata2, aes(x=group, y=score, group=NM, shape=NM, colour=NM)) +
geom_line(aes(linetype=NM), size=1) + # Set linetype by NM
geom_point(size=3, fill="white") + # Use larger points, fill with white
expand_limits(y=0) + # Set y range to include 0
scale_colour_hue(name="Sex of participant", # Set legend title
l=30) + # Use darker colors (lightness=30)
scale_shape_manual(name="Sex of participant",
values=c(22,21)) + # Use points with a fill color
scale_linetype_discrete(name="Sex of participant") +
xlab("Group") + ylab("Score") + # Set axis labels
ggtitle("Data") + # Set title
theme_bw() +
theme(legend.position=c(.7, .4)) # Position legend inside
# This must go after theme_bw
Plot1
Plot1 ordered
Plot2
Data plot which looks wrong
the data2 that makes the plot you want looks like this:
> dat2
sex time total_bill
1 Female Lunch 13.53 # female has lunch and dinner
2 Female Dinner 16.81
3 Male Lunch 16.24 # male has lunch and dinner
4 Male Dinner 17.42
However your data1 before and after the arrange looks like this:
before
sex levels time total_bill
1 Male Female Lunch 17.42
2 Female Male Dinner 16.81 # female only has dinner
3 Male Female Lunch 16.24 # male only has lunch
4 Female Male Dinner 13.53
after
sex levels time total_bill
1 Female Male Dinner 16.81 # female only has dinner
2 Female Male Dinner 13.53
3 Male Female Lunch 17.42 # male only has lunch
4 Male Female Lunch 16.24
In both versions of dat1 (before and after arrange), females only have dinner and males only have lunch. The stray levels column is also the same in both.
So the lines in your plots are drawn between the two female points at dinner, rather than across females points at lunch and dinner.
For your updated question, I run your code with the column names like this:
# group NM sex score
# 1 KG S 2537.945
# 2 EG S 1766.390
# 3 KG S 3970.918
and get this? what is the issue?

ggplot doesn't show the second geom_line() in my plot

My df:
p1 p2 p3 x y
0 3000 14 0.0 0.026500
20 3000 14 11.0 0.054000
30 3000 14 17.9 0.057000
60 3000 14 49.3 0.064000
80 3000 14 77.4 0.063000
60 3500 14 45.3 0.061000
60 4000 14 41.4 0.058300
60 4400 14 43.7 0.073600
60 3500 9 41.7 0.060556
60 3500 18 46.7 0.060700
60 3500 21 49.2 0.059900
This is the result of a "one parameter at a time" experimental design, i.e., one where the parameters p1, p2 and p3 were changed one at a time (definitely not the best kind of DOE, but that's what I got). For each observation, two variables are measured, x and y. I would like to plot a line connecting all points of the p1 study (the first 5 rows), a line connecting all points of the p2 study (rows 4 and 6:8) and a third line connecting the points of the p3 study (rows 6 and 9:11). I tried with
# Points and lines for the one-parameter-at-a-time study, coloured by p2 and
# with point shape by p3; the last layer is meant to add the p2-study line.
# NOTE(review): the last layer inherits color = p2. If p2 is discrete it also
# defines the groups, and the filtered rows (p1 == 60, p3 == 14) each have a
# different p2 value, so every group would hold a single point and no line is
# drawn. Adding group = 1 to that layer's aes() should connect them -- TODO
# confirm.
ggplot(df, aes(x = x, y = y, color = p2)) +
geom_point( aes(shape = p3)) +
geom_line() +
geom_line(data = filter(df, p1 == "60" & p3 == "14"), aes(x = x, y = y))
The red and the green line correspond to the p1 and p3 study, but ggplot doesn't plot the line corresponding to the p2. How can I manage to plot it? In practice, I need either a geom_path or a geom_line connecting the triangle symbols in the center of the screen (x coordinate between 40 and 50).

geom_bar labeling for melted data / stacked barplot

I have a problem with drawing stacked barplot with ggplot. My data looks like this:
timeInterval TotalWilling TotalAccepted SimID
1 16 12 Sim1
1 23 23 Sim2
1 63 60 Sim3
1 69 60 Sim4
1 61 60 Sim5
1 60 54 Sim6
2 16 8 Sim1
2 23 21 Sim2
2 63 52 Sim3
2 69 64 Sim4
2 61 45 Sim5
2 60 32 Sim6
3 16 14 Sim1
3 23 11 Sim2
3 63 59 Sim3
3 69 69 Sim4
3 61 28 Sim5
3 60 36 Sim6
I would like to draw a stacked barplot for each simID over a timeInterval, and Willing and Accepted should be stacked. I achieved the barplot with the following simple code:
# Load the simulation results and reshape them to long format: one row per
# (SimID, timeInterval, variable) combination with the count in `value`.
dat <- read.csv("myDat.csv")
meltedDat <- melt(dat, id.vars = c("SimID", "timeInterval"))
# One panel per simulation; the melted measures are stacked within each
# time interval.
ggplot(meltedDat, aes(x = timeInterval, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "stack") +
  facet_wrap(~ SimID)
I get the following graph:
My problem is that I would like to put a percentage label on each segment of every stacked bar: Willing/(Willing+Accepted) on the Willing segment and Accepted/(Accepted+Willing) on the Accepted segment, so that I can see what share is willing and what share is accepted (for example, 45% on the red part of a bar and 55% on the blue part). I cannot seem to achieve this kind of labeling.
Any hint is appreciated.
applied from Showing data values on stacked bar chart in ggplot2
meltedDat <- melt(dat, id.vars = c("SimID", "timeInterval"))
# Share of each variable within its own bar, i.e. within one
# (timeInterval, SimID) combination: value / (Willing + Accepted).
meltedDat <- ddply(meltedDat, .(timeInterval, SimID), transform,
                   normvalue = value / sum(value))
# Percentage label; the format has exactly one placeholder, so exactly one
# value vector is passed.
meltedDat$valuestr <- sprintf("%.2f%%", meltedDat$normvalue * 100)
# position_stack(vjust = 0.5) centres each label in its segment using the same
# stacking as the bars, so no manually computed y positions are needed.
ggplot(meltedDat, aes(timeInterval, value, fill = variable)) +
  facet_wrap(~ SimID) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(aes(label = valuestr),
            position = position_stack(vjust = 0.5), size = 2)
also, it looks like you may have some of your variables coded as factors.

ggplot create map with arrows

I have a data frame like this
id lon lat
1 A -69.5 -58.5
2 A -69.5 -58.5
3 A -69.5 -57.5
4 A -68.5 -57.5
5 A -68.5 -57.5
6 A -68.5 -57.5
7 A -66.5 -57.5
8 A -68.5 -56.5
9 A -68.5 -56.5
10 A -67.5 -56.5
11 A -65.5 -56.5
12 A -65.5 -56.5
13 A -65.5 -55.5
14 A -62.5 -54.5
15 B -177 -52.5
16 B -178 -50.5
17 B -179 -48.5
18 B 179 -47.5
19 B 178 -46.5
20 B 177 -46.5
and I want to produce a map of the position of A and B, linked by oriented lines. However when ids cross the Pacific (lon=-180 -> lon=+180) I get an arrow crossing the whole figure, like shown below.
This is the code I am using
# World map in black with one arrow-headed path per id drawn on top.
worldmap <- map_data("world")
ggplot(test, aes(x = lon, y = lat, colour = factor(id))) +
  # geom_polygon() has no `center` parameter, so it is not passed here
  geom_polygon(data = worldmap, aes(x = long, y = lat, group = group),
               fill = "black", colour = "black") +
  xlab("") + ylab("") +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.background = element_rect(fill = "white", colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  geom_path(size = 2, arrow = arrow(angle = 30, length = unit(0.6, "inches")))
How can I fix it?
Thanks
I guess that depends on what you think the "right" thing to do is. I decided to break up the paths that cross the globe into two segments by adding points at the edges of the map, and then creating a "sequence" indicator so ggplot knows which points to connect. Here's the transformation for your sample data:
# Split every id's track wherever it jumps across the +/-180 meridian and add
# a point on each map edge, so that each piece can be drawn as its own path.
test2 <- do.call(rbind, lapply(split(test, test$id), function(x) {
  # A longitude jump of more than 250 degrees, in either direction, marks a
  # crossing; cp numbers the pieces between crossings.
  cp <- cumsum(c(FALSE, abs(diff(x$lon)) > 250))
  xx <- split(x, cp)
  # Tag each piece with its sequence number for use in a group aesthetic.
  xx <- Map(cbind, xx, seq = seq_along(xx))
  Reduce(function(a, b) {
    lasta <- a[nrow(a), ]
    firstb <- b[1, ]
    # Move copies of the two neighbouring points onto the map edge on their
    # own side of the meridian.
    lasta$lon <- 180 * sign(lasta$lon)
    firstb$lon <- 180 * sign(firstb$lon)
    # Both edge points share the midpoint latitude. The values must be
    # combined with c(): a second positional argument to mean() is `trim`.
    lasta$lat <- mean(c(lasta$lat, firstb$lat))
    firstb$lat <- lasta$lat
    rbind(a, lasta, firstb, b)
  }, xx)
}))
tail(test2)
# id lon lat seq
# B.17 B -179 -48.5 1
# B.171 B -180 -48.0 1
# B.18 B 180 -48.0 2
# B.181 B 179 -47.5 2
# B.19 B 178 -46.5 2
# B.20 B 177 -46.5 2
here you can see that we've broken the B line up into two sequences. Then if we use a group aesthetic
geom_path(aes(group=interaction(id, seq)), ...)
then ggplot will only connect those points that are in the same id/seq group. This will prevent the line from going across the ocean. However, because we are drawing two lines for that id rather than one, there's no way to turn off the arrow head for just one of the segments. You might want to find another way to indicate start/end.

Resources