excluding new layer from scale_size - r

I have plotted a scatter plot with the point size scaled by frequency:
g<-ggplot(d, aes(x=Treatment, y= Seam.Cell.Number, size=Frequency))+geom_point(aes(colour=Strain))+ scale_size_continuous(range = c(3, 10), breaks=c(0,1, 2, 3, 4, 5,6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50))+guides(size=FALSE)
Now I am trying to plot means with standard error bars on top. I have the mean and standard error already calculated in columns in my csv file. So so far I have attempted:
g+geom_point(aes(x=Treatment,y=Mean))+geom_errorbar(aes(ymin=Mean-Standard.Error, ymax=Mean+Standard.Error, width=.4))+theme(axis.text.x = element_blank())+theme(legend.key = element_rect(colour = "black"))
And:
g+layer(data=d, mapping=aes(x=Treatment,y=Mean), geom="point")+geom_errorbar(aes(ymin=Mean-Standard.Error, ymax=Mean+Standard.Error), width=.4)+ylab("Seam Cell Number")
But they both give me very fat error bars/data points. It seems they are being affected by my size scaling in object g. I have tried to modify the size and width of the error bars, and I have tried to modify the size of the data points, both in these last bits of code, but to no avail. Is there a way to 'cancel' the size command for this layer?

If you reverse the order of your ggplot, you may be able to avoid the size distortion on the error bars.
Not having reproducible data, I made some up.
df <- data.frame(Treatment = (1:100), Seam.Cell.Number = 3:102, Frequency = 5:104,
Strain = rep(c("A", "B", "C", "D"), 25))
std <- function(x) sd(x)/sqrt(length(x))
Mean <- mean(df$Treatment)
df$Standard.Error <- std(df$Treatment)
g <- ggplot(df, aes(x = Treatment, y = Seam.Cell.Number)) +
geom_point(aes(x=Treatment, y=Mean)) +
geom_errorbar(aes(ymin=Mean-df$Standard.Error, ymax=Mean+df$Standard.Error, width=.4))+
theme(axis.text.x = element_blank())+
theme(legend.key = element_rect(colour = "black"))
g + geom_point(aes(colour=Strain)) +
scale_size_continuous(range = c(3, 10), breaks=c(0,1, 2, 3, 4, 5,6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50)) +
guides(size=FALSE)

Related

tidying igraph plot and routing or TSP question

I have less experience in R and I need help tidying my plot as it looks messy. Also, my project is to find the best minimal route from Seoul to every city and back to Seoul. It is almost like Traveling Salesman Problem (TSP) but there are some cities needed to be visited more than once as it is the only way to reach certain cities. I don't know how to do and what packages to use.
This is my code for igraph plot
library(igraph)
g1 <- graph( c("Seoul","Incheon","Seoul","Goyang","Seoul","Seongnam","Seoul",
"Bucheon","Seoul","Uijeongbu","Seoul","Gimpo",
"Seoul","Gwangmyeong", "Seoul", "Hanam","Seoul", "Guri",
"Seoul","Gwacheon","Busan","Changwon","Busan","Gimhae",
"Busan","Jeju","Busan","Yangsan","Busan","Geoje",
"Incheon","Goyang","Incheon","Bucheon","Incheon","Siheung",
"Incheon","Jeju","Incheon","Gimpo","Daegu","Gumi",
"Daegu","Gyeongsan","Daegu","Yeongcheon","Daejeon",
"Cheongju","Daejeon","Nonsan","Daejeon","Gongju",
"Daejeon","Gyeryong","Gwangju","Naju","Suwon","Yongin",
"Suwon","Seongnam","Suwon","Hwaseong","Suwon","Ansan",
"Suwon","Gunpo","Suwon","Osan","Suwon","Uiwang",
"Ulsan","Yangsan","Ulsan","Gyeongju","Ulsan","Miryang",
"Yongin","Seongnam","Yongin","Hwaseong","Yongin","Pyeongtaek",
"Yongin","Gwangju-si","Yongin","Icheon","Yongin","Anseong",
"Yongin","Uiwang","Goyang","Gimpo","Goyang","Paju","Goyang",
"Yangju","Changwon","Gimhae","Changwon","Jinju","Changwon",
"Miryang","Seongnam","Gwangju-si","Seongnam","Hanam","Seongnam",
"Uiwang","Seongnam","Gwacheon","Hwaseong","Ansan","Hwaseong",
"Pyeongtaek","Hwaseong","Gunpo","Hwaseong","Osan","Cheongju",
"Cheonan","Cheongju","Sejong","Bucheon","Siheung","Bucheon",
"Gwangmyeong","Ansan","Anyang","Ansan","Siheung","Ansan",
"Gunpo","Namyangju","Uijeongbu","Namyangju","Chuncheon",
"Namyangju","Hanam","Namyangju","Guri","Cheonan","Pyeongtaek",
"Cheonan","Sejong","Cheonan","Asan","Cheonan","Anseong",
"Jeonju","Gimje","Gimhae","Yangsan","Gimhae","Miryang",
"Pyeongtaek","Asan","Pyeongtaek","Osan","Pyeongtaek","Anseong",
"Pyeongtaek","Dangjin","Anyang","Siheung","Anyang","Gwangmyeong",
"Anyang","Gunpo","Anyang","Gwacheon","Siheung","Gwangmyeong",
"Siheung","Gunpo","Pohang","Yeongcheon","Pohang","Gyeongju",
"Jeju","Gimpo","Jeju","Mokpo","Jeju","Seogwipo","Uijeongbu",
"Yangju","Uijeongbu","Pocheon","Paju","Yangju","Gumi","Gimcheon",
"Gumi","Sangju","Gwangju-si","Hanam","Gwangju-si","Icheon",
"Gwangju-si","Yeoju","Sejong","Gongju","Wonju","Chungju",
"Wonju","Jecheon","Wonju","Yeoju","Jinju","Sacheon", "Yangsan",
"Miryang","Asan","Gongju","Iksan","Gunsan","Iksan","Nonsan",
"Iksan","Gimje","Chuncheon","Pocheon","Gyeongsan","Yeongcheon",
"Gunpo","Uiwang","Suncheon","Yeosu","Suncheon","Gwangyang",
"Gunsan","Gimje","Gyeongju","Yeongcheon","Geoje","Tongyeong",
"Osan","Anseong","Yangju","Pocheon","Yangju","Dongducheon",
"Icheon","Anseong","Icheon","Yeoju","Mokpo","Naju","Chungju",
"Jecheon","Chungju","Yeoju","Chungju","Mungyeong","Gangneung",
"Donghae","Gangneung","Sokcho","Seosan","Dangjin","Andong",
"Yeongju","Pocheon","Dongducheon","Gimcheon","Sangju","Tongyeong",
"Sacheon","Nonsan","Gongju","Nonsan","Boryeong","Nonsan",
"Gyeryong","Gongju","Boryeong","Gongju","Gyeryong","Jeongeup",
"Gimje","Yeongju","Mungyeong","Yeongju","Taebaek","Sangju",
"Mungyeong","Sokcho","Samcheok","Samcheok","Taebaek",
"Suncheon","Gwangju"), directed=F)
E(g1)$distance <- c(27, 16, 20, 19, 20, 24, 14, 20, 15, 15, 36, 18, 299, 18, 53,
25, 8, 12, 440, 18, 36, 13, 33, 33, 31, 26, 15, 20, 13, 20,
19, 18, 13, 16, 10, 33, 36, 51, 24, 31, 28, 21, 23, 27, 22,
11, 12, 24, 18, 52, 27, 11, 13, 19, 13, 14, 34, 20, 23, 38,
18, 12, 9, 12, 7, 10, 19, 53, 11, 8, 20, 27, 11, 26, 24, 18,
33, 25, 18, 15, 44, 14, 12, 4, 5, 12, 12, 37, 21, 458, 146,
27, 10, 23, 24, 21, 36, 14, 23, 36, 21, 39, 33, 26, 20, 32,
40, 20, 29, 18, 47, 24, 4, 27, 19, 22, 29, 17, 24, 18, 13,
32, 18, 37, 28, 43, 51, 33, 56, 20, 28, 12, 30, 38, 29, 47,
17, 47, 22, 26, 46, 51, 20, 10, 36,63)
plot(g1, edge.label=E(g1)$distance,
vertex.label.cex=0.6, vertex.size=4)
igraph plot
Using trick from https://or.stackexchange.com/questions/5555/tsp-with-repeated-city-visits
library(data.table)
library(purrr)
library(TSP)
library(igraph)
We need to create distance matrix based on shortest paths for each pair of vertices:
vertex_names <- names(V(g1))
N <- length(vertex_names)
dt <- map(
head(seq_along(vertex_names), -1),
~data.table(
from = vertex_names[[.x]],
to = vertex_names[(.x+1):N],
path = map(
shortest_paths(g1, vertex_names[[.x]], vertex_names[(.x+1):N])[["vpath"]],
names
)
),
) %>%
rbindlist()
then we calculate distances of shortest paths:
m <- as_adjacency_matrix(g1, type = "both", attr = "distance", sparse = FALSE)
dt[, weight := map_dbl(path, ~sum(m[embed(.x, 2)[, 2:1, drop=FALSE]]))]
now we assemble new matrix:
dt <- rbind(
dt, dt[, .(from = to, to = from, path = map(path, rev), weight = weight)]
)
new_m <- matrix(0, N, N)
rownames(new_m) <- colnames(new_m) <- vertex_names
new_m[as.matrix(dt[, .(from,to)])] <- dt[["weight"]]
on this new matrix we use some heuristic to solve TSP (for exact solution you should use method="concorde"):
res <- new_m %>%
TSP() %>%
solve_TSP(repetitions = 1000, two_opt = TRUE)
now we exchange each pair of consecutive cities with shortest path:
start_city <- "Seoul"
path_dt <- c(start_city, labels(cut_tour(res, start_city)), start_city) %>%
embed(2) %>%
.[,2:1,drop = FALSE] %>%
"colnames<-"(c("from", "to")) %>%
as.data.table()
path_dt <- dt[path_dt, on = .(from ,to)]
my_path <- c(unlist(map(path_dt[["path"]], head, -1)), start_city)
my_path is heuristic solution with distance tour_length(res)

Remove community boxes in igraph

I have created a simple minimum spanning tree and now have a data frame with columns 'from', 'to' and 'distance'.
Based on this, I found communities using the Louvain method, which I plotted. As far as I understand it, for clustering and plotting I need only the columns from and to, and the distance is not used.
How can I keep the communities I found, ideally each in a different color, but remove the box around the communities?
library(igraph)
from <- c(14, 25, 18, 19, 29, 23, 24, 36, 5, 22, 21, 29, 18, 26, 2, 45, 8, 7, 36, 42, 3, 23, 13, 13, 20, 15, 13, 7, 28, 9, 6, 37, 8, 4, 15, 27, 10, 2, 39, 1, 43, 21, 14, 4, 14, 8, 9, 40, 31, 1)
to <- c(16, 26, 27, 20, 32, 34, 35, 39, 6, 32, 35, 30, 22, 28, 45, 46, 48, 12, 38, 43, 42, 24, 27, 25, 30, 20, 50, 29, 34, 49, 40, 39, 11, 41, 46, 47, 50, 16, 46, 40, 44, 31, 17, 40, 44, 23, 33, 42, 33, 1)
distance <- c(0.3177487, 0.3908324, 0.4804059, 0.4914682, 0.5610357, 0.6061082, 0.6357532, 0.6638961, 0.7269725, 0.8136463, 0.8605391, 0.8665838, 0.8755252, 0.8908454, 0.9411793, 0.9850834, 1.0641603, 1.0721154, 1.0790506, 1.1410964, 1.1925349, 1.2115428, 1.2165045, 1.2359032, 1.2580204, 1.2725243, 1.2843610, 1.2906908, 1.3070725, 1.3397053, 1.3598817, 1.3690732, 1.3744088, 1.3972220, 1.4472312, 1.4574936, 1.4654772, 1.4689660, 1.5999424, 1.6014316, 1.6305410, 1.6450413, 1.6929959, 1.7597620, 1.8113320, 2.0380866, 3.0789517, 4.0105981, 5.1212614, 0.0000000)
mst <- cbind.data.frame(from, to, distance)
g <- graph.data.frame(mst[, 1:2], directed = FALSE)
lou <- cluster_louvain(g)
set.seed(1)
plot(lou, g, vertex.label = NA, vertex.size=5)
The blobs around the groups can be turned off like this:
plot(lou, g, vertex.label = NA, vertex.size=5, mark.groups = NULL)
Do you want this?
plot(lou, g, vertex.label = NA, vertex.size = 5, mark.border = NA)

Complete() empty columns in r for time series (Tidyr)

I'm having difficulty using the Tidyr function "complete()" to fill in columns for absent weeks. While the complete() function does work, it loops through the entire year 35 times and fills in 4,375 entries rather than just 125.
In short, when I try to use the complete function, it does not just complete the dataframe but duplicates all columns 35 times.
I have tried several different approaches including with and w/o the full_seq function.
1st approach:
Df %>%
group_by(week = week(`Local Start Time`)) %>%
complete(week = 1:52)
Second approach:
Df %>%
group_by(week = week(`Local Start Time`)) %>%
complete(week = full_seq(week <- c(1:52), 1L))
I expected the dataframe to stop at row 125 but instead the complete() loops over the entire yearly data (35 times!) and continues until column 4375.
Any advice is appreciated, thanks!
The data I used is here...
structure(list(`Local Start Time` = structure(c(1483846399, 1483846519,
1483851979, 1484734742, 1485017522, 1485190862, 1486236902, 1486238462,
1486347422, 1486448822, 1487221742, 1487392502, 1487449502, 1487678750,
1487679111, 1487679411, 1487683370, 1488321651, 1488745130, 1489353950,
1489710710, 1491043550, 1492036467, 1492105535, 1492150284, 1492180823,
1492772358, 1493428578, 1493440398, 1493465717, 1493476518, 1493484558,
1493495837, 1493622558, 1493639598, 1493718078, 1493718858, 1493720778,
1495021772, 1495598357, 1495599017, 1496175677, 1496428517, 1496439678,
1496494637, 1496632757, 1496887457, 1496887757, 1496888117, 1497031577,
1497207557, 1497318797, 1497368657, 1497491178, 1497558017, 1497857478,
1498220117, 1498245977, 1498246577, 1498255277, 1498257797, 1499203517,
1499470577, 1500752057, 1500899837, 1502036477, 1502392277, 1502410817,
1502428157, 1502429957, 1503492618, 1503500417, 1503507318, 1503672677,
1503674057, 1503674370, 1503675077, 1503923478, 1503928037, 1503932777,
1503943037, 1503972019, 1503989537, 1504383497, 1504421837, 1504639337,
1504656977, 1504672937, 1504682418, 1504722677, 1506766878, 1507180518,
1507184597, 1507228877, 1507229657, 1507370717, 1508326217, 1508343977,
1508357297, 1508374397, 1508492838, 1508555177, 1508560158, 1508868737,
1509231244, 1509252184, 1509845644, 1510709818), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), week = c(2, 2, 2, 3, 3, 4, 5, 5, 6,
6, 7, 7, 7, 8, 8, 8, 8, 9, 10, 11, 11, 13, 15, 15, 15, 15, 16,
17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 20, 21, 21, 22, 22,
22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 25, 25, 25, 25,
25, 25, 27, 27, 29, 30, 32, 32, 32, 32, 32, 34, 34, 34, 34, 34,
34, 34, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 39,
40, 40, 40, 40, 40, 42, 42, 42, 42, 42, 42, 42, 43, 43, 44, 45,
46)), class = "data.frame", row.names = c(NA, -108L), .Names = c("Local Start Time",
"week"))

Merge and Perfectly Align Histogram and Boxplot using ggplot2

since yesterday I am reading answers and websites in order to combine and align in one plot an histogram and a boxplot generated using ggplot2 package.
This question differs from others because the boxplot chart needs to be reduced in height and aligned to the left outer margin of the histogram.
Considering the following dataset:
my_df <- structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100), value= c(18, 9, 3,
4, 3, 13, 12, 5, 8, 37, 64, 107, 11, 11, 8, 18, 5, 13, 13, 14,
11, 11, 9, 14, 11, 14, 12, 10, 11, 10, 5, 3, 8, 11, 12, 11, 7,
6, 6, 4, 11, 8, 14, 13, 14, 15, 10, 2, 4, 4, 8, 15, 21, 9, 5,
7, 11, 6, 11, 2, 6, 16, 5, 11, 21, 33, 12, 10, 13, 33, 35, 7,
7, 9, 2, 21, 32, 19, 9, 8, 3, 26, 37, 5, 6, 10, 18, 5, 70, 48,
30, 10, 15, 18, 7, 4, 19, 10, 4, 32)), row.names = c(NA, 100L
), class = "data.frame", .Names = c("id", "value"))
I generated the boxplot:
require(dplyr)
require(ggplot2)
my_df %>% select(value) %>%
ggplot(aes(x="", y = value)) +
geom_boxplot(fill = "lightblue", color = "black") +
coord_flip() +
theme_classic() +
xlab("") +
theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank())
and I generated the histogram
my_df %>% select(id, value) %>%
ggplot() +
geom_histogram(aes(x = value, y = (..count..)/sum(..count..)),
position = "identity", binwidth = 1,
fill = "lightblue", color = "black") +
ylab("Relative Frequency") +
theme_classic()
The result I am looking to obtain is a single plot like:
Note that the boxplot must be reduced in height and the ticks must be exactly aligned in order to give a different perspective of the same visual.
You can use either egg, cowplot or patchwork packages to combine those two plots. See also this answer for more complex examples.
library(dplyr)
library(ggplot2)
plt1 <- my_df %>% select(value) %>%
ggplot(aes(x="", y = value)) +
geom_boxplot(fill = "lightblue", color = "black") +
coord_flip() +
theme_classic() +
xlab("") +
theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank())
plt2 <- my_df %>% select(id, value) %>%
ggplot() +
geom_histogram(aes(x = value, y = (..count..)/sum(..count..)),
position = "identity", binwidth = 1,
fill = "lightblue", color = "black") +
ylab("Relative Frequency") +
theme_classic()
egg
# install.packages("egg", dependencies = TRUE)
egg::ggarrange(plt2, plt1, heights = 2:1)
cowplot
# install.packages("cowplot", dependencies = TRUE)
cowplot::plot_grid(plt2, plt1,
ncol = 1, rel_heights = c(2, 1),
align = 'v', axis = 'lr')
patchwork
# install.packages("devtools", dependencies = TRUE)
# devtools::install_github("thomasp85/patchwork")
library(patchwork)
plt2 + plt1 + plot_layout(nrow = 2, heights = c(2, 1))

Add vertical lines to time-series plot

I have the code below which plots two time-series. I'd like to add a vertical line every say 10 units on the x-axis to basically divide the plot up into like 5 squares. Any tips are very much appreciated.
Code:
## Plot Forecast & Actual
ts.plot(ts(CompareDf$stuff1),ts(CompareDf$stuff2),col=1:2,xlab="Hour",ylab="Minu tes",main='testVar')
legend("topleft", legend = c("Actual","Forecast"), col = 1:2, lty = 1)
Data:
dput(CompareDf)
structure(list(stuff1 = c(6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55), stuff2 = c(8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)), .Names = c("stuff1",
"stuff2"), row.names = c(NA, -50L), class = "data.frame")
After plotting timeseries data, use abline to draw vertical lines.
abline(v = seq(10, 50, 10))

Resources