Related
I have less experience in R and I need help tidying my plot as it looks messy. Also, my project is to find the best minimal route from Seoul to every city and back to Seoul. It is almost like Traveling Salesman Problem (TSP) but there are some cities needed to be visited more than once as it is the only way to reach certain cities. I don't know how to do and what packages to use.
This is my code for igraph plot
library(igraph)
g1 <- graph( c("Seoul","Incheon","Seoul","Goyang","Seoul","Seongnam","Seoul",
"Bucheon","Seoul","Uijeongbu","Seoul","Gimpo",
"Seoul","Gwangmyeong", "Seoul", "Hanam","Seoul", "Guri",
"Seoul","Gwacheon","Busan","Changwon","Busan","Gimhae",
"Busan","Jeju","Busan","Yangsan","Busan","Geoje",
"Incheon","Goyang","Incheon","Bucheon","Incheon","Siheung",
"Incheon","Jeju","Incheon","Gimpo","Daegu","Gumi",
"Daegu","Gyeongsan","Daegu","Yeongcheon","Daejeon",
"Cheongju","Daejeon","Nonsan","Daejeon","Gongju",
"Daejeon","Gyeryong","Gwangju","Naju","Suwon","Yongin",
"Suwon","Seongnam","Suwon","Hwaseong","Suwon","Ansan",
"Suwon","Gunpo","Suwon","Osan","Suwon","Uiwang",
"Ulsan","Yangsan","Ulsan","Gyeongju","Ulsan","Miryang",
"Yongin","Seongnam","Yongin","Hwaseong","Yongin","Pyeongtaek",
"Yongin","Gwangju-si","Yongin","Icheon","Yongin","Anseong",
"Yongin","Uiwang","Goyang","Gimpo","Goyang","Paju","Goyang",
"Yangju","Changwon","Gimhae","Changwon","Jinju","Changwon",
"Miryang","Seongnam","Gwangju-si","Seongnam","Hanam","Seongnam",
"Uiwang","Seongnam","Gwacheon","Hwaseong","Ansan","Hwaseong",
"Pyeongtaek","Hwaseong","Gunpo","Hwaseong","Osan","Cheongju",
"Cheonan","Cheongju","Sejong","Bucheon","Siheung","Bucheon",
"Gwangmyeong","Ansan","Anyang","Ansan","Siheung","Ansan",
"Gunpo","Namyangju","Uijeongbu","Namyangju","Chuncheon",
"Namyangju","Hanam","Namyangju","Guri","Cheonan","Pyeongtaek",
"Cheonan","Sejong","Cheonan","Asan","Cheonan","Anseong",
"Jeonju","Gimje","Gimhae","Yangsan","Gimhae","Miryang",
"Pyeongtaek","Asan","Pyeongtaek","Osan","Pyeongtaek","Anseong",
"Pyeongtaek","Dangjin","Anyang","Siheung","Anyang","Gwangmyeong",
"Anyang","Gunpo","Anyang","Gwacheon","Siheung","Gwangmyeong",
"Siheung","Gunpo","Pohang","Yeongcheon","Pohang","Gyeongju",
"Jeju","Gimpo","Jeju","Mokpo","Jeju","Seogwipo","Uijeongbu",
"Yangju","Uijeongbu","Pocheon","Paju","Yangju","Gumi","Gimcheon",
"Gumi","Sangju","Gwangju-si","Hanam","Gwangju-si","Icheon",
"Gwangju-si","Yeoju","Sejong","Gongju","Wonju","Chungju",
"Wonju","Jecheon","Wonju","Yeoju","Jinju","Sacheon", "Yangsan",
"Miryang","Asan","Gongju","Iksan","Gunsan","Iksan","Nonsan",
"Iksan","Gimje","Chuncheon","Pocheon","Gyeongsan","Yeongcheon",
"Gunpo","Uiwang","Suncheon","Yeosu","Suncheon","Gwangyang",
"Gunsan","Gimje","Gyeongju","Yeongcheon","Geoje","Tongyeong",
"Osan","Anseong","Yangju","Pocheon","Yangju","Dongducheon",
"Icheon","Anseong","Icheon","Yeoju","Mokpo","Naju","Chungju",
"Jecheon","Chungju","Yeoju","Chungju","Mungyeong","Gangneung",
"Donghae","Gangneung","Sokcho","Seosan","Dangjin","Andong",
"Yeongju","Pocheon","Dongducheon","Gimcheon","Sangju","Tongyeong",
"Sacheon","Nonsan","Gongju","Nonsan","Boryeong","Nonsan",
"Gyeryong","Gongju","Boryeong","Gongju","Gyeryong","Jeongeup",
"Gimje","Yeongju","Mungyeong","Yeongju","Taebaek","Sangju",
"Mungyeong","Sokcho","Samcheok","Samcheok","Taebaek",
"Suncheon","Gwangju"), directed=F)
E(g1)$distance <- c(27, 16, 20, 19, 20, 24, 14, 20, 15, 15, 36, 18, 299, 18, 53,
25, 8, 12, 440, 18, 36, 13, 33, 33, 31, 26, 15, 20, 13, 20,
19, 18, 13, 16, 10, 33, 36, 51, 24, 31, 28, 21, 23, 27, 22,
11, 12, 24, 18, 52, 27, 11, 13, 19, 13, 14, 34, 20, 23, 38,
18, 12, 9, 12, 7, 10, 19, 53, 11, 8, 20, 27, 11, 26, 24, 18,
33, 25, 18, 15, 44, 14, 12, 4, 5, 12, 12, 37, 21, 458, 146,
27, 10, 23, 24, 21, 36, 14, 23, 36, 21, 39, 33, 26, 20, 32,
40, 20, 29, 18, 47, 24, 4, 27, 19, 22, 29, 17, 24, 18, 13,
32, 18, 37, 28, 43, 51, 33, 56, 20, 28, 12, 30, 38, 29, 47,
17, 47, 22, 26, 46, 51, 20, 10, 36,63)
plot(g1, edge.label=E(g1)$distance,
vertex.label.cex=0.6, vertex.size=4)
igraph plot
Using trick from https://or.stackexchange.com/questions/5555/tsp-with-repeated-city-visits
library(data.table)
library(purrr)
library(TSP)
library(igraph)
We need to create distance matrix based on shortest paths for each pair of vertices:
vertex_names <- names(V(g1))
N <- length(vertex_names)
dt <- map(
head(seq_along(vertex_names), -1),
~data.table(
from = vertex_names[[.x]],
to = vertex_names[(.x+1):N],
path = map(
shortest_paths(g1, vertex_names[[.x]], vertex_names[(.x+1):N])[["vpath"]],
names
)
),
) %>%
rbindlist()
then we calculate distances of shortest paths:
m <- as_adjacency_matrix(g1, type = "both", attr = "distance", sparse = FALSE)
dt[, weight := map_dbl(path, ~sum(m[embed(.x, 2)[, 2:1, drop=FALSE]]))]
now we assemble new matrix:
dt <- rbind(
dt, dt[, .(from = to, to = from, path = map(path, rev), weight = weight)]
)
new_m <- matrix(0, N, N)
rownames(new_m) <- colnames(new_m) <- vertex_names
new_m[as.matrix(dt[, .(from,to)])] <- dt[["weight"]]
on this new matrix we use some heuristic to solve TSP (for exact solution you should use method="concorde"):
res <- new_m %>%
TSP() %>%
solve_TSP(repetitions = 1000, two_opt = TRUE)
now we exchange each pair of consecutive cities with shortest path:
start_city <- "Seoul"
path_dt <- c(start_city, labels(cut_tour(res, start_city)), start_city) %>%
embed(2) %>%
.[,2:1,drop = FALSE] %>%
"colnames<-"(c("from", "to")) %>%
as.data.table()
path_dt <- dt[path_dt, on = .(from ,to)]
my_path <- c(unlist(map(path_dt[["path"]], head, -1)), start_city)
my_path is heuristic solution with distance tour_length(res)
I am using ggpubr to combine multiple graphs in a single plot, but cannot seem to correctly generate one graph with the title that I would like. I would like the title to say "Customized legend," given that it is a common legend for both graphs. Does anybody know how I can do this?
Here is my data:
data1 = data.frame(var1 = c(1,
1,
1,
1,
2,
2,
2,
2,
3,
3,
3,
3,
4,
4,
4,
4,
5,
5,
5,
5,
6,
6,
6,
6,
7,
7,
7,
7,
8,
8,
8,
8,
9,
9,
9,
9,
10,
10,
10,
10,
11,
11,
11,
11,
12,
12,
12,
12,
13,
13,
13,
13,
14,
14,
14,
14,
15,
15,
15,
15,
16,
16,
16,
16,
17,
17,
17,
17,
18,
18,
18,
18,
19,
19,
19,
19,
20,
20,
20,
20,
21,
21,
21,
21,
22,
22,
22,
22,
23,
23,
23,
23,
24,
24,
24,
24,
25,
25,
25,
25,
26,
26,
26,
26,
27,
27,
27,
27,
28,
28,
28,
28,
29,
29,
29,
29,
30,
30,
30,
30,
31,
31,
31,
31,
32,
32,
32,
32,
33,
33,
33,
33),
var2 = c(1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4),
var3 = c(113,
89,
99,
41,
72,
64,
39,
139,
135,
17,
3,
135,
63,
126,
34,
87,
84,
125,
123,
18,
115,
11,
68,
85,
48,
95,
56,
129,
41,
78,
82,
122,
124,
4,
60,
132,
67,
128,
46,
79,
110,
88,
19,
88,
88,
126,
30,
11,
52,
66,
15,
52,
6,
74,
14,
101,
88,
70,
58,
20,
104,
76,
134,
23,
40,
1,
47,
25,
49,
110,
96,
100,
106,
26,
93,
19,
87,
41,
13,
40,
63,
87,
137,
105,
89,
95,
24,
49,
112,
92,
45,
105,
112,
105,
114,
129,
84,
33,
95,
95,
15,
90,
1,
62,
20,
7,
18,
96,
4,
71,
42,
94,
45,
102,
55,
98,
124,
80,
76,
97,
41,
31,
25,
21,
135,
138,
121,
93,
17,
13,
49,
26))
data2 <- data.frame(var1a = c(1,
1,
1,
1,
2,
2,
2,
2,
3,
3,
3,
3,
4,
4,
4,
4,
5,
5,
5,
5,
6,
6,
6,
6,
7,
7,
7,
7,
8,
8,
8,
8,
9,
9,
9,
9,
10,
10,
10,
10,
11,
11,
11,
11,
12,
12,
12,
12,
13,
13,
13,
13,
14,
14,
14,
14,
15,
15,
15,
15,
16,
16,
16,
16,
17,
17,
17,
17,
18,
18,
18,
18,
19,
19,
19,
19,
20,
20,
20,
20,
21,
21,
21,
21,
22,
22,
22,
22,
23,
23,
23,
23,
24,
24,
24,
24,
25,
25,
25,
25,
26,
26,
26,
26,
27,
27,
27,
27,
28,
28,
28,
28,
29,
29,
29,
29,
30,
30,
30,
30,
31,
31,
31,
31,
32,
32,
32,
32,
33,
33,
33,
33),
var2a = c(1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4),
var3a = c(113,
89,
99,
41,
72,
64,
39,
139,
135,
17,
3,
135,
63,
126,
34,
87,
84,
125,
123,
18,
115,
11,
68,
85,
48,
95,
56,
129,
41,
78,
82,
122,
124,
4,
60,
132,
67,
128,
46,
79,
110,
88,
19,
88,
88,
126,
30,
11,
52,
66,
15,
52,
6,
74,
14,
101,
88,
70,
58,
20,
104,
76,
134,
23,
40,
1,
47,
25,
49,
110,
96,
100,
106,
26,
93,
19,
87,
41,
13,
40,
63,
87,
137,
105,
89,
95,
24,
49,
112,
92,
45,
105,
112,
105,
114,
129,
84,
33,
95,
95,
15,
90,
1,
62,
20,
7,
18,
96,
4,
71,
42,
94,
45,
102,
55,
98,
124,
80,
76,
97,
41,
31,
25,
21,
135,
138,
121,
93,
17,
13,
49,
26))
Here is the code that I am using:
#Open packages
library(ggplot2)
library(ggpubr)
#Set the theme
theme_set(theme_pubr())
#Change necessary columns to factor
data1$var2 <- factor(data1$var2, levels = c(1,2,3,4))
data2$var2a <- factor(data2$var2a, levels = c(1,2,3,4))
#Generate the plots
#Generate plots
plot1 <- ggplot(data1, aes(x = var1, y = var3, group = var2)) +
geom_line(size = 1.5, aes(linetype = var2, color = var2)) +
xlab('x_label') +
ylab('y_label')+
scale_fill_discrete(name = 'customized legend')
plot2 <- ggplot(data2, aes(x = var1a, y = var3a, group = var2a)) +
geom_line(size = 1.5, aes(linetype = var2a, color = var2a)) +
xlab('x_label') +
ylab('y_label')+
scale_fill_discrete(name = 'customized legend')
#Combine both into one picture
fig <- ggarrange(plot1, plot2,
ncol = 2,
nrow = 1,
common.legend = TRUE,
legend = "bottom")
fig
Since you didn't use the fill aesthetic in your ggplot, you should not use scale_fill_discrete. What you need is to set the legend title of linetype and color to "customized legend", since those are the aesthetics that you used.
library(ggplot2)
library(ggpubr)
plot1 <- ggplot(data1, aes(x = var1, y = var3, group = var2)) +
geom_line(size = 1.5, aes(linetype = var2, color = var2)) +
xlab('x_label') +
ylab('y_label') +
labs(linetype = "customized legend", color = "customized legend")
plot2 <- ggplot(data2, aes(x = var1a, y = var3a, group = var2a)) +
geom_line(size = 1.5, aes(linetype = var2a, color = var2a)) +
xlab('x_label') +
ylab('y_label') +
labs(linetype = "customized legend", color = "customized legend")
#Combine both into one picture
ggarrange(plot1, plot2,
ncol = 2,
nrow = 1,
common.legend = TRUE,
legend = "bottom")
I have a lot of financial trading data with around a million rows and I want to be able to condense this into a new data frame with a list of Unique UserIDs. I then want to be able to add up the "trades" for their account, with some conditions, ie if TransactionTypeId == 2 & AC_Type== 19. I would use a sumifs in excel for this but the size of the file means its pretty much impossible to run on my computer.
df<- structure(list(UserId = c(1, 1, 1, 1, 2,
2, 2, 3, 3, 3, 4, 5, 6,
6, 6, 7, 7, 7, 8, 8, 8,
8, 8, 9, 9, 9, 10, 11, 12,
12, 13, 13, 13, 14, 14, 15, 15,
16, 16, 16), TransactionTypeId = c(14, 1, 1, 70,
15, 1, 1, 14, 1, 1, 70, 14, 14, 1, 1, 14, 1, 1, 14, 1, 1, 1,
1, 14, 1, 1, 14, 14, 1, 1, 14, 1, 1, 1, 1, 70, 70, 14, 1, 1),
AC_Type = c(21, 21, 21, 21, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 21, 21, 21, 19, 19, 19, 19, 19, 19, 19, 19, 20,
19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20), Trades = c(30,
30, 0.00067116, 0.00067115, 249, 249, 0.00533033, 48.75,
48.75, 0.00101298, 0.00533, 24.37, 146.25, 146.25, 0.00309109,
100.01, 100.01, 0.00233551, 97.5, 90, 0.00189134, 5, 0.00245851,
234, 234, 0.00500802, 100.01, 48.75, 48.5, 0.0275474, 24,
24, 0.00051975, 100, 0.00223998, 0.00051975, 0.00205, 9.75,
8.75, 0.00017811)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"))
You can take sum of the logical condition that you want to count.
library(dplyr)
df %>%
group_by(UserId) %>%
summarise(count = sum(Trades[TransactionTypeId == 2 & AC_Type== 19]))
Not quite sure what you want ...
libary(dplyr)
df %>%
group_by(UserId) %>%
filter(TransactionTypeId == 1 & AC_Type == 19) %>%
summarise(sum = sum(Trades))
# A tibble: 6 x 2
UserId sum
<dbl> <dbl>
1 2 249.
2 3 48.8
3 6 146.
4 8 95.0
5 9 234.
6 12 48.5
Here you first group_by UserId, then filterthose rows that meet your conditions (NB: I've changed 2to 1 as there aren't any 2s in the sample data), and finally summarise by summing up the values in Trades.
Using data.table
library(data.table)
setDT(df)[, .(count = sum(Trades[TransactionTypeId == 2 &
AC_Type== 19], na.rm = TRUE)), UserId]
I'm trying to make a surface graph but I can not do it. The graphic is not pretty and I have tried in several forums how to do and I did not succeed.
library(scatterplot3d)
x1 <- rep(10, 6)
x2 <- rep(15, 6)
x3 <- rep(20, 6)
x4 <- rep(25, 6)
x5 <- rep(30, 6)
x <- c(x1, x2, x3, x4, x5)
y1 <- rep(7, 30)
y2 <- rep(21, 30)
y3 <- rep(35, 30)
y <- c(y1, y2, y3)
z = 1781.166805 + 52.445903*y + 203.454647*x -1.570445*x*y -4.119635*(x**2)
scatterplot3d(x, y, z)
I'd highly appreciate if you help me!
First off, for future postings, please use the code tags to properly format your code. Secondly, your formula for z is not valid R syntax.
Lastly, I'd strongly recommend to spend some time taking the tour and learning how to ask good questions.
scatterplot3d allows you to plot a three dimensional point cloud, not a surface. Based on the (poorly formatted) data you provide, this works just fine:
library(scatterplot3d);
z <- 1781.166805 + 52.445903 * y + 203.454647 * x -1.570445 * x * y -4.119635 * x^2;
scatterplot3d(x, y, z);
Update
If you want to have a 3d surface (mesh) plot, you can use e.g. plotly:
require(plotly);
plot_ly(x = ~x, y = ~y, z = ~z, type = "mesh3d");
which produces an interactive (rotatable, zoomable) plot, a screenshot of which looks like this:
Sample data
x <- c(10, 10, 10, 10, 10, 10, 15, 15, 15, 15, 15, 15, 20, 20, 20,
20, 20, 20, 25, 25, 25, 25, 25, 25, 30, 30, 30, 30, 30, 30, 10,
10, 10, 10, 10, 10, 15, 15, 15, 15, 15, 15, 20, 20, 20, 20, 20,
20, 25, 25, 25, 25, 25, 25, 30, 30, 30, 30, 30, 30, 10, 10, 10,
10, 10, 10, 15, 15, 15, 15, 15, 15, 20, 20, 20, 20, 20, 20, 25,
25, 25, 25, 25, 25, 30, 30, 30, 30, 30, 30);
y <- c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
35, 35, 35, 35);
I was trying to plot a metaregression for proportions using the meta package. The metaregression using metaprop works as expected. But when I run bubble, I get the error listed below the script:
library(meta)
sample <- c(74, 62,370, 72, 40, 84, 290, 244, 173, 106, 89, 139, 43, 398, 179, 31)
BLIPS <- c(23, 12, 11, 11, 1, 17, 52, 28, 6, 4, 3, 4, 1, 56, 22, 1)
covar <- c(21, 11, 14, 1, 4, 47, 2, 42, 16, 44, 3, 34, 11, 15, 21, 4)
hr <- data.frame(sample, BLIPS, covar)
meta <- metaprop(BLIPS, sample)
reg <- metareg(meta, covar)
reg
bubble(reg)
Error in [.data.frame(x$.meta$x$data, , covar.name) : undefined
columns selected
Currently your metaregression uses the variables from the global environment and not the variables from your data.frame hr. This appears to work as for the regression itself, but not for the bubble plot. If you just add data = hr to your metaprop call, then the bubble plot works as expected.
hr <- data.frame(sample, BLIPS, covar)
meta <- metaprop(BLIPS, sample, data = hr)
reg <- metareg(meta, covar)
reg
bubble(reg)