I'm trying to plot a network graph using the d3Network package. I tried organizing the data to match the instructions as they appear on the package website (and help page), but I still get a blank web page. Can anyone spot what I'm doing wrong?
library(d3Network)
# Edge list for the network: 15 rows with factor columns `from` and `to`
# and a numeric `weight`. The `to` factor uses the level "united_states",
# which has no matching row in the node table `top3000.nodes` defined next.
g.top3000 <- structure(list(from = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 4L, 4L, 5L), .Label = c("afghanistan", "attack",
"people", "pres_bush", "taliban"), class = "factor"), to = structure(c(4L,
1L, 5L, 2L, 3L, 1L, 5L, 2L, 3L, 5L, 2L, 3L, 2L, 3L, 3L), .Label = c("people",
"taliban", "united_states", "attack", "pres_bush"), class = "factor"),
weight = c(4, 3, 2, 6, 5, 5, 2, 3, 6, 1, 1, 5, 2, 4, 4)), .Names = c("from",
"to", "weight"), row.names = c(NA, -15L), class = "data.frame")
# Node table: 5 rows with a factor `name` and a numeric `id` (used as the
# node group in the plotting call).
top3000.nodes <- structure(list(name = structure(1:5, .Label = c("afghanistan",
"attack", "people", "pres_bush", "taliban"), class = "factor"),
id = c(1, 1, 1, 2, 2)), .Names = c("name", "id"), row.names = c(NA,
-5L), class = "data.frame")
# Write the force-directed network to an HTML file. Link endpoints are taken
# from the name columns "from" and "to" of the edge list.
d3ForceNetwork(
  Links = g.top3000,
  Nodes = top3000.nodes,
  Source = "from",
  Target = "to",
  Value = "weight",
  NodeID = "name",
  Group = "id",
  width = 800,
  height = 400,
  opacity = 0.9,
  file = "projekt2_terror_news_force.html"
)
Plotting a simple graph works just fine
# Write the simple (ungrouped) network built from the edge list to an HTML
# file.
d3SimpleNetwork(
  g.top3000,
  width = 800,
  height = 400,
  fontsize = 12,
  linkDistance = 200,
  file = "projekt2_terror_news.html"
)
That's because:
1. There is no entry for "united_states" in the node list.
2. You may need to use numeric (zero-based) indices instead of node names for the link source and target.
# Add the missing "united_states" entry so that every link endpoint has a
# row in the node table.
top3000.nodes <- rbind(top3000.nodes, data.frame(name = "united_states", id = 3))

# From name to index: translate node names into zero-based row positions in
# the node table (hence the `- 1L`). match() is vectorised, always returns a
# plain integer vector, and gives NA for a name missing from the node table,
# so no per-element fallback is needed.
g.top3000$from2 <- match(
  as.character(g.top3000$from),
  as.character(top3000.nodes$name)
) - 1L
g.top3000$to2 <- match(
  as.character(g.top3000$to),
  as.character(top3000.nodes$name)
) - 1L

# Use the index columns "from2" and "to2" as link endpoints.
d3ForceNetwork(
  Links = g.top3000,
  Nodes = top3000.nodes,
  Source = "from2",
  Target = "to2",
  Value = "weight",
  NodeID = "name",
  Group = "id",
  width = 800,
  height = 400,
  opacity = 0.4,
  file = "projekt2_terror_news_force.html",
  linkDistance = 200
)
Related
I'm trying to add significance annotations to an errorbar plot with a factor x-axis and dodged groups within each level of the x-axis. It is a similar but NOT identical use case to this
My base errorbar plot is:
library(ggplot2)
library(dplyr)
# Example data: a 9-row tibble grouped by `language` (3 languages x 3 groups)
# holding predicted values, standard errors and conf.low / conf.high bounds.
# `top` repeats, within each language, the largest conf.high of that language.
pres_prob_pd = structure(list(x = structure(c(1, 1, 1, 2, 2, 2, 3, 3, 3), labels = c(`1` = 1,
`2` = 2, `3` = 3)), predicted = c(0.571584427222816, 0.712630712634987,
0.156061969566517, 0.0162388386564817, 0.0371877245103279, 0.0165022541901018,
0.131528946944238, 0.35927812866896, 0.0708662221985375), std.error = c(0.355802875027348,
0.471253661425626, 0.457109887762665, 0.352871728451576, 0.442646879181155,
0.425913568532558, 0.376552208691762, 0.48178172708116, 0.451758041335245
), conf.low = c(0.399141779923204, 0.496138837620712, 0.0701919316506831,
0.00819832576725402, 0.0159620304815404, 0.00722904089045731,
0.0675129352870401, 0.17905347369819, 0.030504893442457), conf.high = c(0.728233665534388,
0.861980236164486, 0.311759350126477, 0.031911364587827, 0.0842227723261319,
0.0372248587668487, 0.240584344249407, 0.590437963881823, 0.156035177669385
), group = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain",
"neutral", "uncertain"), class = "factor"), group_col = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain", "neutral",
"uncertain"), class = "factor"), language = structure(c(2L, 2L,
2L, 1L, 1L, 1L, 3L, 3L, 3L), .Label = c("english", "dutch", "german"
), class = "factor"), top = c(0.861980236164486, 0.861980236164486,
0.861980236164486, 0.0842227723261319, 0.0842227723261319, 0.0842227723261319,
0.590437963881823, 0.590437963881823, 0.590437963881823)), row.names = c(NA,
-9L), groups = structure(list(language = structure(1:3, .Label = c("english",
"dutch", "german"), class = "factor"), .rows = structure(list(
4:6, 1:3, 7:9), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Shared dodge so that points and error bars of the same group line up.
pd <- position_dodge(0.75)

# Base plot: one point plus error bar per group within each language.
p <- ggplot(
  pres_prob_pd,
  aes(x = language, y = predicted, color = group, shape = group)
) +
  geom_point(size = 2, position = pd) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    position = pd,
    width = 0.125
  )
p
What I want to do is annotate the plot such that the contrasts between group within each level of language are annotated for significance. I've plotted points representing the relevant contrasts and (toy) sig. annotations as follows:
# bump function
#
# Shifts each element of `x` upward by a steadily growing offset, so that
# element k (1-based) becomes x[k] + bump + (k - 1) * step.
#
# x:    numeric vector of baseline values.
# bump: offset added to the first element (default 0.025).
# step: additional offset added for every following element (default 0.075).
#
# Returns a numeric vector of the same length as `x`. An empty `x` yields
# numeric(0). The computation is vectorised, so no result vector is grown
# inside a loop.
f <- function(x, bump = 0.025, step = 0.075) {
  x <- as.numeric(x)
  x + bump + step * (seq_along(x) - 1)
}
# Create contrasts: every unordered pair of the three group levels, labelled
# cont_1 .. cont_3.
combs <- data.frame(
  gtools::combinations(
    3, 2,
    v = c("certain", "neutral", "uncertain"),
    set = FALSE,
    repeats.allowed = FALSE
  )
) %>%
  mutate(contrast = c("cont_1", "cont_2", "cont_3"))

# Repeat the contrasts for each language, attach the per-language `top` value
# and group by language so that f() is applied within each language.
combs <- rbind(
  combs %>% mutate(language = "english"),
  combs %>% mutate(language = "dutch"),
  combs %>% mutate(language = "german")
) %>%
  left_join(select(pres_prob_pd, language:top) %>% distinct(), by = "language") %>%
  group_by(language)

# Long transform and calc y_pos.
# gather() and spread() come from tidyr, which is not attached by the
# library() calls above, so they are namespace-qualified here.
combs_long <- mutate(combs, y_pos = f(top)) %>%
  tidyr::gather(long, probability, X1:X2, factor_key = TRUE) %>%
  mutate(language = factor(language, levels = c("english", "dutch", "german"))) %>%
  arrange(language, contrast)

# Back to wide, then attach the (toy) significance labels by row position.
combs_wide <- combs_long %>% tidyr::spread(long, probability)
combs_wide$p <- rep(c("***", "*", "ns"), 3)
# Plot: overlay the contrast markers and their significance labels on the
# base plot. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
p +
  geom_point(
    data = combs_long,
    aes(
      x = language,
      color = probability,
      shape = probability,
      y = y_pos
    ),
    inherit.aes = TRUE,
    position = pd,
    size = 2
  ) +
  geom_text(
    data = combs_wide,
    aes(
      x = language,
      label = p,
      y = y_pos + .025,
      group = X1
    ),
    color = "black",
    position = position_dodge(.75),
    inherit.aes = FALSE
  )
What I am failing to achieve is plotting a line connecting each of the contrasts of group within each level of language, as is standard when annotating significant group-wise differences. Any help much appreciated!
I am working on generating a hierarchical edge plot where the edge's color/transparency/thickness varies by the column (pvalue) in my connect dataframe, however the color/transparency/thickness of the edges in the plot I generated don't always map to the values in column (pvalue). For example, subgroup1 and subgroup4 should have the strongest thickest connection (pvalue is E-280), when in fact they don't, rather the connection between subgroup3 and subgroup4 looks to be strongest.
This data generates a reproducible example:
> dput(vertices)
structure(list(name = structure(c(3L, 1L, 2L, 4L, 5L, 6L, 7L), .Label = c("gp1",
"gp2", "origin", "subgroup1", "subgroup2", "subgroup3", "subgroup4"
), class = "factor"), id = c(NA, NA, NA, 1L, 2L, 3L, 4L), angle = c(NA,
NA, NA, 0, -90, 0, -90), hjust = c(NA, NA, NA, 1, 1, 1, 1)), row.names = c(NA,
-7L), class = "data.frame")
> dput(hierarchy)
structure(list(from = structure(c(3L, 3L, 1L, 1L, 2L, 2L), .Label = c("gp1",
"gp2", "origin"), class = "factor"), to = structure(1:6, .Label = c("gp1",
"gp2", "subgroup1", "subgroup2", "subgroup3", "subgroup4"), class = "factor")), class = "data.frame", row.names = c(NA,
-6L))
> dput(connect)
structure(list(from = structure(c(1L, 1L, 2L, 3L, 1L, 2L, 3L,
1L), .Label = c("subgroup1", "subgroup2", "subgroup3"), class = "factor"),
to = structure(c(1L, 2L, 2L, 1L, 3L, 3L, 3L, 3L), .Label = c("subgroup2",
"subgroup3", "subgroup4"), class = "factor"), pvalue = c(1.68e-204,
1.59e-121, 9.32e-73, 9.32e-73, 1.59e-21, 9.32e-50, 9.32e-40,
9.32e-280)), class = "data.frame", row.names = c(NA, -8L))
and this is the code I used to make this example plot:
# Map each connection endpoint to its row position in `vertices`; `col`
# carries the p-value that is mapped to edge colour, alpha and width.
from <- match(connect$from, vertices$name)
to <- match(connect$to, vertices$name)
col <- connect$pvalue

# Label information: angle, horizontal adjustment and potential flip.
# Leaves are the vertices that never occur as a parent in `hierarchy$from`.
vertices$id <- NA
myleaves <- which(is.na(match(vertices$name, hierarchy$from)))
nleaves <- length(myleaves)
# seq_len() gives an empty sequence when there are no leaves, whereas
# seq(1:0) would give c(1, 2).
vertices$id[myleaves] <- seq_len(nleaves)
vertices$angle <- 90 - 360 * vertices$id / nleaves

# Alignment of labels: hjust is 1 for leaves whose id is below 41, else 0.
# NOTE(review): the fixed threshold 41 looks carried over from a larger
# example; with fewer than 41 leaves every label gets hjust = 1 -- confirm
# that this is intended.
vertices$hjust <- ifelse(vertices$id < 41, 1, 0)

# Flip labels whose angle is below -90 by 180 degrees to keep them readable.
vertices$angle <- ifelse(vertices$angle < -90, vertices$angle + 180, vertices$angle)

mygraph <- graph_from_data_frame(hierarchy, vertices = vertices)

ggraph(mygraph, layout = "dendrogram", circular = TRUE) +
  geom_node_point(aes(filter = leaf, x = x * 1.05, y = y * 1.05), size = 2, alpha = 0.8) +
  geom_conn_bundle(
    data = get_con(from = from, to = to, col = col),
    aes(colour = col, alpha = col, width = col)
  ) +
  geom_node_text(
    aes(x = x * 1.1, y = y * 1.1, filter = leaf, label = name, angle = angle, hjust = hjust),
    size = 3.5,
    alpha = 0.6
  ) +
  scale_edge_color_continuous(trans = "log", low = "red", high = "yellow") +
  scale_edge_alpha_continuous(trans = "log", range = c(1, 0.1)) +
  scale_edge_width_continuous(trans = "log", range = c(4, 1)) +
  theme_void()
I think there is wrong mapping somewhere but I can't figure out where. Thank you so much for your input!
I believe there is a bug in this library. Rearranging the input data by the column of choice (pvalue in my case) in an ascending order helped but did not solve the issue.
# Sort the connections by ascending p-value.
connect_new <- connect %>%
  arrange(pvalue)
and I found the solution in a github issue submitted by another user. The subgroups within each group need to be ordered alphabetically in the hierarchy and vertices file. In addition, in the connect dataframe, the subgroups need to be ordered following the same order in the hierarchy and vertices file. Thanks to zhuxr11
I have several dataframes that share the same structure but have different column names. I want to merge them all into one dataframe, but if I use bind_rows() it creates new columns for the names that differ.
I tried smartbind(), union(), union_all() and functions from other libraries; however, none of them is able to simply stack the rows.
Here goes some sample data:
# Sample data: two 6-row grouped tibbles (grouped by Codigo_Cliente and
# Segmento) with the same 13-column layout. They differ only in the names of
# columns 3 and 4: Sal_Cons_CA_2018 / Sal_Cons_CA_2019 in df1 versus
# Sal_Cons_CC_2018 / Sal_Cons_CC_2019 in df2.
df1 <- structure(list(Codigo_Cliente = c(292640L, 48296L, 28368L, 27631L,
21715L, 401076L), Segmento = structure(c(3L, 3L, 3L, 3L, 3L,
5L), .Label = c("Clasico", "Emergente", "Mi_Negocio", "Preferencial",
"Prestige"), class = "factor"), Sal_Cons_CA_2018 = c(115966976.4748,
41404074.5338, 21576406.4326, NA, 5217387.0461, NA), Sal_Cons_CA_2019 = c(233057582.7658,
146012775.8314, 121273292.4548, 72383484.8781, 76605696.1462,
64418761.5503), Tipo_Cliente = structure(c(2L, 2L, 2L, 2L, 2L,
1L), .Label = c("Nuevo", "Viejo"), class = "factor"), diferencia_anual = c(117090606.291,
104608701.2976, 99696886.0222, 72383484.8781, 71388309.1001,
64418761.5503), peso_cambio = c(11.7925653553277, 10.5354732191076,
10.040788765049, 7.28996973463426, 7.18974243396645, 6.48781725327502
), cum = c(117090606.291, 221699307.5886, 321396193.6108, 393779678.4889,
465167987.589, 529586749.1393), cum_cambio = c(11.7925653553277,
22.3280385744352, 32.3688273394842, 39.6587970741185, 46.8485395080849,
53.33635676136), ones = c(1, 1, 1, 1, 1, 1), clientes = c(1,
2, 3, 4, 5, 6), porcentaje_acumulado_clientes = c(0.040650406504065,
0.0813008130081301, 0.121951219512195, 0.16260162601626, 0.203252032520325,
0.24390243902439), Tipo_Aportante = c("Viejo Aportante", "Viejo Aportante",
"Viejo Aportante", "Nuevo Aportante", "Viejo Aportante", "Nuevo Aportante"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L), groups = structure(list(Codigo_Cliente = c(21715L, 27631L,
28368L, 48296L, 292640L, 401076L), Segmento = structure(c(3L,
3L, 3L, 3L, 3L, 5L), .Label = c("Clasico", "Emergente", "Mi_Negocio",
"Preferencial", "Prestige"), class = "factor"), .rows = list(
5L, 4L, 3L, 2L, 1L, 6L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
# Second table: same layout as df1 apart from the CC-named balance columns.
df2 <- structure(list(Codigo_Cliente = c(29460L, 208833L, 494610L, 292653L,
371679L, 54042L), Segmento = structure(c(3L, 3L, 3L, 3L, 3L,
3L), .Label = c("Clasico", "Emergente", "Mi_Negocio", "Preferencial",
"Prestige"), class = "factor"), Sal_Cons_CC_2018 = c(249412694.49,
226519.47, NA, 232072.25, 893861.14, 2305969.41), Sal_Cons_CC_2019 = c(492333714.52,
217220231.86, 140551673.22, 73744015.83, 57995686.81, 54669407.01
), Tipo_Cliente = structure(c(2L, 2L, 1L, 2L, 2L, 2L), .Label = c("Nuevo",
"Viejo"), class = "factor"), diferencia_anual = c(242921020.03,
216993712.39, 140551673.22, 73511943.58, 57101825.67, 52363437.6
), peso_cambio = c(30.7889911838579, 27.5028381525124, 17.8142024395939,
9.31726115143663, 7.23736301995891, 6.63679667747068), cum = c(242921020.03,
459914732.42, 600466405.64, 673978349.22, 731080174.89, 783443612.49
), cum_cambio = c(30.7889911838579, 58.2918293363703, 76.1060317759641,
85.4232929274008, 92.6606559473597, 99.2974526248303), ones = c(1,
1, 1, 1, 1, 1), clientes = c(1, 2, 3, 4, 5, 6), porcentaje_acumulado_clientes = c(0.0369822485207101,
0.0739644970414201, 0.11094674556213, 0.14792899408284, 0.18491124260355,
0.22189349112426), Tipo_Aportante = c("Viejo Aportante", "Viejo Aportante",
"Nuevo Aportante", "Viejo Aportante", "Viejo Aportante", "Viejo Aportante"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L), groups = structure(list(Codigo_Cliente = c(29460L, 54042L,
208833L, 292653L, 371679L, 494610L), Segmento = structure(c(3L,
3L, 3L, 3L, 3L, 3L), .Label = c("Clasico", "Emergente", "Mi_Negocio",
"Preferencial", "Prestige"), class = "factor"), .rows = list(
1L, 6L, 2L, 4L, 5L, 3L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
You can use data.table package, which has rbindlist function:
# Stack the tables by column POSITION. The inputs deliberately have different
# column names, so use.names must be FALSE; use.names = TRUE would try to
# match columns by name instead. The call is namespace-qualified so that it
# works without attaching data.table.
df <- data.table::rbindlist(list(df1, df2), use.names = FALSE)
I have a problem plotting credibility intervals like this:
My data structure is as follows; L1, L2, M, U1 and U2 stand for the 0.025, 0.25, 0.5, 0.75 and 0.975 quantiles, respectively.
`
structure(list(approach = structure(c(1L, 2L, 1L, 2L, 1L, 2L), class = "factor", .Label = c("INLA",
"rjags")), param = structure(c(1L, 2L, 3L, 1L, 2L, 3L), class = "factor", .Label = c("alpha",
"beta", "sig2")), L1 = c(0.0844546867936143, 1.79242348175439,
0.163143886545317, 0.0754165380733685, 1.79067991488052, 3.66675821267498
), L2 = c(0.60090835904286, 1.95337968870806, 0.898159977552433,
0.606017177641373, 1.95260448314298, 4.07080184844179), M = c(0.870204161297956,
2.03768437879748, 2.20651061559405, 0.87408237273113, 2.03725552264872,
4.32531027636171), U2 = c(1.13905085248391, 2.12210930874551,
4.26836270504725, 1.66260576926063, 2.28900567640091, 5.10063756831338
), U1 = c(1.65214011950274, 2.28396345192398, 4.9109804477583,
1.1450384685802, 2.12117799328209, 4.55657971279654), AP = structure(c(1L,
4L, 5L, 2L, 3L, 6L), .Label = c("INLA.alpha", "rjags.alpha",
"INLA.beta", "rjags.beta", "INLA.sig2", "rjags.sig2"), class = "factor")), .Names = c("approach",
"param", "L1", "L2", "M", "U2", "U1", "AP"), row.names = c(NA,
-6L), class = "data.frame")`
I consulted this answer (link not preserved), but 'fill' seems to work only for the boxplot case. The code I have tried so far is:
# One row label per approach/parameter combination.
CI$AP <- interaction(CI$approach, CI$param)

# Point at M, a thin segment from L1 to U1 and a thicker one from L2 to U2.
p <- ggplot(CI, aes(y = AP)) +
  geom_point(aes(x = M)) +
  geom_segment(aes(x = L1, xend = U1, y = AP, yend = AP)) +
  geom_segment(aes(x = L2, xend = U2, y = AP, yend = AP), size = 1.5)
It is far away from what I want.
Many thanks!
How about the following:
# M as a point, a thick line for the L2-U2 range and a thin line for the
# L1-U1 range, dodged by approach and drawn horizontally via coord_flip().
# The line ranges inherit x = param from the top-level aesthetics.
ggplot(df, aes(x = param, y = M, colour = approach)) +
  geom_point(size = 3, position = position_dodge2(width = 0.3)) +
  geom_linerange(
    aes(ymin = L2, ymax = U2),
    size = 2,
    position = position_dodge2(width = 0.3)
  ) +
  geom_linerange(
    aes(ymin = L1, ymax = U1),
    size = 1,
    position = position_dodge2(width = 0.3)
  ) +
  coord_flip() +
  labs(x = "Parameter", y = "Estimate")
Sample data
# Sample data used by the plot: 6 rows with factor columns `approach`
# (INLA / rjags) and `param` (alpha / beta / sig2), the numeric interval
# columns L1, L2, M, U2, U1 (stored in that order) and a label factor AP.
df <- structure(list(approach = structure(c(1L, 2L, 1L, 2L, 1L, 2L), class = "factor", .Label = c("INLA",
"rjags")), param = structure(c(1L, 2L, 3L, 1L, 2L, 3L), class = "factor", .Label = c("alpha",
"beta", "sig2")), L1 = c(0.0844546867936143, 1.79242348175439,
0.163143886545317, 0.0754165380733685, 1.79067991488052, 3.66675821267498
), L2 = c(0.60090835904286, 1.95337968870806, 0.898159977552433,
0.606017177641373, 1.95260448314298, 4.07080184844179), M = c(0.870204161297956,
2.03768437879748, 2.20651061559405, 0.87408237273113, 2.03725552264872,
4.32531027636171), U2 = c(1.13905085248391, 2.12210930874551,
4.26836270504725, 1.66260576926063, 2.28900567640091, 5.10063756831338
), U1 = c(1.65214011950274, 2.28396345192398, 4.9109804477583,
1.1450384685802, 2.12117799328209, 4.55657971279654), AP = structure(c(1L,
4L, 5L, 2L, 3L, 6L), .Label = c("INLA.alpha", "rjags.alpha",
"INLA.beta", "rjags.beta", "INLA.sig2", "rjags.sig2"), class = "factor")), .Names = c("approach",
"param", "L1", "L2", "M", "U2", "U1", "AP"), row.names = c(NA,
-6L), class = "data.frame")
The labels for the mosaic plot don't fit the screen (they're partially cut off), so I'd like to move/shift the plot to the right so that the labels fully fit. I tried using the `par` function, but to no avail. Any ideas?
structure(list(Road_Type = structure(c(4L, 4L, 4L, 4L, 4L, 4L
), .Label = c("Roundabout", "One way Street", "Dual Carriageway",
"Single carriageway", "Slip Road"), class = "factor"), Accident_Severity_combined = structure(c(2L,
2L, 2L, 2L, 1L, 2L), .Label = c("Serious", "Slight"), class = "factor")), .Names = c("Road_Type",
"Accident_Severity_combined"), row.names = c(NA, 6L), class = "data.frame")
>
# Mosaic plot of road type against accident severity.
# Repairs to the original call: the missing comma after `highlighting_fill`
# and the unclosed `mosaic(` are fixed, and the colour name "darlblue" is
# corrected to "darkblue".
# All labelling options are passed through `labeling_args`, so they reach the
# labelling generator together with `set_varnames`.
# NOTE(review): this assumes `labeling_args` is only applied when `labeling`
# is the generator itself rather than an already-built labelling function --
# confirm against the vcd documentation.
mos <- mosaic(
  ~ Road_Type + Accident_Severity_combined,
  data = uk1,
  shade = TRUE,
  legend = TRUE,
  highlighting_fill = c("darkblue", "red"),
  labeling = labeling_border,
  labeling_args = list(
    set_varnames = c(Accident_Severity_combined = "Gender", Road_Type = "survival"),
    rot_labels = c(90, 0, 90, 0),
    just_labels = c("left", "left", "right", "right"),
    tl_varnames = FALSE,
    gp_labels = gpar(fontsize = 9)
  )
)