I have several data frames that share the same structure but have different column names. I want to merge them all into one data frame, but if I use bind_rows() it creates new columns for the differing names.
I tried smartbind(), union(), union_all() and functions from other libraries; however, none of them is able to simply merge them.
Here goes some sample data:
df1 <- structure(list(Codigo_Cliente = c(292640L, 48296L, 28368L, 27631L,
21715L, 401076L), Segmento = structure(c(3L, 3L, 3L, 3L, 3L,
5L), .Label = c("Clasico", "Emergente", "Mi_Negocio", "Preferencial",
"Prestige"), class = "factor"), Sal_Cons_CA_2018 = c(115966976.4748,
41404074.5338, 21576406.4326, NA, 5217387.0461, NA), Sal_Cons_CA_2019 = c(233057582.7658,
146012775.8314, 121273292.4548, 72383484.8781, 76605696.1462,
64418761.5503), Tipo_Cliente = structure(c(2L, 2L, 2L, 2L, 2L,
1L), .Label = c("Nuevo", "Viejo"), class = "factor"), diferencia_anual = c(117090606.291,
104608701.2976, 99696886.0222, 72383484.8781, 71388309.1001,
64418761.5503), peso_cambio = c(11.7925653553277, 10.5354732191076,
10.040788765049, 7.28996973463426, 7.18974243396645, 6.48781725327502
), cum = c(117090606.291, 221699307.5886, 321396193.6108, 393779678.4889,
465167987.589, 529586749.1393), cum_cambio = c(11.7925653553277,
22.3280385744352, 32.3688273394842, 39.6587970741185, 46.8485395080849,
53.33635676136), ones = c(1, 1, 1, 1, 1, 1), clientes = c(1,
2, 3, 4, 5, 6), porcentaje_acumulado_clientes = c(0.040650406504065,
0.0813008130081301, 0.121951219512195, 0.16260162601626, 0.203252032520325,
0.24390243902439), Tipo_Aportante = c("Viejo Aportante", "Viejo Aportante",
"Viejo Aportante", "Nuevo Aportante", "Viejo Aportante", "Nuevo Aportante"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L), groups = structure(list(Codigo_Cliente = c(21715L, 27631L,
28368L, 48296L, 292640L, 401076L), Segmento = structure(c(3L,
3L, 3L, 3L, 3L, 5L), .Label = c("Clasico", "Emergente", "Mi_Negocio",
"Preferencial", "Prestige"), class = "factor"), .rows = list(
5L, 4L, 3L, 2L, 1L, 6L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
df2 <- structure(list(Codigo_Cliente = c(29460L, 208833L, 494610L, 292653L,
371679L, 54042L), Segmento = structure(c(3L, 3L, 3L, 3L, 3L,
3L), .Label = c("Clasico", "Emergente", "Mi_Negocio", "Preferencial",
"Prestige"), class = "factor"), Sal_Cons_CC_2018 = c(249412694.49,
226519.47, NA, 232072.25, 893861.14, 2305969.41), Sal_Cons_CC_2019 = c(492333714.52,
217220231.86, 140551673.22, 73744015.83, 57995686.81, 54669407.01
), Tipo_Cliente = structure(c(2L, 2L, 1L, 2L, 2L, 2L), .Label = c("Nuevo",
"Viejo"), class = "factor"), diferencia_anual = c(242921020.03,
216993712.39, 140551673.22, 73511943.58, 57101825.67, 52363437.6
), peso_cambio = c(30.7889911838579, 27.5028381525124, 17.8142024395939,
9.31726115143663, 7.23736301995891, 6.63679667747068), cum = c(242921020.03,
459914732.42, 600466405.64, 673978349.22, 731080174.89, 783443612.49
), cum_cambio = c(30.7889911838579, 58.2918293363703, 76.1060317759641,
85.4232929274008, 92.6606559473597, 99.2974526248303), ones = c(1,
1, 1, 1, 1, 1), clientes = c(1, 2, 3, 4, 5, 6), porcentaje_acumulado_clientes = c(0.0369822485207101,
0.0739644970414201, 0.11094674556213, 0.14792899408284, 0.18491124260355,
0.22189349112426), Tipo_Aportante = c("Viejo Aportante", "Viejo Aportante",
"Nuevo Aportante", "Viejo Aportante", "Viejo Aportante", "Viejo Aportante"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L), groups = structure(list(Codigo_Cliente = c(29460L, 54042L,
208833L, 292653L, 371679L, 494610L), Segmento = structure(c(3L,
3L, 3L, 3L, 3L, 3L), .Label = c("Clasico", "Emergente", "Mi_Negocio",
"Preferencial", "Prestige"), class = "factor"), .rows = list(
1L, 6L, 2L, 4L, 5L, 3L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
You can use data.table package, which has rbindlist function:
# Stack the data frames by column position. Their columns line up
# structurally but are named differently (e.g. Sal_Cons_CA_2018 vs
# Sal_Cons_CC_2018), so matching by name (use.names = TRUE) cannot pair them
# up. The result takes its column names from the first data frame.
df <- data.table::rbindlist(list(df1, df2), use.names = FALSE)
Related
I have been trying to find a solution to this, but have been unsuccessful, as I am not 100% sure what is causing the issue. As this is the last little thing keeping me from finishing this project, any ideas would be very much appreciated!
My ggplot legend looks like this:
As you can see, the linetype and colour are displayed combined, but there seems to be an additional solid line going through the linetypes, making them hard to read. I think this comes from an additional geom_line call to display the grey lines (raw data).
Here is my code minus the theme and labelling
# Plot of Mean per phase bin from table_mean_a: points, error bars and one
# line per condition (cond_f), with the raw traces from df_plotting_a drawn in
# grey. `labels` and `apa` are not defined in this snippet.
ggplot(table_mean_a, mapping= aes(x = phase_bins, y = Mean)) +
geom_point(aes(x = phase_bins, y = Mean, colour = cond_f))+
# Raw data: one grey line per ID/condition pair. This layer has its own data
# frame and y variable, so the plot-level aesthetics are not inherited.
geom_line(df_plotting_a,
mapping = aes(x=phase_bins, y = mean_change_to_base, group=interaction(ID, cond_f)),
alpha = 0.3, size =0.5, colour = "grey", inherit.aes = FALSE)+
# Dashed horizontal reference line at y = 0.
geom_hline(yintercept=0, colour="#666766", linetype = "dashed")+
# Mean +/- SD bars for every phase except "baseline". colour is mapped to
# cond_f and show.legend is not set, so this layer also draws a key in the
# colour legend.
geom_errorbar(subset(table_mean_a, phase_bins!= "baseline"),
mapping=aes(ymin=Mean-SD, ymax=Mean+SD, colour = cond_f),
width=.4, size = 0.7)+
# Condition lines: colour and linetype are both mapped to cond_f.
geom_line(aes(group=as.numeric(cond_f), colour = cond_f, linetype=cond_f), size = 1.2)+
facet_wrap(sample~., labeller=labeller(sample = labels), ncol=2, scales = "free_x") +
scale_colour_manual(values=apa)+
# Same title for the colour and linetype guides.
labs(colour = "Condition:", linetype = "Condition:")+
# shape = NA removes the point glyph from the legend keys; alpha = 1 draws the
# keys fully opaque.
guides(colour = guide_legend(override.aes = list(shape = NA, alpha = 1), byrow = TRUE))+
scale_x_discrete(labels= c("base", "stim1", "stim2", "recovery", "break"), drop = T, expand = c(0,0.25))+
scale_y_continuous(limits = c(-1,1), breaks = c(-1,-0.5,0,0.5,1),
labels = scales::number_format(accuracy = 1))+
# One linetype per cond_f level; the strings other than "solid" are
# dash/gap length patterns.
scale_linetype_manual(values=c("solid", "81", "11", "5111"))
Here is the data
table_mean_a <-
structure(list(cond_f = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L), .Label = c("artificial", "cry", "laugh", "babble"
), class = "factor"), phase_bins = structure(c(2L, 3L, 4L, 5L,
6L, 2L, 3L, 4L, 5L, 6L), .Label = c("pre", "baseline", "stim_bin1",
"stim_bin2", "recovery", "break"), class = "factor"), age_group = c("adult",
"adult", "adult", "adult", "adult", "adult", "adult", "adult",
"adult", "adult"), sample = structure(c(3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), .Label = c("UKi", "UGi", "UKa"), class = "factor"),
Mean = c(0, -0.0616666666666667, -0.143333333333333, -0.0983333333333328,
-0.0231666666666662, 0, -0.0949999999999997, -0.1625, -0.113333333333333,
-0.155833333333333), SD = c(0, 0.154096119467973, 0.24172866718288,
0.423777912706934, 0.633257128379362, 0, 0.13934894565432,
0.248908435688157, 0.350921926972951, 0.677239732274149),
Min. = c(0, -0.366666666666667, -0.583333333333337, -0.716666666666667,
-0.98, 0, -0.466666666666665, -0.650000000000003, -0.783333333333337,
-1.96), Max. = c(0, 0.283333333333334, 0.383333333333336,
1.08333333333334, 1.72666666666667, 0, 0.199999999999996,
0.233333333333333, 0.5, 0.846666666666667), count = c(20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L)), row.names = c(NA,
-10L), groups = structure(list(cond_f = structure(c(1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("artificial", "cry",
"laugh", "babble"), class = "factor"), phase_bins = structure(c(2L,
3L, 4L, 5L, 6L, 2L, 3L, 4L, 5L, 6L), .Label = c("pre", "baseline",
"stim_bin1", "stim_bin2", "recovery", "break"), class = "factor"),
age_group = c("adult", "adult", "adult", "adult", "adult",
"adult", "adult", "adult", "adult", "adult"), .rows = structure(list(
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
df_plotting_a <-
structure(list(ID = structure(c(69L, 69L, 69L, 69L, 69L), .Label = c("UG201",
"UG208", "UG209", "UG211", "UG215", "UG217", "UG219", "UG220",
"UG221", "UG222", "UG228", "UG243", "UG247", "UG254", "UG268",
"UG271", "UG272", "UG273", "UG274", "UG275", "UG280", "UG283",
"UG284", "UG286", "UG297", "UG299", "UG308", "UG310", "UG315",
"UG316", "UG330", "UG331", "UG334", "UG335", "UK103", "UK104",
"UK105", "UK106", "UK107", "UK108", "UK110", "UK111", "UK112",
"UK113", "UK115", "UK116", "UK117", "UK119", "UK122", "UK123",
"UK130", "UK132", "UK135", "UK136", "UK138", "UK139", "UK140",
"UK142", "UK145", "UK147", "UK150", "UK153", "UK155", "UK156",
"UK159", "UK160", "UK162", "UK164", "UKA102", "UKA103", "UKA104",
"UKA105", "UKA106", "UKA107", "UKA108", "UKA109", "UKA110", "UKA111",
"UKA112", "UKA113", "UKA114", "UKA115", "UKA116", "UKA117", "UKA119",
"UKA120", "UKA121", "UKA122"), class = "factor"), sex = structure(c(2L,
2L, 2L, 2L, 2L), .Label = c("f", "m"), class = "factor"), trial = structure(c(1L,
1L, 1L, 1L, 1L), .Label = c("1", "2", "3", "4"), class = "factor"),
cond_f = structure(c(4L, 4L, 4L, 4L, 4L), .Label = c("artificial",
"cry", "laugh", "babble"), class = "factor"), age_group = c("adult",
"adult", "adult", "adult", "adult"), stimulus = structure(c(7L,
7L, 7L, 7L, 7L), .Label = c("a1", "a2", "a3", "a4", "b1",
"b2", "b3", "b4", "c1", "c2", "c3", "c4", "l1", "l2", "l3",
"l4"), class = "factor"), phase_bins = structure(2:6, .Label = c("pre",
"baseline", "stim_bin1", "stim_bin2", "recovery", "break"
), class = "factor"), mean_change_to_base = c(0, -0.149999999999997,
-0.399999999999996, -0.299999999999996, -0.0733333333333306
), max_change_to_base = c(0, -0.333333333333329, -0.533333333333331,
-0.43333333333333, -0.333333333333329), sample = structure(c(3L,
3L, 3L, 3L, 3L), .Label = c("UKi", "UGi", "UKa"), class = "factor")), row.names = c(NA,
-5L), groups = structure(list(ID = structure(69L, .Label = c("UG201",
"UG208", "UG209", "UG211", "UG215", "UG217", "UG219", "UG220",
"UG221", "UG222", "UG228", "UG243", "UG247", "UG254", "UG268",
"UG271", "UG272", "UG273", "UG274", "UG275", "UG280", "UG283",
"UG284", "UG286", "UG297", "UG299", "UG308", "UG310", "UG315",
"UG316", "UG330", "UG331", "UG334", "UG335", "UK103", "UK104",
"UK105", "UK106", "UK107", "UK108", "UK110", "UK111", "UK112",
"UK113", "UK115", "UK116", "UK117", "UK119", "UK122", "UK123",
"UK130", "UK132", "UK135", "UK136", "UK138", "UK139", "UK140",
"UK142", "UK145", "UK147", "UK150", "UK153", "UK155", "UK156",
"UK159", "UK160", "UK162", "UK164", "UKA102", "UKA103", "UKA104",
"UKA105", "UKA106", "UKA107", "UKA108", "UKA109", "UKA110", "UKA111",
"UKA112", "UKA113", "UKA114", "UKA115", "UKA116", "UKA117", "UKA119",
"UKA120", "UKA121", "UKA122"), class = "factor"), trial = structure(1L, .Label = c("1",
"2", "3", "4"), class = "factor"), cond_f = structure(4L, .Label = c("artificial",
"cry", "laugh", "babble"), class = "factor"), .rows = structure(list(
1:5), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), row.names = c(NA, -1L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
The legend keys from geom_errorbar and geom_line are drawn on top of each other, which is why you see the solid line. You can use show.legend = FALSE in geom_errorbar to hide its key.
Since some of the objects used in your ggplot call are missing, I only include the code for geom_errorbar here.
# Mean +/- SD error bars for every phase except "baseline". show.legend = FALSE
# keeps this layer out of the legend, so its solid key line no longer overlays
# the linetype keys drawn by geom_line.
geom_errorbar(subset(table_mean_a, phase_bins != "baseline"),
              mapping = aes(ymin = Mean - SD, ymax = Mean + SD, colour = cond_f),
              width = 0.4, size = 0.7, show.legend = FALSE)
I am trying to convert the following df from wide to long
Input:
structure(list(activity_level = structure(1:4, .Label = c("Sedentary",
"Lightly Active", "Moderately Active", "Very Active"), class = "factor"),
poor_sleepers = c(0.254032258064516, 0.258695652173913, 0.333333333333333,
0.253119429590018), normal_sleepers = c(0.332661290322581,
0.360869565217391, 0.318181818181818, 0.42602495543672),
excess_sleepers = c(0.413306451612903, 0.380434782608696,
0.348484848484849, 0.320855614973262)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L))
Using the following line and the melt function:
daily_sleep_byActivity_long <- melt(daily_sleep_byActivity, id.vars = "activity_level")
So that I get a result like this:
structure(list(user_type = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L), .Label = c("Sedentary", "Lightly Active",
"Fairly Active", "Very Active"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("bad_sleepers",
"normal_sleepers", "over_sleepers"), class = "factor"), value = c(0.32076169347384,
0.0601633003867641, 0.133333333333333, 0.175548589341693, 0.594379737474579,
0.778685002148689, 0.866666666666667, 0.824451410658307, 0.0848585690515807,
0.161151697464547, 0, 0)), row.names = c(NA, -12L), class = "data.frame")
As far as I can tell, my syntax is correct and the input has all the correct formats, but my output tells me "names do not match previous names". Does anyone know if I have missed something? My research online hasn't turned up an answer to this question.
Hello all and thank you in advance.
I would like to add a new column to my pre-existing data frame, where the values are sourced from a second data frame based on certain conditions. The dataset I wish to add the new column to ("data_melt") has many different sample IDs (sample.#) in the "variable" column. Using a second dataset ("metadata"), I want to add the pond names to a new column in "data_melt" based on the sample IDs. The sample IDs are the same in both datasets.
My gut tells me there's an obvious solution, but my head is pretty fried. Here is a toy example of my data_melt df (since it has 25,000 observations):
> dput(toy)
structure(list(gene = c("serA", "mdh", "fdhB", "fdhA"), process = structure(c(1L,
1L, 1L, 1L), .Label = "energy", class = "factor"), category = structure(c(1L,
1L, 1L, 1L), .Label = "metabolism", class = "factor"), ko = structure(1:4, .Label = c("K00058",
"K00093", "K00125", "K00148"), class = "factor"), variable = structure(c(1L,
2L, 3L, 3L), .Label = c("sample.10", "sample.19", "sample.72"
), class = "factor"), value = c(0.00116, 2.77e-05, 1.84e-05,
0.0125)), row.names = c(NA, -4L), class = "data.frame")
And here is a toy example of my metadata df:
> dput(toy)
structure(list(sample = c("sample.10", "sample.19", "sample.72",
"sample.13"), pond = structure(c(2L, 2L, 1L, 1L), .Label = c("lower",
"upper"), class = "factor")), row.names = c(NA, -4L), class = "data.frame")
Thank you again!
We can use match from base R to create a numeric index to replace the values
# For each row of toy, find the position of its sample ID in out$sample and
# copy the pond found at that position (NA where the ID has no match).
toy$pond <- out$pond[match(toy$variable, out$sample)]
I believe merge will work here.
# Toy version of the long-format data ("data_melt" in the question);
# `variable` holds the sample ID.
sss <- data.frame(
  gene = c("serA", "mdh", "fdhB", "fdhA"),
  process = factor(rep("energy", 4L)),
  category = factor(rep("metabolism", 4L)),
  ko = factor(c("K00058", "K00093", "K00125", "K00148")),
  variable = factor(c("sample.10", "sample.19", "sample.72", "sample.72")),
  value = c(0.00116, 2.77e-05, 1.84e-05, 0.0125),
  stringsAsFactors = FALSE
)

# Toy version of the metadata: one pond per sample ID.
ss <- data.frame(
  sample = c("sample.10", "sample.19", "sample.72", "sample.13"),
  pond = factor(c("upper", "upper", "lower", "lower"),
                levels = c("lower", "upper")),
  stringsAsFactors = FALSE
)

# Left join on the sample ID. all.x = TRUE keeps every row of sss even when
# its sample ID has no entry in ss (pond is NA there); merge()'s default inner
# join would silently drop such rows. Note that merge() puts the key column
# first and sorts the result by it.
ssss <- merge(sss, ss, by.x = "variable", by.y = "sample", all.x = TRUE)
You can use left_join() from the dplyr package after renaming sample to variable in the metadata data frame.
library(tidyverse)

# Long-format measurements; `variable` holds the sample ID.
data_melt <- data.frame(
  gene = c("serA", "mdh", "fdhB", "fdhA"),
  process = factor(rep("energy", 4L)),
  category = factor(rep("metabolism", 4L)),
  ko = factor(c("K00058", "K00093", "K00125", "K00148")),
  variable = factor(c("sample.10", "sample.19", "sample.72", "sample.72")),
  value = c(0.00116, 2.77e-05, 1.84e-05, 0.0125),
  stringsAsFactors = FALSE
)

# Sample metadata: the pond recorded for each sample ID.
metadata <- data.frame(
  sample = c("sample.10", "sample.19", "sample.72", "sample.13"),
  pond = factor(c("upper", "upper", "lower", "lower"),
                levels = c("lower", "upper")),
  stringsAsFactors = FALSE
)

# Give the key column the same name in both tables so they can be joined on it.
metadata <- metadata %>%
  rename(variable = sample)

# Attach the pond to each row of data_melt via the shared `variable` column.
data_melt <- left_join(data_melt, metadata, by = "variable")
I have a problem plotting credible intervals like this:
My data structure is as follows; L1, L2, M, U1 and U2 stand for the 0.025, 0.25, 0.5, 0.75 and 0.975 quantiles, respectively.
`
structure(list(approach = structure(c(1L, 2L, 1L, 2L, 1L, 2L), class = "factor", .Label = c("INLA",
"rjags")), param = structure(c(1L, 2L, 3L, 1L, 2L, 3L), class = "factor", .Label = c("alpha",
"beta", "sig2")), L1 = c(0.0844546867936143, 1.79242348175439,
0.163143886545317, 0.0754165380733685, 1.79067991488052, 3.66675821267498
), L2 = c(0.60090835904286, 1.95337968870806, 0.898159977552433,
0.606017177641373, 1.95260448314298, 4.07080184844179), M = c(0.870204161297956,
2.03768437879748, 2.20651061559405, 0.87408237273113, 2.03725552264872,
4.32531027636171), U2 = c(1.13905085248391, 2.12210930874551,
4.26836270504725, 1.66260576926063, 2.28900567640091, 5.10063756831338
), U1 = c(1.65214011950274, 2.28396345192398, 4.9109804477583,
1.1450384685802, 2.12117799328209, 4.55657971279654), AP = structure(c(1L,
4L, 5L, 2L, 3L, 6L), .Label = c("INLA.alpha", "rjags.alpha",
"INLA.beta", "rjags.beta", "INLA.sig2", "rjags.sig2"), class = "factor")), .Names = c("approach",
"param", "L1", "L2", "M", "U2", "U1", "AP"), row.names = c(NA,
-6L), class = "data.frame")`
I referenced this answer, but 'fill' seems to work only in the boxplot case. The code I have tried so far is:
# Label every approach/parameter combination, then draw M as a point with a
# default-width L1-U1 segment and a thicker L2-U2 segment on the same row.
CI$AP <- interaction(CI$approach, CI$param)
p <- ggplot(CI, aes(y = AP)) +
  geom_point(aes(x = M)) +
  geom_segment(aes(x = L1, xend = U1, y = AP, yend = AP)) +
  geom_segment(aes(x = L2, xend = U2, y = AP, yend = AP), size = 1.5)
It is far away from what I want.
Many thanks!
How about the following:
# Point-and-interval plot: one colour per approach, one axis position per
# parameter (axes flipped). Each estimate M gets a thick L2-U2 range and a
# thin L1-U1 range; all three layers use position_dodge2(width = 0.3).
ggplot(df, aes(x = param, y = M, colour = approach)) +
  geom_point(size = 3, position = position_dodge2(width = 0.3)) +
  # x = param is inherited from the plot-level mapping in both range layers.
  geom_linerange(aes(ymin = L2, ymax = U2), size = 2,
                 position = position_dodge2(width = 0.3)) +
  geom_linerange(aes(ymin = L1, ymax = U1), size = 1,
                 position = position_dodge2(width = 0.3)) +
  coord_flip() +
  labs(x = "Parameter", y = "Estimate")
Sample data
df <- structure(list(approach = structure(c(1L, 2L, 1L, 2L, 1L, 2L), class = "factor", .Label = c("INLA",
"rjags")), param = structure(c(1L, 2L, 3L, 1L, 2L, 3L), class = "factor", .Label = c("alpha",
"beta", "sig2")), L1 = c(0.0844546867936143, 1.79242348175439,
0.163143886545317, 0.0754165380733685, 1.79067991488052, 3.66675821267498
), L2 = c(0.60090835904286, 1.95337968870806, 0.898159977552433,
0.606017177641373, 1.95260448314298, 4.07080184844179), M = c(0.870204161297956,
2.03768437879748, 2.20651061559405, 0.87408237273113, 2.03725552264872,
4.32531027636171), U2 = c(1.13905085248391, 2.12210930874551,
4.26836270504725, 1.66260576926063, 2.28900567640091, 5.10063756831338
), U1 = c(1.65214011950274, 2.28396345192398, 4.9109804477583,
1.1450384685802, 2.12117799328209, 4.55657971279654), AP = structure(c(1L,
4L, 5L, 2L, 3L, 6L), .Label = c("INLA.alpha", "rjags.alpha",
"INLA.beta", "rjags.beta", "INLA.sig2", "rjags.sig2"), class = "factor")), .Names = c("approach",
"param", "L1", "L2", "M", "U2", "U1", "AP"), row.names = c(NA,
-6L), class = "data.frame")
I'm trying to plot a network graph using the d3Network package. I tried organizing the data to match the instructions as they appear on the package website (and help page), but I still get a blank web page. Can anyone spot what I'm doing wrong?
library(d3Network)
g.top3000 <- structure(list(from = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 4L, 4L, 5L), .Label = c("afghanistan", "attack",
"people", "pres_bush", "taliban"), class = "factor"), to = structure(c(4L,
1L, 5L, 2L, 3L, 1L, 5L, 2L, 3L, 5L, 2L, 3L, 2L, 3L, 3L), .Label = c("people",
"taliban", "united_states", "attack", "pres_bush"), class = "factor"),
weight = c(4, 3, 2, 6, 5, 5, 2, 3, 6, 1, 1, 5, 2, 4, 4)), .Names = c("from",
"to", "weight"), row.names = c(NA, -15L), class = "data.frame")
top3000.nodes <- structure(list(name = structure(1:5, .Label = c("afghanistan",
"attack", "people", "pres_bush", "taliban"), class = "factor"),
id = c(1, 1, 1, 2, 2)), .Names = c("name", "id"), row.names = c(NA,
-5L), class = "data.frame")
d3ForceNetwork(Links = g.top3000, Nodes = top3000.nodes, Source = "from", Target = "to",
Value = "weight", NodeID = "name", Group = "id", width = 800, height = 400, opacity = 0.9,
file = "projekt2_terror_news_force.html")
Plotting a simple graph works just fine
d3SimpleNetwork(g.top3000, width = 800, height = 400, fontsize = 12, linkDistance = 200,
file = "projekt2_terror_news.html")
That's because:
There is no entry for "united_states" in the node list.
You may need to use a numeric index instead of the node name.
# Add the missing "united_states" node: it is a level of g.top3000$to but had
# no entry in the node list.
top3000.nodes <- rbind(top3000.nodes, data.frame(name = "united_states", id = 3))

# Translate node names into zero-based row positions in top3000.nodes.
# match() is vectorised and yields NA for a name that is absent from the node
# list, so both columns are handled the same way and are always plain integer
# vectors (sapply() + which() returns a list as soon as one name is missing).
g.top3000$from2 <- match(as.character(g.top3000$from), top3000.nodes$name) - 1L
g.top3000$to2 <- match(as.character(g.top3000$to), top3000.nodes$name) - 1L

# Use the index columns "from2" and "to2" as link source and target.
d3ForceNetwork(Links = g.top3000, Nodes = top3000.nodes,
               Source = "from2", Target = "to2",
               Value = "weight", NodeID = "name", Group = "id",
               width = 800, height = 400, opacity = 0.4,
               file = "projekt2_terror_news_force.html", linkDistance = 200)