I really like this confusion plot, but I want the values where Freq is zero to be white.
# Baseline heatmap of the confusion table: one tile per
# (Prediction, Reference) pair, shaded from red to green by Freq and
# annotated with the raw count.
ggplot(gg, aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradient(low = "#f8766d", high = "#00ba38") +
  labs(x = "Prediction", y = "Reference")
I tried filtering out the zero Freq, but it looks ugly:
# Same heatmap after dropping the zero-count rows, so those cells are
# simply absent from the plot instead of being drawn.
ggplot(
  dplyr::filter(gg, Freq != 0),
  aes(x = Prediction, y = Reference, fill = Freq)
) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradient(low = "#f8766d", high = "#00ba38") +
  labs(x = "Prediction", y = "Reference")
Anyone know how I can keep the red/green on the diagonal/off diagonal, but make zeros white?
Here is the data:
# Confusion table in long format (dput() output): 100 rows, one per
# (Prediction, Reference) pair. Prediction and Reference are factors with
# levels "0".."9"; Freq is the integer count for that pair.
gg <- structure(list(Prediction = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L), .Label = c("0", "1", "2", "3", "4", "5", "6",
"7", "8", "9"), class = "factor"), Reference = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L), .Label = c("0", "1", "2", "3", "4",
"5", "6", "7", "8", "9"), class = "factor"), Freq = c(99L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 97L, 2L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 1L, 93L, 1L, 2L, 1L, 0L, 1L, 1L, 0L, 0L, 0L,
1L, 96L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 3L, 0L, 94L, 0L, 0L,
2L, 0L, 1L, 2L, 0L, 0L, 2L, 1L, 85L, 1L, 0L, 6L, 3L, 0L, 0L,
1L, 0L, 1L, 1L, 95L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L,
89L, 0L, 8L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 97L, 0L, 1L, 0L,
0L, 1L, 1L, 1L, 0L, 2L, 2L, 92L)), class = "data.frame", row.names = c(NA,
-100L))
That was nice! I'm facing similar problems these days. If you want the 0 to appear, I think there are at least two ways of doing this:
The first one is by manipulating a bit the data and using the parameter na.value="white":
# Fill by Freq2, a copy of Freq whose zeros are turned into NA, so the
# zero cells take the scale's na.value (white). Labels still use the
# original Freq, so the 0s remain printed.
gg %>%
  mutate(Freq2 = replace(Freq, Freq == 0, NA)) %>%
  ggplot(aes(x = Prediction, y = Reference, fill = Freq2)) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradientn(
    colours = c("#f8766d", "#00ba38"),
    na.value = "white"
  ) +
  labs(x = "Prediction", y = "Reference", fill = "Freq")
The second one is by playing with the colours of the scale:
# Three-stop gradient: white at the bottom of the rescaled fill range,
# red from 1% of the range upwards, green at the top. Only the smallest
# values therefore stay (near) white.
ggplot(gg, aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradientn(
    colours = c("white", "#f8766d", "#00ba38"),
    values = c(0, 0.01, 1)
  ) +
  labs(x = "Prediction", y = "Reference")
This solution has the downside of being quite sensitive to the values, in the sense that if your data changes a lot, this won't lead to the same coloring. So the first solution is more robust.
To remove the 0's from the white cells, the best option is to set the label to an empty string in those cases. For example, using the first solution:
# As in the first solution (zeros -> NA -> white tiles), but the text
# label is an empty string wherever Freq is 0, so white cells stay blank.
gg %>%
  mutate(Freq2 = replace(Freq, Freq == 0, NA)) %>%
  ggplot(aes(x = Prediction, y = Reference, fill = Freq2)) +
  geom_tile() +
  geom_text(aes(label = ifelse(Freq == 0, "", Freq))) +
  scale_fill_gradientn(
    colours = c("#f8766d", "#00ba38"),
    na.value = "white"
  ) +
  labs(x = "Prediction", y = "Reference", fill = "Freq")
One option is to use na.value after replacing the 0s with NA
library(ggplot2)
library(dplyr)
# Overwrite Freq itself with NA where it is 0; the NA tiles are filled
# with na.value (white). The labels use the same column, so they are NA
# for those cells as well.
ggplot(
  mutate(gg, Freq = na_if(Freq, 0)),
  aes(x = Prediction, y = Reference, fill = Freq)
) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradient(
    low = "#f8766d",
    high = "#00ba38",
    na.value = "white"
  ) +
  labs(x = "Prediction", y = "Reference")
-output
If we need the 0 values as labels as well, then instead of creating a new column we can change the 0 to NA only in the fill aesthetic, using .data
# Leave the data untouched and apply na_if() only inside the fill
# aesthetic: tiles with Freq == 0 become NA (white) while the labels,
# which read Freq directly, still show 0.
gg %>%
  ggplot(aes(
    x = Prediction,
    y = Reference,
    fill = na_if(.data[["Freq"]], 0)
  )) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradient(
    low = "#f8766d",
    high = "#00ba38",
    na.value = "white"
  ) +
  labs(x = "Prediction", y = "Reference", fill = "Freq")
Related
I would like to visualise two vectors (predikcia & test data) containing all the wrongly classified numbers from my classification problem. Both vectors have 76 entries: the first one (predikcia) holds the digits 0-9 that the classifier wrongly predicted, and the second one (test data) holds the digits they should have been. A basic plot of these vectors is not a good representation: it gives little information about which numbers were wrongly classified and what they should have been classified as. Here is a picture of what the basic plot shows
data
# Misclassification data: two parallel integer columns.
#   predikcia - first column of digit labels
#   testdata  - second column of digit labels, row-aligned with predikcia
classres <- data.frame(
predikcia = c(9L, 8L, 3L, 9L, 1L, 6L, 2L, 2L,
6L, 3L, 5L, 9L, 8L, 1L, 5L, 1L, 3L, 3L, 5L, 9L,
5L, 1L, 8L, 9L, 5L, 0L, 1L, 9L, 5L, 5L, 8L, 9L,
2L, 5L, 8L, 5L, 6L, 9L, 9L, 4L, 9L, 3L, 5L, 5L, 9L, 9L, 9L, 4L, 3L,
5L, 8L, 3L, 0L, 5L, 8L, 8L, 7L, 3L, 8L, 8L, 5L, 9L, 9L, 1L, 5L, 5L,
9L, 9L, 5L, 3L, 1L, 9L, 2L, 5L, 8L, 9L),
testdata = c(4L, 6L, 1L, 5L, 5L, 1L, 1L, 1L, 5L,
9L, 7L, 8L, 0L, 8L, 8L, 9L, 7L, 1L, 9L, 5L, 8L,
8L, 0L, 5L, 1L, 8L, 4L, 1L, 9L, 1L, 0L, 5L, 1L,
9L, 0L, 0L, 0L, 4L, 1L, 2L, 7L, 5L, 9L, 8L, 5L,
5L, 5L, 1L, 9L, 9L, 0L, 9L, 8L, 9L, 6L, 0L, 8L,
5L, 0L, 9L, 8L, 5L, 5L, 9L, 2L, 8L, 0L, 5L, 7L,
1L, 8L, 8L, 9L, 9L, 7L, 1L))
I'm assuming that there is either "correct" or "incorrect" predictions, otherwise the graph would need more work.
First, I have the data in which there are predictions and real values. In this example they are integers, but I'm pretending that it does not mean anything.
# Example data with 20 rows: predikcia and testdata are parallel integer
# columns of digit labels.
classres <- data.frame(
predikcia = c(9L, 8L, 3L, 9L, 1L, 6L, 2L, 2L,
6L, 3L, 5L, 9L, 8L, 1L, 5L, 1L, 3L, 3L, 5L, 9L),
testdata = c(4L, 6L, 1L, 5L, 5L, 1L, 1L, 1L, 5L,
9L, 7L, 8L, 0L, 8L, 8L, 9L, 7L, 1L, 9L, 5L))
Then I create a count data-frame. The "factor" part is important because I want all the possible combinations to appear on the plot.
# Tally each (testdata, predikcia) pair into column `n`, then turn both
# keys into factors carrying the full 0-9 level set, so classes that never
# occur in the data are still known levels.
dat.plot <- mutate(
  count(classres, testdata, predikcia),
  testdata = factor(testdata, levels = 0:9),
  predikcia = factor(predikcia, levels = 0:9)
)
Finally, I create a heatmap from the data coloring the inside of each cell with the count values and adding a border to the cells where predictions are considered correct (this is why I need the goodclass data-frame).
# One row per class with testdata == predikcia, i.e. the diagonal cells
# that count as correct predictions.
goodclass <- data.frame(testdata = factor(0:9))
goodclass$predikcia <- goodclass$testdata
# Heatmap of the pair counts. A second, unfilled tile layer built from
# `goodclass` outlines the diagonal cells; its colour is mapped to a
# constant string so that the outline gets a legend entry.
ggplot(dat.plot, aes(x = testdata, y = predikcia, fill = n)) +
  geom_tile() +
  geom_tile(
    data = goodclass,
    mapping = aes(x = testdata, y = predikcia, color = "Correct\npredictions"),
    fill = NA,
    size = 2,
    inherit.aes = FALSE
  ) +
  scale_fill_gradient(low = "goldenrod", high = "darkorchid4") +
  scale_color_manual(values = c("Correct\npredictions" = "limegreen")) +
  labs(
    x = "Real class value",
    y = "Predicted class value",
    fill = "count",
    color = ""
  ) +
  coord_equal() +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_line(color = "black", size = 2)
  )
And the result hurts the eyes a little: it will probably need a bit more work to find more beautiful colors.
I want to produce a confusion matrix plot where the diagonal entries are green, the zero entries white, and the off-diagonal non-zero entries should be red.
This is the data:
# Confusion table in long format (dput() output): 100 rows, one per
# (Prediction, Reference) pair. Prediction and Reference are factors with
# levels "0".."9"; Freq is the integer count for that pair.
gg <- structure(list(Prediction = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L), .Label = c("0", "1", "2", "3", "4", "5", "6",
"7", "8", "9"), class = "factor"), Reference = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L), .Label = c("0", "1", "2", "3", "4",
"5", "6", "7", "8", "9"), class = "factor"), Freq = c(93L, 7L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 100L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 2L, 89L, 6L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 1L, 98L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 26L, 0L, 0L, 71L, 0L, 0L,
3L, 0L, 0L, 2L, 69L, 0L, 0L, 1L, 25L, 0L, 3L, 0L, 0L, 6L, 64L,
0L, 0L, 0L, 0L, 30L, 0L, 0L, 0L, 1L, 13L, 0L, 0L, 0L, 0L, 0L,
86L, 0L, 0L, 3L, 96L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 4L, 37L,
0L, 0L, 5L, 0L, 0L, 54L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-100L))
In this example, the off-diagonal zeros are white. But how can I intentionally set the diagonal to green and non-zero off-diagonal red?
# Current attempt: zeros become NA in Freq2 and are drawn white through
# na.value; every other cell is shaded by its count on a red-to-green
# gradient, regardless of whether it lies on the diagonal.
ggplot(
  dplyr::mutate(gg, Freq2 = ifelse(Freq == 0, NA, Freq)),
  aes(x = Prediction, y = Reference, fill = Freq2)
) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradientn(
    colours = c("#f8766d", "#00ba38"),
    na.value = "white"
  ) +
  labs(x = "Prediction", y = "Reference", fill = "Freq")
One option using scale_fill_identity -
library(dplyr)
library(ggplot2)
# Colour each cell by category rather than by magnitude:
#   - diagonal (Prediction == Reference) -> green
#   - off-diagonal with Freq == 0        -> white
#   - every other off-diagonal cell      -> red
# case_when() takes the first matching branch, so a diagonal cell stays
# green even when its count is 0.
# scale_fill_identity() uses the strings in `color` verbatim as colours,
# so each one must be an exact colour name: the previous ' red' (with a
# leading space) is not a valid colour.
gg %>%
  mutate(color = case_when(
    Prediction == Reference ~ "green",
    Freq == 0 ~ "white",
    TRUE ~ "red"
  )) %>%
  ggplot(aes(Prediction, Reference, fill = color)) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_identity() +
  labs(x = "Prediction", y = "Reference")
Figured it out. Had to use scale_fill_manual
# Build a three-state fill key in Freq2:
#   "diag"    - cells on the diagonal
#   "notDiag" - off-diagonal cells with a non-zero count
#   NA        - off-diagonal zeros (drawn with na.value)
gg2 <- dplyr::mutate(gg, Freq2 = ifelse(Freq == 0, NA, Freq))
gg2$Freq2[gg2$Prediction == gg2$Reference] <- "diag"
gg2$Freq2[gg2$Prediction != gg2$Reference & !is.na(gg2$Freq2)] <- "notDiag"

# The manual scale assigns colours to the sorted keys: "diag" gets the
# first value (green) and "notDiag" the second (red).
ggplot(gg2, aes(x = Prediction, y = Reference, fill = Freq2)) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_manual(
    values = c("#00ba38", "#f8766d"),
    na.value = "white"
  ) +
  labs(x = "Prediction", y = "Reference", fill = "Freq")
I would like to change the order of my legend, and not to display them in alphabetical order as you can see below. I would like to have
"NONE","LIGHT","MEDIUM","HEAVY","V_COLD","COLD","MEDIUM","HOT".
Is it possible? I tried with several arguments but without success.
Below, my table :
# dput() output of a 36-row data frame; the value is not assigned here.
# Presumably this is the `b_complet_2013` object used by the plot below
# (TODO confirm).
# Columns: SOUNAME, year_month, pre_type/pre_value, tem_type/tem_value,
# cnt_vehicle (NA on most rows) and x (integer month index 1-9).
structure(list(SOUNAME = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "BALLYSHANNON (CATHLEENS FALL)", class = "factor"),
year_month = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L), .Label = c("2013-03",
"2013-04", "2013-05", "2013-06", "2013-07", "2013-08", "2013-09",
"2013-10", "2013-12"), class = "factor"), pre_type = structure(c(4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L), .Label = c("HEAVY", "LIGHT", "MEDIUM",
"NONE"), class = "factor"), pre_value = c(13L, 2L, 11L, 5L,
9L, 3L, 10L, 7L, 2L, 6L, 13L, 10L, 10L, 1L, 15L, 4L, 16L,
2L, 7L, 5L, 2L, 2L, 17L, 9L, 7L, 3L, 13L, 6L, 5L, 2L, 10L,
14L, 1L, 5L, 19L, 6L), tem_type = structure(c(4L, 3L, 2L,
1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L,
2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L,
3L, 2L, 1L), .Label = c("COLD", "HOT", "MEDIUM", "V_COLD"
), class = "factor"), tem_value = c(0L, 7L, 0L, 23L, 0L,
29L, 0L, 1L, 0L, 29L, 2L, 0L, 0L, 21L, 9L, 0L, 0L, 5L, 25L,
0L, 0L, 18L, 13L, 0L, 0L, 21L, 9L, 0L, 0L, 26L, 5L, 0L, 0L,
24L, 0L, 7L), cnt_vehicle = c(NA, 2754406, NA, NA, NA, 2846039,
NA, NA, NA, 3149377, NA, NA, NA, 3058810, NA, NA, NA, 3362614,
NA, NA, NA, 3415716, NA, NA, NA, 3020812, NA, NA, NA, 3076665,
NA, NA, NA, 2775306, NA, NA), x = c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L
)), .Names = c("SOUNAME", "year_month", "pre_type", "pre_value",
"tem_type", "tem_value", "cnt_vehicle", "x"), row.names = c(NA,
-36L), class = "data.frame")
Below my graph:
# Weather vs. road traffic, one group of bars per month index `x`:
#   - precipitation-type day counts as bars at x
#   - temperature-type day counts as bars at x + 0.4
#   - vehicle counts as a blue line and points at x + 0.2
# Day counts are multiplied by 100000 so they share the primary y axis
# with the vehicle counts; the secondary axis divides by 100000 to read
# them back as days.
#
# Cleaned up relative to the earlier version: xlab()/ylab()/ggtitle()
# calls that were overridden further down the chain are gone, the
# `stat` argument of ggplot() (which it does not use) is dropped, the
# scattered theme() calls are merged into one with the same resulting
# settings, and the "ritght" typo in the secondary-axis title is fixed.
ggplot(
  data = b_complet_2013,
  aes(x = x, y = pre_value * 100000, fill = pre_type)
) +
  geom_bar(stat = "identity", width = 0.3) +
  geom_bar(
    aes(x = x + 0.4, y = tem_value * 100000, fill = tem_type),
    stat = "identity",
    width = 0.3
  ) +
  geom_line(
    aes(x = x + 0.2, y = as.numeric(cnt_vehicle)),
    colour = I("blue"),
    size = 0.8
  ) +
  geom_point(
    aes(x = x + 0.2, y = as.numeric(cnt_vehicle), colour = I("blue")),
    show.legend = FALSE,
    stat = "identity"
  ) +
  # Tick marks sit at x + 0.2, between the two bars of each month.
  scale_x_continuous(
    breaks = (1:9) + 0.2,
    labels = unique(b_complet_2013$year_month)
  ) +
  scale_y_continuous(
    sec.axis = sec_axis(
      ~ . / 100000,
      name = "Number of days of precipitations (left) and temperatures (right)"
    )
  ) +
  labs(
    x = "date",
    y = "Number of vehicles",
    title = "Impact of weather on road traffics",
    color = "black"
  ) +
  theme(
    plot.title = element_text(size = 17),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(color = "blue", face = "bold", size = 12),
    axis.text.y = element_text(color = "blue", face = "bold", size = 9),
    axis.title.y.right = element_text(color = "black", face = "bold"),
    axis.text.y.right = element_text(color = "black", size = 9, face = "bold"),
    panel.background = element_rect(linetype = "dashed", fill = "white"),
    plot.background = element_rect(linetype = "dashed", fill = "grey90")
  )
How do I reorder the factor in my dPlot so that "Exceptional" is located leftmost, followed by "Fine", "Mediocre", "Trouble", "Danger", and "Critical"?
Dataset:
# 36-row data frame (dput() output): one row per (MainDept, Level) pair
# with its integer frequency Freq. Both MainDept and Level are factors;
# the Level labels are stored in alphabetical order ("Critical" first).
dat<-structure(list(MainDept = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L,
5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("Business Development",
"Finance Department", "HR & Admin Department", "Manufacturing & Engineering Department",
"QA Department", "Supply Chain Department"), class = "factor"),
Level = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("Critical",
"Danger", "Exceptional", "Fine", "Mediocre", "Trouble"), class = "factor"),
Freq = c(0L, 0L, 0L, 29L, 0L, 0L, 0L, 0L, 0L, 108L, 0L, 0L,
0L, 0L, 2L, 7L, 12L, 15L, 0L, 7L, 15L, 16L, 73L, 59L, 12L,
0L, 0L, 191L, 0L, 0L, 11L, 0L, 0L, 128L, 0L, 0L)), .Names = c("MainDept",
"Level", "Freq"), row.names = c(NA, -36L), class = "data.frame")
The code to generate the plot:
# Bar chart of Freq per MainDept, grouped by Level, with a category
# y axis and a percentage x axis.
# library() is used instead of require(): if rCharts is not installed it
# stops here with a clear error, rather than returning FALSE and failing
# later at the dPlot() call.
library(rCharts)
plot <- dPlot(
  y = "MainDept",
  x = "Freq",
  data = dat,
  groups = "Level",
  type = "bar",
  width = 1200
)
plot$yAxis(type = "addCategoryAxis")
plot$xAxis(type = "addPctAxis")
plot$legend(
  x = 0,
  y = 0,
  horizontalAlign = "right"
)
I tried with:
# Attempted reordering: replace each level name by its desired rank
# ("1" to "6") as text, leave anything else untouched, then turn the
# column back into a factor.
dat$Level <- local({
  wanted <- c("Exceptional", "Fine", "Mediocre", "Trouble", "Danger", "Critical")
  labels <- as.character(dat$Level)
  rank <- match(labels, wanted)
  as.factor(ifelse(is.na(rank), labels, as.character(rank)))
})
Thank you.
I don't know about dPlot, but if it relies on the order of the levels in the grouping variable you should try
# Rebuild the factor with the levels listed in the desired display order
# (the labels themselves are unchanged).
dat[["Level"]] <- factor(
  dat[["Level"]],
  levels = c("Exceptional", "Fine", "Mediocre", "Trouble", "Danger", "Critical")
)
As I do not know in which library dPlot can be found this is of course untested. Hence, for future reference, a minimal working example including the necessary libraries would help.
I have the following data set:
# dput() output of a 66 x 3 integer matrix with columns "from", "to" and
# "impact" (values 1 or -1); the value is not assigned here. Presumably
# this is the `df2` object used below (TODO confirm).
structure(c(2L, 6L, 2L, 6L, 7L, 7L, 2L, 7L, 6L, 8L, 8L, 4L, 8L,
2L, 9L, 8L, 7L, 6L, 9L, 1L, 9L, 4L, 9L, 3L, 2L, 10L, 9L, 10L,
8L, 10L, 7L, 6L, 10L, 1L, 2L, 12L, 9L, 8L, 12L, 1L, 11L, 10L,
2L, 44L, 79L, 10L, 8L, 47L, 45L, 51L, 9L, 11L, 74L, 75L, 77L,
69L, 75L, 77L, 78L, 2L, 44L, 44L, 46L, 46L, 8L, 6L, 1L, 1L, 6L,
7L, 1L, 4L, 7L, 8L, 8L, 1L, 4L, 8L, 3L, 8L, 8L, 9L, 9L, 9L, 1L,
9L, 5L, 9L, 3L, 9L, 9L, 9L, 10L, 8L, 10L, 7L, 10L, 10L, 1L, 10L,
10L, 9L, 12L, 12L, 1L, 12L, 12L, 12L, 12L, 7L, 7L, 44L, 44L,
44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 7L, 7L, 7L, 7L, 44L,
10L, 9L, 42L, 43L, 46L, 46L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, -1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, -1L, 1L, 1L, 1L, -1L, -1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, -1L), .Dim = c(66L,
3L), .Dimnames = list(NULL, c("from", "to", "impact")))
The data set indicates, a connection between from and to with a positive (1) or negative (-1) impact.
I would like to plot a network graph with ggnet (or ggplot2) that plot this graph.
So far I have done the following steps:
library(network)
library(ggplot2)
library(ggnet)
library(grid)
# Undirected network from the first two columns (from, to) of df2,
# drawn with ggnet without node labels or a legend.
net <- network(df2[, c(1, 2)], directed = FALSE)
ggnet(
  net,
  mode = "kamadakawai",
  size = 6,
  alpha = .5,
  label.nodes = FALSE,
  segment.alpha = 0.5,
  color = "black"
) +
  theme(
    legend.position = "none",
    plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm")
  )
This leads to this result:
I am wondering how to colorize the edges based on the impact in the dataset (1 = green and -1 = red). I am also wondering why there are so many unconnected nodes...
Can someone help me with this?
Thanks a lot.
First, I am using GGally::ggnet as this is available through CRAN. I believe this is equivalent to that on github.
library(network)
library(GGally)
library(ggplot2)
# dependencies
library(grid)
library(sna)
library(intergraph)
The reason you have many unconnected nodes in your network diagram is because the node names you have supplied are not an unbroken sequence of integers. For example, if you supply only an edge between nodes named 1 and 10, network() will assume the presence of eight unconnected nodes with names 2:9. For example,
# Demo: build two single-edge networks, one between nodes 1 and 2 and one
# between nodes 1 and 10, and list the vertex names each ends up with.
netwk1 <- network(cbind(1, 2), directed = FALSE)
get.vertex.attribute(netwk1, attrname = "vertex.names")

netwk2 <- network(cbind(1, 10), directed = FALSE)
get.vertex.attribute(netwk2, attrname = "vertex.names")
So if you convert your node names to an unbroken sequence, you will lose all the unconnected nodes. So something like:
# Renumber the node ids in the from/to columns as consecutive codes
# 1..k (via the factor codes of all ids taken together), so no unused
# ids are left in between.
df2[, 1:2] <- as.numeric(as.factor(df2[, c(1, 2)]))

# Rebuild the network from the renumbered edge list and plot it with
# node labels switched on.
net <- network(df2[, c(1, 2)], directed = FALSE)
ggnet(
  net,
  mode = "kamadakawai",
  size = 6,
  alpha = .5,
  label.nodes = TRUE,
  segment.alpha = 0.5,
  color = "black"
) +
  theme(
    legend.position = "none",
    plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm")
  )
You can color the edges with the segment.color argument in ggnet() :
# One colour per row of df2: red where the third column (impact) is -1,
# green otherwise. The vector is passed to ggnet() as segment.color.
edge_color <- ifelse(df2[, 3] == -1, "red", "green")
ggnet(
  net,
  mode = "kamadakawai",
  size = 6,
  alpha = .5,
  segment.color = edge_color,
  label.nodes = TRUE,
  segment.alpha = 0.5,
  color = "black"
) +
  theme(
    legend.position = "none",
    plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm")
  )
You should include node.group =type
# Same plot with a node grouping passed through node.group.
# NOTE(review): `type` is not defined anywhere in this snippet; it must
# already exist and hold the grouping for the nodes - confirm before use.
ggnet(
  net,
  mode = "kamadakawai",
  size = 6,
  alpha = .5,
  label.nodes = FALSE,
  segment.alpha = 0.5,
  color = "black",
  node.group = type
) +
  theme(
    legend.position = "none",
    plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm")
  )
You could also construct the plot directly using the network package, which I think ggnet is using under the hood.
# Dominik's data structure: a 66 x 3 integer matrix with columns
# "from", "to" and "impact" (values 1 or -1), one row per edge.
edgelist<-structure(c(2L, 6L, 2L, 6L, 7L, 7L, 2L, 7L, 6L, 8L, 8L, 4L, 8L,
2L, 9L, 8L, 7L, 6L, 9L, 1L, 9L, 4L, 9L, 3L, 2L, 10L, 9L, 10L,
8L, 10L, 7L, 6L, 10L, 1L, 2L, 12L, 9L, 8L, 12L, 1L, 11L, 10L,
2L, 44L, 79L, 10L, 8L, 47L, 45L, 51L, 9L, 11L, 74L, 75L, 77L,
69L, 75L, 77L, 78L, 2L, 44L, 44L, 46L, 46L, 8L, 6L, 1L, 1L, 6L,
7L, 1L, 4L, 7L, 8L, 8L, 1L, 4L, 8L, 3L, 8L, 8L, 9L, 9L, 9L, 1L,
9L, 5L, 9L, 3L, 9L, 9L, 9L, 10L, 8L, 10L, 7L, 10L, 10L, 1L, 10L,
10L, 9L, 12L, 12L, 1L, 12L, 12L, 12L, 12L, 7L, 7L, 44L, 44L,
44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 7L, 7L, 7L, 7L, 44L,
10L, 9L, 42L, 43L, 46L, 46L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, -1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, -1L, 1L, 1L, 1L, -1L, -1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, -1L), .Dim = c(66L,
3L), .Dimnames = list(NULL, c("from", "to", "impact")))
# Build the network from the edge list; with ignore.eval = FALSE the
# third column is kept as an edge attribute named "impact".
net <- as.network.matrix(
  edgelist,
  matrix.type = "edgelist",
  ignore.eval = FALSE,
  names.eval = "impact"
)

# Draw it, colouring each edge by its impact attribute:
# green for 1, red otherwise.
plot(
  net,
  edge.col = ifelse(net %e% "impact" == 1, "green", "red")
)
Nate Pope's answer above about changing the range of the ids to remove isolates still applies. However, you can ask plot.network not to draw the isolates:
# Same edge colouring, with isolated (unconnected) vertices left out of
# the drawing.
plot(
  net,
  edge.col = ifelse(net %e% "impact" == 1, "green", "red"),
  displayisolates = FALSE
)