I'm very new to coding so I'm basically googling everything but I couldn't figure this one out:
I have a data frame of 32 rows, and 19 columns. I want to calculate the sum of each row in three specific columns.
I'm writing it like this:
D10 - my data frame.
Compliance_score - the new column I want to add
Compliance_1-3 - the columns I want to sum
# The Compliance_* columns are stored as character (see the dput output
# below), and rowSums() only accepts numeric input. Convert each column with
# as.numeric() and add them element-wise; as with rowSums(), an NA in any of
# the three columns makes that row's score NA.
compliance_cols <- c("Compliance_1", "Compliance_2", "Compliance_3")
D10$Compliance_score <- Reduce(`+`, lapply(D10[compliance_cols], as.numeric))
I keep getting the error: "incorrect number of dimensions".
Can't figure out what I'm doing wrong, or what this error message even means.
Any thoughts?
**Edit: if I understood correctly what a reproducible example is (this is my first time, so I hope I got it right; if not, please let me know):
> dput(head(D10))
structure(list(PP = c("003", "014", "047", "013", "053", "048"
), MAAS_1 = c("4.0", "4.0", "3.0", "5.0", "3.0", "4.0"), MAAS_2 =
c("3.0",
"1.0", "6.0", "4.0", "3.0", "3.0"), MAAS_3 = c("4.0", "5.0",
"4.0", "3.0", "4.0", "4.0"), MAAS_4 = c("2.0", "2.0", "6.0",
"2.0", "3.0", "4.0"), MAAS_5 = c("3.0", "3.0", "4.0", "5.0",
"5.0", "5.0"), MAAS_6 = c("3.0", "3.0", "4.0", "3.0", "2.0",
"4.0"), MAAS_7 = c("3.0", "3.0", "4.0", "3.0", "3.0", "5.0"),
MAAS_8 = c("2.0", "4.0", "4.0", "4.0", "4.0", "4.0"), MAAS_9
= c("3.0",
"4.0", "3.0", "2.0", "4.0", "5.0"), MAAS_10 = c("3.0", "4.0",
"4.0", "2.0", "4.0", "4.0"), MAAS_11 = c("2.0", "5.0", "4.0",
"4.0", "1.0", "5.0"), MAAS_12 = c("2.0", "5.0", "6.0", "3.0",
"3.0", "6.0"), MAAS_13 = c("3.0", "3.0", "5.0", "3.0", "3.0",
"2.0"), MAAS_14 = c("3.0", "4.0", "5.0", "4.0", "4.0", "4.0"
), MAAS_15 = c("3.0", "5.0", "6.0", "3.0", "5.0", "5.0"),
Compliance_1 = c("0.0", "0.0", "0.0", "0.0", "1.0", "0.0"
), Compliance_2 = c("1.0", "0.0", "1.0", "0.0", "1.0", "0.0"
), Compliance_3 = c("0.0", "0.0", "0.0", "0.0", "0.0", "0.0"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl",
"data.frame"
))
>
Does that make sense?
you can try this :
library(tidyverse)
# sum() inside mutate() would collapse all three columns into one grand total
# repeated on every row; rowSums() returns one total per row instead.
# across(..., as.numeric) converts the columns on the fly, so this also works
# when they are stored as character. na.rm = TRUE skips NA values in a row.
New_D <- D10 %>%
  mutate(
    Compliance_score = rowSums(
      across(c(Compliance_1, Compliance_2, Compliance_3), as.numeric),
      na.rm = TRUE
    )
  )
But a reproducible example would be great to understand the error.
Claire
Your problem is that your data is stored as character, so you need to convert it to numeric before you can calculate the sum, i.e.
library(dplyr)
# `df` is the data frame from the question. Convert every Compliance_* column
# to numeric, then add the three columns element-wise for a per-row score.
New_D <- mutate(
  df,
  across(starts_with("Compliance"), as.numeric),
  Compliance_score = Compliance_1 + Compliance_2 + Compliance_3
)
Or, following Claire's suggestion, if your data contains NA values:
# sum() would add up every value in the three columns and repeat that single
# grand total on each row. rowSums() computes the total row by row, and
# na.rm = TRUE skips missing values within a row.
New_D <- df %>%
  mutate(across(starts_with("Compliance"), as.numeric)) %>%
  mutate(
    Compliance_score = rowSums(
      cbind(Compliance_1, Compliance_2, Compliance_3),
      na.rm = TRUE
    )
  )
I want to create a slope graph in R like this using ggplot
https://rud.is/b/2013/01/11/slopegraphs-in-r/
After cleaning the data and melting the data frame, I ran into this error:
Error: Aesthetics must be either length 1 or the same as the data (182): x, y, group, colour, label
There's no NAs in my data. Any ideas? Much appreciated!
Here's the code
library(ggplot2)
library(reshape2)  # provides melt(); presumably the package used originally

# Read the file. Every column arrives as character because the first rows hold
# unit descriptions rather than numbers.
betterlife <- read.csv("betterlife.csv", skip = 4, stringsAsFactors = FALSE)

# Convert each column with as.numeric() so that non-numeric text becomes NA.
# data.matrix() is avoided here: in R >= 4.0 it turns character columns into
# factor codes, which silently replaces the real values.
num_data <- data.frame(
  lapply(betterlife, function(x) suppressWarnings(as.numeric(x)))
)

# Treat a column as numeric when fewer than half of its values are NA.
numeric_columns <- vapply(
  num_data,
  function(x) mean(is.na(x)) < 0.5,
  logical(1)
)
final_data <- data.frame(
  num_data[, numeric_columns],
  betterlife[, !numeric_columns]
)

## Rescale the selected columns so that each column's maximum becomes 100
final_data <- data.frame(
  lapply(
    final_data[, c(3, 4, 5, 6, 7, 10, 11)],
    function(x) scale(x, center = FALSE, scale = max(x, na.rm = TRUE) / 100)
  )
)

## Add country names as the identifier column
final_data$INDICATOR <- betterlife$INDICATOR
employment.data <- final_data[5:30, ]

## Melt to long format: one row per country/variable pair
employment.melt <- melt(employment.data, id.vars = "INDICATOR")

# Plot. The aesthetics must refer to columns of the melted data. It has one
# row per country and variable (26 countries x 7 variables = 182 rows), so
# mapping a separate 26-element vector raises
# "Aesthetics must be either length 1 or the same as the data".
sg <- ggplot(employment.melt, aes(factor(variable), value,
                                  group = INDICATOR,
                                  colour = INDICATOR,
                                  label = INDICATOR)) +
  # One line per country; without a geom layer nothing is drawn.
  geom_line() +
  theme(legend.position = "none",
        axis.text.x = element_text(size = 5),
        axis.text.y = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_blank(),
        panel.grid.major.x = element_line("black", size = 0.1),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.background = element_blank())
# Print the plot object that was actually created (`sg`, not `sg1`).
sg
This is the data I'm working with
dput(betterlife)
structure(list(X = c("", "ISO3", "AUS", "AUT", "BEL", "CAN",
"CHL", "CZE", "DNK", "EST", "FIN", "FRA", "DEU", "GRC", "HUN",
"ISL", "IRL", "ISR", "ITA", "JPN", "KOR", "LUX", "MEX", "NLD",
"NZL", "NOR", "POL", "PRT", "SVK", "SVN", "ESP", "SWE", "CHE",
"TUR", "GBR", "USA", "OECD", "", ""),
INDICATOR = c("UNIT", "COUNTRY",
"Australia", "Austria", "Belgium", "Canada", "Chile", "Czech Republic",
"Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
"Hungary", "Iceland", "Ireland", "Israel", "Italy", "Japan",
"Korea", "Luxembourg", "Mexico", "Netherlands", "New Zealand",
"Norway", "Poland", "Portugal", "Slovak Republic", "Slovenia",
"Spain", "Sweden", "Switzerland", "Turkey", "United Kingdom",
"United States", "OECD average", "", "n.a. : not available"),
Rooms.per.person = c("Average number of rooms shared per person in a dwelling",
"", "2.4", "1.7", "2.3", "2.5", "1.3", "1.3", "1.9", "1.2",
"1.9", "1.8", "1.7", "1.2", "1", "1.6", "2.1", "1.1", "1.4",
"1.8", "1.3", "1.9", "1.566666667", "2", "2.3", "1.9", "1",
"1.5", "1.1", "1.1", "1.9", "1.8", "1.7", "0.7", "1.8", "1.605208333",
"1.6", "", ""),
Dwelling.without.basic.facilities = c("% of people without indoor flushing toilets in their home",
"", "3.425714286", "1.3", "0.6", "2.722", "9.36", "0.7",
"0", "12.2", "0.8", "0.8", "1.2", "1.8", "7.1", "0.3", "0.3",
"2.52", "0.2", "6.4", "7.46", "0.8", "6.6", "0", "2.984285714",
"0.1", "4.8", "2.4", "1.1", "0.6", "0", "0", "0.1", "17.1",
"0.5", "0", "2.82", "", ""),
Household.disposable.income = c("USD (PPPs adjusted)",
"", "27,039", "27,670", "26,008", "27,015", "8,712", "16,690",
"22,929", "13,486", "24,246", "27,508", "27,665", "21,499",
"13,858", "19,621", "24,313", "22,539", "24,383", "23,210",
"16,254", "19,621", "12,182", "25,977", "18,819", "29,366",
"13,811", "18,540", "15,490", "19,890", "22,972", "26,543",
"27,542", "21,030", "27,208", "37,685", "22,284", "", ""),
Employment.rate = c("% of the working age population (15-64)",
"", "72.3", "71.73", "62.01", "71.68", "59.32", "65", "73.44",
"61.02", "68.15", "63.99", "71.1", "59.55", "55.4", "78.17",
"59.96", "59.21", "56.89", "70.11", "63.31", "65.21", "60.39",
"74.67", "72.34", "75.31", "59.26", "65.55", "58.76", "66.2",
"58.55", "72.73", "78.59", "46.29", "69.51", "66.71", "64.52",
"", ""),
Long.term.unemployment.rate = c("% of people, aged 15-64, who are not working but have been actively seeking a job for over a year",
"", "1", "1.13", "4.07", "0.97", "2.98375", "3.19", "1.44",
"7.84", "2.01", "3.75", "3.4", "5.73", "5.68", "1.35", "6.74",
"1.85", "4.13", "1.99", "0.01", "1.29", "0.13", "1.24", "0.6",
"0.34", "2.49", "5.97", "8.56", "3.21", "9.1", "1.42", "1.49",
"3.11", "2.59", "2.85", "2.74", "", ""),
Quality.of.support.network = c("% of people who have friends or relatives to rely on in case of need",
"", "95.4", "94.6", "92.6", "95.3", "85.2", "88.9", "96.8",
"84.6", "93.4", "93.9", "93.5", "86.1", "88.6", "97.6", "97.3",
"93", "86", "89.7", "79.8", "95", "87.1", "94.8", "97.1",
"93.1", "92.2", "83.3", "89.6", "90.7", "94.1", "96.2", "93.2",
"78.8", "94.9", "92.3", "91.1", "", ""),
Educational.attainment = c("% of people, aged 15-64, having at least an upper-secondary (high-school) degree",
"", "69.72", "81.04", "69.58", "87.07", "67.97", "90.9",
"74.56", "88.48", "81.07", "69.96", "85.33", "61.07", "79.7",
"64.13", "69.45", "81.23", "53.31", "87", "79.14", "67.94",
"33.55", "73.29", "72.05", "80.7", "87.15", "28.25", "89.93",
"82.04", "51.23", "85.04", "86.81", "30.31", "69.63", "88.7",
"72.95", "", ""),
Students.reading.skills = c("Average reading performance of students aged 15, according to PISA",
"", "515", "470", "506", "524", "449", "478", "495", "501",
"536", "496", "497", "483", "494", "500", "496", "474", "486",
"520", "539", "472", "425", "508", "521", "503", "500", "489",
"477", "483", "481", "497", "501", "464", "494", "500", "493",
"", ""),
Air.pollution = c("Average concentration of particulate matter (PM10) in cities with population larger than 100 000, measured in micrograms per cubic meter",
"", "14.28", "29.03", "21.27", "15", "61.55", "18.5", "16.26",
"12.62", "14.87", "12.94", "16.21", "32", "15.6", "14.47",
"12.54", "27.57", "23.33", "27.14", "30.76", "12.63", "32.69",
"30.76", "11.93", "15.85", "35.07", "21", "13.14", "29.03",
"27.56", "10.52", "22.36", "37.06", "12.67", "19.4", "21.99",
"", ""),
Consultation.on.rule.making = c("Composite index, increasing with the number of key elements of formal consultation processes",
"", "10.5", "7.13", "4.5", "10.5", "2", "6.75", "7", "3.25",
"9", "3.5", "4.5", "6.5", "7.88", "5.13", "9", "2.5", "5",
"7.25", "10.38", "6", "9", "6.13", "10.25", "8.13", "10.75",
"6.5", "6.63", "10.25", "7.25", "10.88", "8.38", "5.5", "11.5",
"8.25", "7.28", "", ""),
Voter.turnout = c("Number of people voting as % of the registered population ",
"", "95", "82", "91", "60", "88", "64", "87", "62", "74",
"84", "78", "74", "64", "84", "67", "65", "81", "67", "63",
"57", "59", "80", "79", "77", "54", "64", "55", "63", "75",
"82", "48", "84", "61", "90", "72", "", ""),
Life.expectancy = c("Average number of years a person can expect to live",
"", "81.5", "80.5", "79.8", "80.7", "77.8", "77.3", "78.8",
"73.9", "79.9", "81", "80.2", "80", "73.8", "81.3", "79.9",
"81.1", "81.5", "82.7", "79.9", "80.6", "75.1", "80.2", "80.4",
"80.6", "75.6", "79.3", "74.8", "78.8", "81.2", "81.2", "82.2",
"73.6", "79.7", "77.9", "79.2", "", ""),
Self.reported.health = c("% of people reporting their health to be \"good or very good\"",
"", "84.9", "69.6", "76.7", "88.1", "56.2", "68.2", "74.3",
"56.3", "67.7", "72.4", "64.7", "76.4", "55.2", "80.6", "84.4",
"79.7", "63.4", "32.7", "43.7", "74", "65.5", "80.6", "89.7",
"80", "57.7", "48.6", "31.1", "58.8", "69.8", "79.1", "80.95",
"66.8", "76", "88", "69", "", ""),
Life.Satisfaction = c("Average self-evaluation of life satisfaction, on a scale from 0 to 10",
"", "7.5", "7.3", "6.9", "7.7", "6.6", "6.2", "7.8", "5.1",
"7.4", "6.8", "6.7", "5.8", "4.7", "6.9", "7.3", "7.4", "6.4",
"6.1", "6.1", "7.1", "6.8", "7.5", "7.2", "7.6", "5.8", "4.9",
"6.1", "6.1", "6.2", "7.5", "7.5", "5.5", "7", "7.2", "6.7",
"", ""),
Homicide.rate = c("Average number of reported homicides per 100 000 people",
"", "1.2", "0.5", "1.8", "1.7", "8.1", "2", "1.4", "6.3",
"2.5", "1.4", "0.8", "1.1", "1.5", "0", "2", "2.4", "1.2",
"0.5", "2.3", "1.5", "11.6", "1", "1.3", "0.6", "1.2", "1.2",
"1.7", "0.5", "0.9", "0.9", "0.7", "2.9", "2.6", "5.2", "2.1",
"", ""),
Assault.rate = c("% of people who report having been assaulted in the previous year",
"", "2.1", "3", "7.3", "1.4", "9.5", "3.5", "3.9", "6.2",
"2.4", "4.9", "3.6", "3.8", "3.8", "2.7", "2.7", "3.1", "4.7",
"1.6", "2.1", "4.3", "14.8", "5", "2.3", "3.3", "2.2", "6.2",
"3.5", "3.9", "4.2", "5.2", "4.2", "6", "1.9", "1.6", "4.1",
"", "")),
.Names = c("X", "INDICATOR", "Rooms.per.person", "Dwelling.without.basic.facilities",
"Household.disposable.income", "Employment.rate",
"Long.term.unemployment.rate", "Quality.of.support.network",
"Educational.attainment", "Students.reading.skills", "Air.pollution",
"Consultation.on.rule.making", "Voter.turnout", "Life.expectancy",
"Self.reported.health", "Life.Satisfaction", "Homicide.rate",
"Assault.rate"), class = "data.frame", row.names = c(NA, -39L))
Did I melt the data frame incorrectly? I ask because the row indices are not in the correct order.
I want to plot Procrustes rotations between two RDA objects obtained with vegan, using ggplot2.
library(vegan)
# Perform the two RDAs (not shown here), then run the Procrustes analysis on
# their results.
pro.test <- procrustes(X = rda.t1, Y = rda.t2)
I extracted the x,y coordinates from list of class "procrustes" and added a factor "dates".
# Yrot coordinates from the procrustes object, labelled with three date
# groups of eight samples each.
test <- data.frame(
  rda1 = pro.test$Yrot[, 1],
  rda2 = pro.test$Yrot[, 2]
)
test$dates <- rep(c("A", "B", "C"), each = 8)

# X coordinates from the same object, with the same date labels.
test.2 <- data.frame(
  rda1 = pro.test$X[, 1],
  rda2 = pro.test$X[, 2]
)
test.2$dates <- rep(c("A", "B", "C"), each = 8)
Now the basic plot:
# Overlay both point sets, coloured by date group. The aesthetics are shared,
# so they are declared once and each layer only supplies its data.
ggplot(mapping = aes(x = rda1, y = rda2, colour = dates)) +
  geom_point(data = test) +
  geom_point(data = test.2)
The part I cannot do is plot small lines between each pair of corresponding points in test and test.2.
Vegan plots these rotations with arrows rather than connected points. However, vegan does not know how to color according to sampling groups/factors, which is important for me.
Having arrows in the ggplot would be great - I know there is a geom_segment with the argument "arrow".
Could you help me?
the dput of pro.test is below.
dput(pro.test)
structure(list(Yrot = structure(c(0.126093537705143, 0.196350569855869,
-0.0513472841582749, -0.304416713452662, 0.210682972922012, -0.0219477831881197,
-0.24519038499101, 0.338357488742126, -0.399739151138497, -0.366424716631558,
0.0321561053701086, 0.565794811541598, 0.606054432756139, -0.0122819831669951,
-0.00403199420346042, -0.0448308879361932, 0.0631101371381566,
-0.150820933315408, -0.018216051372273, -0.68513841544701, -0.117446131920294,
-0.450735018917557, 0.25749869839177, 0.47646869541639, -0.211447138648954,
-0.236584149111598, -0.0316882271224907, -0.281680981927695,
-0.182346139754316, -0.366221121187894, -0.263915986724565, -0.203160918536977,
0.209888424862468, 0.219400450315756, 0.143569801341895, 0.258388604988749,
0.542334722496036, 0.465147580652753, 0.294835945722885, 0.523372408452242,
0.0739580893460179, 0.242768571724456, 0.0409877673276456, -0.0942111509903291,
-0.193072299067071, -0.38889179801965, -0.352882980509932, -0.208549475629433
), .Dim = c(24L, 2L), .Dimnames = list(c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10", "12", "13", "14", "15", "16",
"17", "18", "19", "20", "21", "22", "23", "24", "25"), NULL)),
X = structure(c(0.0860177119127241, 0.217144585357183, -0.0301829830202831,
-0.246142550516987, 0.230574651598493, 0.00485065775494245,
-0.225907453854864, 0.371465194869491, -0.395330365511425,
-0.359255005182027, -0.00775013746753128, 0.47442649486468,
0.519983070801763, -0.0146878517934982, 0.0377018407084686,
-0.0885829362985767, 0.0935962405791314, -0.186192083265912,
0.00247095461296341, -0.655467761687806, -0.0966978065526177,
-0.398672122636169, 0.275589258531376, 0.39104839619648,
-0.273098318897548, -0.237373845171625, -0.0351119316278201,
-0.279271270040404, -0.171188235636994, -0.342350443283954,
-0.297148604541773, -0.21965804713297, 0.269830887309913,
0.268669489120665, 0.143826114581508, 0.229549645414776,
0.531869658831067, 0.479136042616735, 0.380638462867711,
0.548249030471031, 0.161449266776772, 0.282765937749097,
0.0756433464279055, 0.00516171212969907, -0.195519622624857,
-0.568932423412245, -0.381681091857682, -0.375455760069009
), .Dim = c(24L, 2L), const = 1.30375728392289, .Dimnames = list(
c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
"12", "13", "14", "15", "16", "17", "18", "19", "20",
"21", "22", "23", "24", "25"), c("RDA1", "RDA2")), "`scaled:center`" = structure(c(1.3588667228485e-17,
4.69820941410385e-18), .Names = c("RDA1", "RDA2"))), ss = 0.146265769408323,
rotation = structure(c(-0.958883328045618, 0.283800569407742,
0.283800569407742, 0.958883328045619), .Dim = c(2L, 2L)),
translation = structure(c(-9.76742606822348e-18, 1.35523649355013e-17
), .Dim = 1:2), scale = 0.918742698883168, xmean = structure(c(1.3586408473959e-17,
4.71176194125992e-18), .Names = c("RDA1", "RDA2")), symmetric = FALSE,
call = procrustes(X = rda.t1, Y = rda.t2), svd = structure(list(
d = c(2.51563498111738, 2.16729713036852), u = structure(c(0.743008138366833,
0.669282381600362, 0.669282381600362, -0.743008138366833
), .Dim = c(2L, 2L)), v = structure(c(-0.522515395489416,
0.852629850214347, -0.852629850214347, -0.522515395489416
), .Dim = c(2L, 2L))), .Names = c("d", "u", "v"))), .Names = c("Yrot",
"X", "ss", "rotation", "translation", "scale", "xmean", "symmetric",
"call", "svd"), class = "procrustes")
Does this do the job?
library(ggplot2)
library(grid)
# One row per sample: the Yrot coordinates (rda1, rda2), the matching X
# coordinates (xrda1, xrda2) and the date group.
ctest <- data.frame(
  rda1 = pro.test$Yrot[, 1],
  rda2 = pro.test$Yrot[, 2],
  xrda1 = pro.test$X[, 1],
  xrda2 = pro.test$X[, 2],
  dates = rep(c("A", "B", "C"), each = 8)
)
# Draw both point sets and an arrow from each Yrot point to its X
# counterpart. Colour is shared by all layers, so it is mapped once.
ggplot(ctest, aes(colour = dates)) +
  geom_point(aes(x = rda1, y = rda2)) +
  geom_point(aes(x = xrda1, y = xrda2)) +
  geom_segment(
    aes(x = rda1, y = rda2, xend = xrda1, yend = xrda2),
    arrow = arrow(length = unit(0.2, "cm"))
  )
I have been trying to use qgraph to generate the network graph. The code is as following
# Build the network graph from the edge list, with edges coloured by
# `colorLabels` (defined elsewhere).
Gw <- qgraph(
  edgeList,
  diag = TRUE,
  labels = TRUE,
  legend = TRUE,
  legend.cex = 0.3,
  vsize = 1,
  asize = 1,
  edge.color = colorLabels
)
The figure is generated, but the R console reports an error: invalid color name 'background'. I do not know what this message means.
The dput result is shown as follows,
dput(edgeList)
structure(c("1", "2", "2", "3", "4", "5", "6", "7", "8", "1",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "16",
"4", "5", "7", "1", "9", "10", "19", "20", "2", "16", "21", "3",
"22", "5", "23", "8", "1", "20", "2", "13", "14", "17", "14",
"1", "19", "14", "2", "21", "14", "24", "1", ":499.3", "nk Transfe",
"de of tran", "up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ",
"up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ",
"ine:4121", "ine:3257.4", "ine:75.2", "ine:75.2", "ine:11615.",
"ine:10603", "ine:334.2", "ine:7256.8", "ine:7256.8", "ine:996.8",
"ine:884.6", "ine:364.9", "ine:6360", "ine:5640.9", "ine:2729.7",
"ine:5482.6", "ine:85", "ine:1474.9", "ine:700.8", "ine:2754.6",
"ine:3257.4", "ine:3257.4", "ine:7307.8", "ine:18560.", "ine:85.1",
"ine:364.8", ":700.1", ":5317", "l:4258.9", "l:4258.9", "l:1637.6",
"l:1637.6", "l:46.4", "l:3938.5", "l:3938.5", "l:2800.4", "l:2715.1",
"l:2715.1", "l:12708.2", "l:1042", ":499.3", "nk Transfe", "de of tran",
"up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ",
"up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ", "ine:4121",
"ine:3257.4", "ine:75.2", "ine:75.2", "ine:11615.", "ine:10603",
"ine:334.2", "ine:7256.8", "ine:7256.8", "ine:996.8", "ine:884.6",
"ine:364.9", "ine:6360", "ine:5640.9", "ine:2729.7", "ine:5482.6",
"ine:85", "ine:1474.9", "ine:700.8", "ine:2754.6", "ine:3257.4",
"ine:3257.4", "ine:7307.8", "ine:18560.", "ine:85.1", "ine:364.8",
":700.1", ":5317", "l:4258.9", "l:4258.9", "l:1637.6", "l:1637.6",
"l:46.4", "l:3938.5", "l:3938.5", "l:2800.4", "l:2715.1", "l:2715.1",
"l:12708.2", "l:1042", "25", "1", "1", "26", "27", "28", "29",
"30", "31", "25", "32", "33", "4", "4", "3", "3", "5", "5", "7",
"6", "6", "27", "28", "30", "25", "32", "33", "9", "8", "1",
"1", "10", "12", "12", "16", "16", "16", "16", "8", "1", "3",
"3", "7", "7", "25", "9", "9", "1", "10", "10", "14", "14"), .Dim = c(104L,
2L), .Dimnames = list(NULL, c("newsendId", "newtoId")))
The generated figure is as follows. I used the following command to generate it
# Spring layout with red edges.
Gw <- qgraph(
  edgeList,
  layout = "spring",
  diag = FALSE,
  labels = TRUE,
  cut = NULL,
  edge.color = "red",
  legend.cex = 0.5,
  vsize = 8
)
Which nodes are problems? With your data and code you can modify label.cex. There are other variations of the arguments for the label and legend sizes. Here is one version, with the color blue.
library(qgraph)
# Same spring-layout graph, with the label size set via label.cex and blue
# label text.
Gw <- qgraph(
  edgeList,
  layout = "spring",
  diag = FALSE,
  labels = TRUE,
  cut = NULL,
  edge.color = "red",
  legend.cex = 0.3,
  vsize = 4,
  label.cex = 0.3,
  label.color = "blue"
)
Gw