Performing Anova for all vars in R - r

R has the aov() function for ANOVA. To run it for a single variable, I can write this simple code:
df2=read.csv("C:/Users/Admin/Desktop/222.csv",sep=";", dec=",")
fit=aov(x1 ~ Q7,data=df2)
summary(fit)
I can run three ANOVAs, one per variable, by writing this code three times.
But how can I perform ANOVA for all three variables at once, i.e. without repeating the code for each x?
df2=
structure(list(Q7 = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("A", "B", "C"), class = "factor"),
x1 = c(13L, 31L, 17L, 27L, 24L, 21L, 34L, 28L, 13L, 25L,
12L, 16L, 26L, 27L, 20L), x2 = c(32L, 29L, 16L, 27L, 27L,
22L, 36L, 35L, 22L, 27L, 13L, 30L, 17L, 27L, 9L), x3 = c(29L,
13L, 18L, 25L, 19L, 21L, 24L, 22L, 30L, 30L, 14L, 28L, 19L,
11L, 13L)), .Names = c("Q7", "x1", "x2", "x3"), class = "data.frame", row.names = c(NA,
-15L))

You can use `reformulate()` to build the formulas to be used within the `aov()` function; for example, `reformulate("Q7", "x1")` produces the formula `x1 ~ Q7`.
lapply(names(df2[-1]),function(x)aov(reformulate("Q7",x),data=df2))
You can also wrap the `aov()` call in `summary()` in the code above to get the ANOVA table for each variable.

Related

R Object not found when aggregating data

I want to compute the mean score for every Dish group. However, when I run the following code, R cannot find the objects Dish and Score.
dishes <- read.table(file=theFile, header=TRUE, sep=",")
aggdata <- aggregate(formula= Score ~ Dish,
data = dishes,
FUN = mean)
And a dput from some of the data for reconstruction:
structure(list(UserName.Dish.Score = structure(c(26L, 25L, 23L,
24L, 13L, 14L, 15L, 17L, 28L, 22L, 12L, 30L, 29L, 20L, 18L, 19L,
16L, 27L, 21L, 10L, 9L, 7L, 8L, 2L, 3L, 4L, 5L, 11L, 6L, 1L), .Label = c("kjetsand;Bacalao;3",
"kjetsand;Chicken Curry;5", "kjetsand;Chicken Tikka Masala;5",
"kjetsand;Chili Con Carne;3", "kjetsand;Kebab;3", "kjetsand;Paella;3",
"kjetsand;Pasta Bolognese;3", "kjetsand;Pasta Carbonara;3", "kjetsand;Pizza Margherita;3",
"kjetsand;Pizza Napolitana;4", "kjetsand;Sushi;5", "nilstesd;Bacalao;6",
"nilstesd;Chicken Curry;4", "nilstesd;Chicken Tikka Masala;5",
"nilstesd;Chili Con Carne;4", "nilstesd;Coq au vin;5", "nilstesd;Kebab;3",
"nilstesd;Kentucky Fried Chicken;2", "nilstesd;Lutefisk;5", "nilstesd;MacDonalds Cheeseburger;1",
"nilstesd;Moules frites;5", "nilstesd;Paella;6", "nilstesd;Pasta Bolognese;5",
"nilstesd;Pasta Carbonara;5", "nilstesd;Pizza Margherita;4",
"nilstesd;Pizza Napolitana;6", "nilstesd;Ratatouille;4", "nilstesd;Sushi;6",
"nilstesd;Sweet And Sour Pork;3", "nilstesd;Taco;2"), class = "factor")), .Names = "UserName.Dish.Score", row.names = c(NA,
30L), class = "data.frame")
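Change the class of Score to numeric (note: the dput above shows a single column `UserName.Dish.Score` whose values are separated by `;`, so the file first has to be read with `sep=";"` for the `Dish` and `Score` columns to exist):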
dishes$Score <- as.numeric(dishes$Score)
then you can aggregate your data
aggregate(formula= Score ~ Dish,
data = dishes,
FUN = mean)

Convert Factor to Date column [duplicate]

This question already has an answer here:
month language in the as.date function
(1 answer)
Closed 5 years ago.
My data frame is:
x=structure(list(V1 = structure(c(33L, 35L, 36L, 37L, 39L, 4L,
6L, 7L, 8L, 10L, 14L, 16L, 18L, 19L, 21L, 25L, 27L, 28L, 29L,
30L, 1L, 17L, 31L, 32L, 34L, 38L, 40L, 2L, 3L, 5L, 9L, 11L, 12L,
13L, 15L, 20L, 22L, 23L, 24L, 26L), .Label = c("1-Feb-71", "10-Feb-71",
"11-Feb-71", "11-Jan-71", "12-Feb-71", "12-Jan-71", "13-Jan-71",
"14-Jan-71", "15-Feb-71", "15-Jan-71", "16-Feb-71", "17-Feb-71",
"18-Feb-71", "18-Jan-71", "19-Feb-71", "19-Jan-71", "2-Feb-71",
"20-Jan-71", "21-Jan-71", "22-Feb-71", "22-Jan-71", "23-Feb-71",
"24-Feb-71", "25-Feb-71", "25-Jan-71", "26-Feb-71", "26-Jan-71",
"27-Jan-71", "28-Jan-71", "29-Jan-71", "3-Feb-71", "4-Feb-71",
"4-Jan-71", "5-Feb-71", "5-Jan-71", "6-Jan-71", "7-Jan-71", "8-Feb-71",
"8-Jan-71", "9-Feb-71"), class = "factor"), V2 = structure(c(1L,
15L, 2L, 4L, 3L, 5L, 10L, 5L, 7L, 12L, 8L, 16L, 16L, 22L, 16L,
19L, 22L, 12L, 17L, 23L, 24L, 24L, 21L, 17L, 19L, 16L, 6L, 11L,
9L, 25L, 25L, 8L, 5L, 13L, 20L, 18L, 16L, 13L, 12L, 14L), .Label = c("7.1359",
"7.1367", "7.1382", "7.1386", "7.1390", "7.1397", "7.1403", "7.1406",
"7.1410", "7.1411", "7.1412", "7.1414", "7.1418", "7.1420", "7.1422",
"7.1429", "7.1431", "7.1434", "7.1435", "7.1437", "7.1439", "7.1443",
"7.1445", "7.1465", "ND"), class = "factor")), .Names = c("V1",
"V2"), class = "data.frame", row.names = c(NA, -40L))
I am trying to convert column V1 to Date, but it is not working. I've been looking at some related topics, but it just doesn't work.
This is my code:
x$V1 <- as.Date(x$V1, format="%d-%b-%y")
It works for some rows of V1 column but not for others.
Any help?
In my version of R, the conversion in your example only works for January and not for February. I think it is related to the language.
For example, in French, February is coded as fév and so Feb is not recognized.
Once I did:
x$V1=gsub("Feb", "fév", x$V1)
it worked.
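It probably depends on the locale your R session uses: `%b` matches abbreviated month names in the current locale, so English abbreviations such as "Feb" are only recognized under an English (or C) `LC_TIME` locale.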

How to convert list to graph object in R?

I have an adjacency list, shown here as the output of dput(data):
list(c(2L, 3L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L, 14L, 18L, 22L,
32L), c(1L, 3L, 4L, 8L, 14L, 18L, 20L, 31L), c(2L, 4L, 9L, 10L,
14L, 28L, 29L, 33L), c(1L, 2L, 3L, 8L, 13L), c(1L, 7L, 11L),
c(1L, 11L, 17L), c(5L, 6L, 17L), c(1L, 3L, 4L), c(1L, 3L,
31L, 34L), c(3L, 34L, 33L), c(1L, 5L, 6L), c(1L, 32L, 23L
), c(1L, 4L, 2L), c(1L, 2L, 4L, 34L), c(33L, 34L, 29L), c(33L,
34L, 33L), c(6L, 7L, 19L), c(1L, 2L, 10L), c(33L, 34L, 14L
), c(1L, 2L, 34L), c(33L, 34L, 26L), c(1L, 2L, 15L), c(33L,
34L, 6L), c(26L, 28L, 33L, 34L), c(26L, 28L, 32L), c(24L,
25L, 32L), c(30L, 34L, 20L), c(3L, 24L, 34L), c(3L, 32L,
34L), c(24L, 27L, 33L), c(9L, 33L, 34L), c(1L, 25L, 26L,
33L), c(3L, 9L, 15L, 16L, 19L, 21L, 24L, 30L, 31L, 32L, 34L
), c(9L, 10L, 14L, 15L, 16L, 19L, 21L, 23L, 24L, 28L, 29L,
30L, 31L, 32L, 33L))
The list is similar to an adjacency list, and I want to convert it into an igraph object in R. I have tried using
graph_from_data_frame(data, directed = FALSE, vertices = NULL)
but it is not working. I have also tried
graph_from_adj_list(data,mode="all",duplicate=FALSE)
but this gives the wrong graph. For example, 1 -> 2,3,5,6 should produce the edges 1->2, 1->3, 1->5, 1->6; but it gives seemingly random output such as 1->3, 2->5, 3->6, 2->6, and so on.
Any idea, how this could be done in R?

Byte code version mismatch when using subset

I have been working on the same R script for 5 months now and have had some minor coding problems, but this morning I ran into a problem that makes me unable to run the whole script. To clean my imported data I use subset() a lot, but this morning when running the code I got the error:
Error in subset(T23810, date < as.Date("2015-10-22")) : byte code version mismatch
It appears that I only get this error after trying to run a subset function, but it blocks my whole script at the moment. What could be the cause and solution for this?
EDIT: Reproducible example
x = structure(list(names = structure(c(11L, 3L, 5L, 27L, 26L, 15L,
18L, 13L, 8L, 2L, 22L, 12L, 1L, 25L, 29L, 31L, 6L, 23L, 28L,
14L, 19L, 4L, 10L, 16L, 9L, 17L, 21L, 30L, 7L, 6L, 27L, 26L,
12L, 13L, 14L, 4L, 28L, 15L, 31L, 23L, 1L, 22L, 11L, 18L, 3L,
20L, 8L, 5L, 16L, 2L, 25L, 30L, 21L, 4L, 6L, 3L, 5L, 27L, 14L,
11L, 26L, 31L, 13L, 18L, 15L, 1L, 23L, 2L, 8L, 28L, 30L, 20L,
22L, 12L, 10L, 16L, 21L, 25L, 17L, 24L, 32L, 31L, 23L, 26L, 1L,
18L, 11L, 12L, 3L, 15L, 27L, 28L, 5L, 22L, 6L, 17L, 20L, 2L,
8L, 21L, 30L, 13L, 25L, 24L, 7L, 4L, 10L, 16L, 14L), .Label = c("50/50",
"Babylon", "Big Rock Market", "Core Gut", "Customs House", "David's Dropoff",
"David's Dropoff Deep", "Diamond Rock", "Giles Quarter", "Green Island",
"Greer Gut", "Hole in the Corner", "Hot Springs", "Ladder Labyrinth",
"Man O War", "Mount Michel", "Muck Dive", "Outer Limits", "Poriotes Point",
"Porites Point", "Rays & Anchors", "Shark Shoals", "Tedran",
"Tent Boulders", "Tent Deep", "Tent Reef", "Tent Wall", "Third Encounter",
"Torens Point", "Torrens Point", "Twilight Zone", "Wells Bay"
), class = "factor")), .Names = "names", row.names = c(NA, -109L
), class = "data.frame")
Then if I execute the following:
x[x=="Torens Point"] = "Torrens Point"
x[x=="Poriotes Point"] = "Porites Point"
x = droplevels(subset(x, names != "Muck Dive"))
I get the error:
Error in subset(x, names != "Muck Dive") : byte code version mismatch
Okay, solved it, and in the end it was pretty easy. Since I am working on a server and rely on the versions of R that are installed on that server, I didn't realize how to update R itself. Now that I have updated it, it seems to work. Thank you all for your help! This one is SOLVED!

How can I sort one column based on another?

I have the following data:
points <- structure(list(A = structure(c(1L, 1L, 2L, 2L, 3L, 4L,
4L, 5L, 5L, 6L, 6L, 7L, 8L, 9L, 9L, 10L, 11L, 12L, 12L, 13L,
14L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 21L, 21L,
22L, 23L, 24L, 24L, 25L, 26L, 26L, 27L, 28L, 29L, 30L, 30L, 31L,
32L, 32L, 33L, 34L, 34L, 35L, 35L, 36L, 36L, 37L, 37L, 38L, 38L,
39L, 39L, 40L, 41L), .Label = c("00017dd3-f55e-e011-854c-00237de2db9e",
"0005f624-565a-e011-854c-00237de2db9e", "0007b82f-bfe0-4b55-963e-be5a2a1e7f7b",
"00095b52-fd0a-e011-9264-00237de2db9e", "00098835-9554-4898-8d4b-82d42b8b4464",
"000a727f-8334-e011-854c-00237de2db9e", "000c0a31-f459-4365-aa3a-1978deb89f67",
"000e36a4-6e56-4851-8d36-2caf0bdd63ec", "000f05a6-cf94-4518-8de7-1773cbea8198",
"00105574-a775-43e8-8472-c8b294e46786", "00112a96-3c47-409c-83bd-6f30d8d77100",
"0012f133-f68e-e011-986b-78e7d1fa76f8", "0012f899-1c45-4917-90b7-11bea31e467e",
"0014606b-17b7-46d6-957f-e23b43fcc773", "001478e2-3e50-486c-ae3b-d1ceb36f0fd0",
"00159bab-ce82-454a-9343-f7d8f1500a68", "0015b84e-a48d-443e-936e-cabdb80604dc",
"0018f8ba-c289-4483-bf74-5cd0e6c6ae9e", "0019487f-f31e-4e3e-b499-fd48077f71f9",
"00199523-c42f-47fd-a44a-066fb726f6dd", "0019dace-41e1-439f-8b73-328d02537fe7",
"001a346e-2a15-45d4-9fb1-6b4e2448d362", "001b0c90-5c86-4290-bad3-0d6794a6bfe8",
"001c0d0d-3059-e011-854c-00237de2db9e", "001c9cbb-8c79-4cbf-bc50-219a70ab20b8",
"001dcf83-7492-e011-986b-78e7d1fa76f8", "001dd5cf-3e3b-4ceb-823c-346c15f88878",
"001e0ef7-b977-436a-ab20-8c4af4f5b230", "001fc407-da48-4c42-9325-7756b160cbbd",
"001fdaa1-9471-e011-81d2-78e7d1fa76f8", "0020029f-2667-4c03-b99f-d803eccd27d4",
"00218e00-896e-e011-81d2-78e7d1fa76f8", "002196af-60c7-4baf-abdb-589b3a481686",
"0021a908-7ff6-df11-9264-00237de2db9e", "0021bced-909a-e011-986b-78e7d1fa76f8",
"0021f0fb-cb9f-e011-986b-78e7d1fa76f8", "00228254-9b20-4d40-a4a5-a7c608f81dfa",
"002357ba-5656-4308-bb92-6cc97f50d7aa", "0025eafd-a64f-e011-854c-00237de2db9e",
"0026b36c-ebc2-43f0-a0f7-72f43b70530b", "00277e09-543e-449a-8571-38f71a21cee2"
), class = "factor"), B = structure(c(10L, 10L, 27L,
27L, 28L, 23L, 23L, 38L, 38L, 24L, 24L, 19L, 35L, 26L, 26L, 28L,
5L, 36L, 36L, 21L, 11L, 1L, 14L, 14L, 4L, 4L, 9L, 9L, 16L, 16L,
3L, 7L, 7L, 13L, 37L, 17L, 17L, 29L, 15L, 15L, 12L, 31L, 32L,
8L, 8L, 2L, 30L, 30L, 39L, 6L, 6L, 22L, 22L, 20L, 20L, 34L, 34L,
18L, 18L, 33L, 33L, 25L, 29L), .Label = c("Aashu", "Actonica Studio",
"appyminds", "blackink", "BroeckiE", "Challenge Solutions LLC",
"CPP_MSP", "Datentechnik Innovation GmbH", "DerekM", "Dimension Srl",
"Dmitry Kazarin", "edg3", "fruitymo", "Geckosan", "Genera Interactive SL",
"HandyWare", "Infinite Square", "JTO.C Sq.", "JuJuZ", "Kitten Flavour",
"Krofita", "Mark Agholor", "MCTronix.com", "Michael Snow", "michaloxo",
"mobilewares.net", "NotoMedia LLC", "OKR", "P.F. CHAUVET", "Panoylhs",
"Pratik Gandhi", "raavr", "ReadBooks", "RGP", "Seesmic", "The KeitaCorp",
"viileetek", "Violineage", "Yalla Apps"), class = "factor"),
Date = structure(c(1302926400, 1302926400, 1302408000,
1302408000, 1327467600, 1292994000, 1292994000, 1322370000,
1322370000, 1297486800, 1297486800, 1326949200, 1321333200,
1314763200, 1314763200, 1328418000, 1327381200, 1307505600,
1307505600, 1325221200, 1324530000, 1327381200, 1326862800,
1326862800, 1326171600, 1326171600, 1325566800, 1325566800,
1327122000, 1327122000, 1320379200, 1324702800, 1324702800,
1327726800, 1327986000, 1301544000, 1301544000, 1332302400,
1308369600, 1308369600, 1325912400, 1331611200, 1325912400,
1304481600, 1304481600, 1325653200, 1304395200, 1304395200,
1322542800, 1294117200, 1294117200, 1309147200, 1309147200,
1309320000, 1309320000, 1313208000, 1313208000, 1325739600,
1325739600, 1300334400, 1300334400, 1325826000, 1321938000
), class = c("POSIXct", "POSIXt"), tzone = "")), .Names = c("A",
"B", "Date"), row.names = c(NA, -63L), class = "data.frame")
I am trying to draw a scatter plot with the Y-axis sorted based on the date field. However, I am only able to do the following where I treat A as a factor. Any suggestions on how to achieve this?
p = ggplot(points, aes(Date, factor(A))) +
geom_point(aes(colour=factor(A)), size=1.8) +
scale_shape(solid=FALSE) +
scale_y_discrete("", breaks=NA)
The error comes when you ask ggplot to consider a date value as a discrete variable after just swapping the x and y positions. It goes away when you remove that:
p = ggplot(points, aes(x=A, y=Date)) +
geom_point(aes(colour=factor(A)), size=1.8) +
scale_shape(solid=FALSE)
p
Or you can get rid of the lines by applying the discrete axis call to the x-axis:
p = ggplot(points, aes(x=A, y=Date)) +
geom_point(aes(colour=factor(A)), size=1.8) +
scale_shape(solid=FALSE) +
scale_x_discrete("", breaks=NULL)
P
Unfortunately the title of the question doesn't seem to have a very clear connection to the text of the question, so I cannot tell if this was what you were asking for.

Resources