I have managed to delete a little bit of code that did the task below, and I can't for the life of me figure out how I did it before.
I want to widen data that has two factors spread over 8 different 'waves'. There are four 'Paper' factors, each with the same four internal 'Response' factors. The output from a function written earlier gives the following dataframe (the first dput below):
And I would like to make it look like this (the second dput below):
The single column of the first tibble has become the single row of the second. As you can see, the second tibble has extra levels of Paper, but these can just be joined row-wise.
I really wasn't sure how to attack this, but thought it would be done using the pivot_wider function. When I tried
times_correct <- times_19 %>%
  pivot_wider(id_cols = c("Stay/remain in the EU", "Leave the EU",
                          "I would/will not vote", "Don't know"),
              names_from = eurrefcolnames)
I got the error that I can't subset columns that don't exist, which makes sense: I need to manually add the correct 'Waves'. I think this is relatively simple, but I can't for the life of me figure out how I did it!
Here is the dput of the various tibbles:
structure(list(resp = structure(c(3L, 2L, 4L, 1L, NA, NA, NA,
NA), .Label = c("Don't Know", "Leave", "Remain", "Will Not Vote"
), class = "factor"), `Stay/remain in the EU` = c(316L, 290L,
313L, 324L, 338L, 320L, 325L, 335L), `Leave the EU` = c(157L,
123L, 159L, 154L, 134L, 189L, 187L, 181L), `I would/will not vote` = c(2L,
3L, 3L, 3L, 2L, 2L, 2L, 0L), `Don't know` = c(56L, 51L, 55L,
50L, 57L, 20L, 17L, 0L), Paper = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "Times", class = "factor")), row.names = c(NA,
-8L), class = c("tbl_df", "tbl", "data.frame"))
structure(list(resp = structure(c(3L, 2L, 4L, 1L, 3L, 2L, 4L,
1L, 3L, 2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L, 2L, 4L, 1L), .Label = c("Don't Know",
"Leave", "Remain", "Will Not Vote"), class = "factor"), euRefVoteW1 = c(316L,
157L, 2L, 56L, 190L, 339L, 4L, 70L, 819L, 79L, 9L, 71L, 1294L,
1311L, 150L, 523L, 1715L, 2587L, 133L, 630L), euRefVoteW2 = c(290L,
123L, 3L, 51L, 175L, 282L, 3L, 62L, 777L, 74L, 5L, 62L, 1091L,
925L, 80L, 371L, 1528L, 2044L, 83L, 517L), euRefVoteW3 = c(313L,
159L, 3L, 55L, 199L, 334L, 4L, 69L, 835L, 81L, 10L, 57L, 1348L,
1289L, 139L, 508L, 1766L, 2563L, 156L, 586L), euRefVoteW4 = c(324L,
154L, 3L, 50L, 215L, 328L, 2L, 61L, 848L, 70L, 10L, 55L, 1397L,
1267L, 128L, 492L, 1853L, 2494L, 143L, 583L), euRefVoteW6 = c(338L,
134L, 2L, 57L, 241L, 286L, 2L, 77L, 853L, 68L, 5L, 57L, 1519L,
1133L, 112L, 520L, 2017L, 2284L, 106L, 667L), euRefVoteW7 = c(320L,
189L, 2L, 20L, 186L, 384L, 2L, 34L, 832L, 109L, 8L, 34L, 1449L,
1456L, 87L, 292L, 1906L, 2785L, 55L, 328L), euRefVoteW8 = c(325L,
187L, 2L, 17L, 187L, 384L, 1L, 34L, 836L, 118L, 5L, 24L, 1462L,
1522L, 72L, 228L, 1898L, 2852L, 56L, 268L), euRefVoteW9 = c(335L,
181L, 0L, 0L, 206L, 385L, 0L, 6L, 844L, 102L, 0L, 4L, 1572L,
1462L, 0L, 21L, 2018L, 2827L, 0L, 20L), Paper = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L), .Label = c("Times", "Telegraph", "Control", "No_Paper",
"Rest"), class = "factor")), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
eurrefcolnames <- c('euRefVoteW1', 'euRefVoteW2', 'euRefVoteW3', 'euRefVoteW4', 'euRefVoteW6', 'euRefVoteW7', 'euRefVoteW8', 'euRefVoteW9')
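For reference, here is a minimal sketch of one way to get from the first tibble to the wave-wide shape. It assumes times_19 is the first tibble above and that its eight rows correspond, in order, to the eight wave names in eurrefcolnames; the partially filled resp column is dropped first:
library(dplyr)
library(tidyr)
times_correct <- times_19 %>%
  select(-resp) %>%                                  # drop the partially-filled resp column
  mutate(Wave = eurrefcolnames) %>%                  # one row per wave, in the order given above
  pivot_longer(cols = c(`Stay/remain in the EU`, `Leave the EU`,
                        `I would/will not vote`, `Don't know`),
               names_to = "resp", values_to = "n") %>%
  pivot_wider(names_from = Wave, values_from = n)
This should give one row per response with euRefVoteW1 to euRefVoteW9 as columns (plus Paper), which can then be bound row-wise with the other papers.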
EDIT:
Here is the function that creates the initial dataframes. Is there an edit I could make here, perhaps?
tally_reader_number <- function(input_dataframe, newspaper_name) {
  # Takes in_all_waves as input, tallies the number of different EU ref responses
  # using map_df for a given newspaper factor (defined above), and returns a
  # dataframe of responses for each wave with the newspaper factor as a column.
  returned_dataframe <- input_dataframe %>%
    filter(Paper == newspaper_name) %>%
    ungroup() %>%   # function refuses to work without this
    select(-Paper) %>%
    map_df(table) %>%   # use map_df from the purrr package to "table" each column
    rownames_to_column("response") %>%   # convert the rownames to a column named response
    mutate(resp = case_when(response == 1 ~ "Remain",   # change the resulting numbers to the correct responses
                            response == 2 ~ "Leave",
                            response == 3 ~ "Will Not Vote",
                            response == 4 ~ "Don't Know")) %>%
    select(resp, everything(), -response) %>%   # reorder the columns with resp at the front, removing response
    mutate(Paper = newspaper_name)
  returned_dataframe$Paper <- as.factor(returned_dataframe$Paper)
  returned_dataframe$resp <- as.factor(returned_dataframe$resp)
  returned_dataframe
}
Given the 2D coordinates of a number of vertices, I would like to calculate the internal angles of the corresponding polygon.
Here's a dataframe with a few polygons and the coordinates of their vertices. The problem is that the vertices aren't ordered.
structure(list(x = c(173L, 173L, 185L, 190L, 231L, 267L, 185L,
190L, 233L, 260L, 190L, 231L, 260L, 230L, 230L, 172L, 233L, 230L,
231L, 267L, 185L, 172L, 233L, 231L, 231L), y = c(299L, 299L,
321L, 360L, 361L, 377L, 321L, 360L, 363L, 309L, 360L, 361L, 309L,
322L, 322L, 378L, 363L, 322L, 391L, 377L, 321L, 378L, 363L, 391L,
361L), polygonID = c(2L, 4L, 5L, 3L, 6L, 7L, 2L, 5L, 7L, 6L, 2L,
3L, 4L, 5L, 6L, 2L, 3L, 4L, 3L, 6L, 4L, 3L, 6L, 7L, 5L)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 9L, 10L, 12L, 13L, 14L, 15L, 16L, 17L,
19L, 20L, 21L, 24L, 25L, 27L, 29L, 30L, 31L, 34L), class = "data.frame")
Essentially, I would go from vertex to vertex to calculate the edges/vectors and use those to calculate the internal angles. But it's not clear to me how I would do this in an algorithmic fashion. Any hints much appreciated.
Here's a shot at one:
shp <- structure(list(x = c(173L, 173L, 185L, 190L, 231L, 267L, 185L,
190L, 233L, 260L, 190L, 231L, 260L, 230L, 230L, 172L, 233L, 230L,
231L, 267L, 185L, 172L, 233L, 231L, 231L), y = c(299L, 299L,
321L, 360L, 361L, 377L, 321L, 360L, 363L, 309L, 360L, 361L, 309L,
322L, 322L, 378L, 363L, 322L, 391L, 377L, 321L, 378L, 363L, 391L,
361L), polygonID = c(2L, 4L, 5L, 3L, 6L, 7L, 2L, 5L, 7L, 6L,
2L, 3L, 4L, 5L, 6L, 2L, 3L, 4L, 3L, 6L, 4L, 3L, 6L, 7L, 5L)), class = "data.frame",
row.names = c(NA, -25L))
aa <- shp[ shp$polygonID == 2, ]
aa <- aa[ chull(aa[,1:2]), ]
aa
# x y polygonID
# 7 185 321 2
# 1 173 299 2
# 16 172 378 2
# 11 190 360 2
Now aa is a 4-point polygon, ordered clockwise. Let's hard-code some calculations for now:
ind <- seq_len(nrow(aa))
indm1 <- c(ind[-1], ind[1])
indp1 <- c(ind[length(ind)], ind[-length(ind)])
angles <- ((atan2(aa$y[indm1] - aa$y[ind], aa$x[indm1] - aa$x[ind]) -
            atan2(aa$y[indp1] - aa$y[ind], aa$x[indp1] - aa$x[ind])) * 180 / pi) %% 360
cbind(indm1,ind,indp1)
# indm1 ind indp1
# [1,] 2 1 4
# [2,] 3 2 1
# [3,] 4 3 2
# [4,] 1 4 3
angles
# [1] 158.69530 29.33568 44.27478 127.69424
And let's check it visually (I was initially perplexed that the plot did not visually correlate with the angles until I realized that the aspect ratio was off, hence asp=1):
plot(y~x, data=aa, type='l', asp=1)
with(aa, text(x-5, y, seq_len(nrow(aa)), col="red"))
with(aa, text(x+5, y, round(angles, 0)))
Okay, let's try to formalize this a little:
getangles <- function(aa) {
aa <- aa[chull(aa[,1:2]),]
ind <- seq_len(nrow(aa))
indm1 <- c(ind[-1], ind[1])
indp1 <- c(ind[length(ind)], ind[-length(ind)])
((atan2(aa$y[indm1] - aa$y[ind], aa$x[indm1] - aa$x[ind]) -
atan2(aa$y[indp1] - aa$y[ind], aa$x[indp1] - aa$x[ind])) * 180 / pi) %% 360
}
by(shp, shp$polygonID, getangles)
# shp$polygonID: 2
# [1] 158.69530 29.33568 44.27478 127.69424
# ------------------------------------------------------------
# shp$polygonID: 3
# [1] 130.91438 136.39718 133.60282 57.42594 81.65967
# ------------------------------------------------------------
# shp$polygonID: 4
# [1] 29.98564 54.83259 119.88349 155.29828
# ------------------------------------------------------------
# shp$polygonID: 5
# [1] 92.74183 81.42121 98.70294 87.13402
# ------------------------------------------------------------
# shp$polygonID: 6
# [1] 72.44870 111.95989 136.46880 157.38014 61.74247
# ------------------------------------------------------------
# shp$polygonID: 7
# [1] 71.70548 64.66388 43.63064
(There might be some issues with rounding/modulus, I'll leave it to you to beautify and verify the others.)
Here is my example data:
data=structure(list(groupvar = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 1L), v1 = c(27L, 52L, 92L, 86L, NA, 19L, 94L, NA, 26L, 94L,
NA, 58L, 96L, 74L, 8L, 66L, 65L, 41L, 70L, 21L, 64L, 40L, 17L,
7L, NA, 14L, 63L), v2 = c(59L, 91L, 45L, 40L, 56L, 17L, 72L,
78L, 19L, 62L, 87L, NA, 79L, 62L, 40L, 67L, 93L, 1L, 64L, 22L,
NA, 98L, 44L, 85L, 67L, 88L, 92L), v3 = c(97L, 15L, 27L, 55L,
86L, 66L, NA, 61L, 27L, 47L, 93L, 68L, 72L, 4L, 35L, 69L, 65L,
NA, 83L, 60L, 42L, NA, 90L, 81L, NA, 27L, 60L)), .Names = c("groupvar",
"v1", "v2", "v3"), class = "data.frame", row.names = c(NA, -27L
))
There is a grouping variable, groupvar (a first group and a second group). I have many variables, but only three here.
And there are many missing values in these variables.
How can I perform multiple imputation for each variable (the variables can be numeric, integer, and so on), but for each group separately, using mice?
Edit
A simple imp <- mice(data) does not give the needed output, because I need the imputation by group.
I want the result to be something like:
groupvar  v1     v2      v3
1         27     59      97
1         52     91      15
1         92     45      27
1         86     40      55
1         *64*   56      86
2         7      85      81
2         58     *61.8*  68
2         64     *61.8*  42
(values marked with asterisks are examples of imputed values)
First, code 'groupvar' as a factor.
data <- structure(list(groupvar = as.factor(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 1L)),
v1 = c(27L, 52L, 92L, 86L, NA, 19L, 94L, NA, 26L, 94L,
NA, 58L, 96L, 74L, 8L, 66L, 65L, 41L, 70L, 21L, 64L, 40L, 17L,
7L, NA, 14L, 63L),
v2 = c(59L, 91L, 45L, 40L, 56L, 17L, 72L,
78L, 19L, 62L, 87L, NA, 79L, 62L, 40L, 67L, 93L, 1L, 64L, 22L,
NA, 98L, 44L, 85L, 67L, 88L, 92L),
v3 = c(97L, 15L, 27L, 55L,
86L, 66L, NA, 61L, 27L, 47L, 93L, 68L, 72L, 4L, 35L, 69L, 65L,
NA, 83L, 60L, 42L, NA, 90L, 81L, NA, 27L, 60L)),
.Names = c("groupvar",
"v1", "v2", "v3"), class = "data.frame", row.names = c(NA, -27L
))
Then use the mice package (assuming it is properly installed).
library(mice)
imp <- mice(data)
complete(imp)
groupvar v1 v2 v3
1 1 27 59 97
2 1 52 91 15
3 1 92 45 27
4 1 86 40 55
5 1 21 56 86
6 1 19 17 66
7 1 94 72 4
8 1 66 78 61
9 1 26 19 27
10 2 94 62 47
11 2 8 87 93
12 2 58 72 68
13 2 96 79 72
14 2 74 62 4
15 2 8 40 35
16 2 66 67 69
17 2 65 93 65
18 2 41 1 47
19 2 70 64 83
20 2 21 22 60
21 2 64 62 42
22 1 40 98 27
23 1 17 44 90
24 2 7 85 81
25 1 63 67 55
26 2 14 88 27
27 1 63 92 60
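Note that mice(data) above imputes the whole dataset at once, with groupvar available as a predictor. If the imputations really have to be run within each group separately, one possible approach (a minimal sketch, not part of the answer above) is to split the data by groupvar, impute each part, and recombine:
library(mice)
# Split by group, run mice() within each group, and bind the completed parts back together.
# Inside each split the constant groupvar column is simply ignored as a predictor.
imputed_parts <- lapply(split(data, data$groupvar), function(d) {
  complete(mice(d, printFlag = FALSE))
})
imputed_data <- do.call(rbind, imputed_parts)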
I am new to this website and to coding as well. I was wondering if any of you could help me out.
I need to find the top 5 movies by rating distribution, calculating the percentage of ratings for each movie that are 4 stars or higher.
So far I was only able to calculate the number of occurrences using dplyr.
Is it possible to calculate it using dplyr (something similar to my code)?
I'm not sure whether I need to mutate to come up with the solution or if there's another way to do so.
My code so far:
dfAux1 <- na.omit(dfAux)
dfAux1 %>%
group_by(movie) %>%
summarise(tot = n()) %>%
arrange(desc(tot))%>%
head(5)
The expected result should be something like this:
0.7000000, 'The Shawshank Redemption'
0.5333333, 'Star Wars IV - A New Hope'
0.5000000, 'Gladiator'
0.4444444, 'Blade Runner'
0.4375000, 'The Silence of the Lambs'
and so far this is my result:
# A tibble: 5 x 2
movie tot
<fctr> <int>
1 Toy Story 17
2 The Silence of the Lambs 16
3 Star Wars IV - A New Hope 15
4 Star Wars VI - Return of the Jedi 14
5 Independence Day 13
edit:
str(dfAux1)
'data.frame': 241 obs. of 2 variables:
$ Rating: int 1 5 4 2 4 5 4 2 3 2 ...
$ movie : Factor w/ 20 levels "Star Wars IV - A New Hope",..: 1 1 1 1 1 1 1 1 1 1 ...
- attr(*, "na.action")=Class 'omit' Named int [1:159] 3 4 7 16 17 23 27 28 34 36 ...
.. ..- attr(*, "names")= chr [1:159] "3" "4" "7" "16" ...
dput(dfAux1)
structure(list(Rating = c(1L, 5L, 4L, 2L, 4L, 5L, 4L, 2L, 3L,
2L, 3L, 4L, 4L, 5L, 1L, 5L, 3L, 3L, 3L, 4L, 1L, 2L, 1L, 5L, 3L,
4L, 5L, 1L, 2L, 2L, 4L, 4L, 3L, 5L, 2L, 3L, 1L, 1L, 2L, 2L, 5L,
1L, 4L, 1L, 4L, 5L, 5L, 5L, 4L, 4L, 4L, 2L, 4L, 1L, 3L, 2L, 3L,
2L, 4L, 2L, 5L, 3L, 4L, 1L, 5L, 4L, 2L, 1L, 1L, 4L, 2L, 4L, 5L,
5L, 2L, 1L, 4L, 2L, 1L, 4L, 2L, 3L, 2L, 4L, 4L, 5L, 2L, 4L, 3L,
2L, 2L, 4L, 2L, 2L, 2L, 3L, 4L, 1L, 5L, 4L, 3L, 5L, 2L, 1L, 3L,
4L, 4L, 2L, 3L, 4L, 1L, 3L, 2L, 5L, 3L, 2L, 3L, 4L, 1L, 1L, 4L,
1L, 4L, 5L, 1L, 3L, 2L, 2L, 3L, 5L, 5L, 1L, 2L, 3L, 5L, 2L, 3L,
1L, 2L, 1L, 4L, 1L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 1L, 5L, 2L, 4L,
1L, 4L, 3L, 1L, 2L, 2L, 3L, 4L, 2L, 3L, 2L, 4L, 3L, 4L, 3L, 2L,
2L, 4L, 5L, 2L, 1L, 5L, 1L, 4L, 5L, 2L, 3L, 3L, 2L, 5L, 5L, 4L,
1L, 3L, 1L, 2L, 1L, 5L, 5L, 2L, 4L, 2L, 4L, 2L, 5L, 2L, 5L, 5L,
1L, 5L, 1L, 3L, 2L, 2L, 3L, 5L, 1L, 3L, 1L, 5L, 3L, 3L, 1L, 2L,
4L, 1L, 5L, 3L, 1L, 1L, 5L, 5L, 1L, 5L, 3L, 3L, 2L, 3L, 3L, 2L,
2L, 2L, 5L, 4L, 2L, 1L, 4L, 5L), movie = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L), .Label = c("Star Wars IV - A New Hope",
"Star Wars VI - Return of the Jedi", "Forrest Gump", "The Shawshank Redemption",
"The Silence of the Lambs", "Gladiator", "Toy Story", "Saving Private Ryan",
"Pulp Fiction", "Stand by Me", "Shakespeare in Love", "Total Recall",
"Independence Day", "Blade Runner", "Groundhog Day", "The Matrix",
"Schindler's List", "The Sixth Sense", "Raiders of the Lost Ark",
"Babe"), class = "factor")), .Names = c("Rating", "movie"), row.names = c(1L,
2L, 5L, 6L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L,
21L, 22L, 24L, 25L, 26L, 29L, 30L, 31L, 32L, 33L, 35L, 38L, 39L,
40L, 41L, 45L, 46L, 47L, 51L, 52L, 54L, 56L, 58L, 60L, 62L, 63L,
65L, 66L, 67L, 69L, 70L, 73L, 78L, 80L, 81L, 82L, 83L, 85L, 87L,
88L, 89L, 90L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 100L, 101L,
102L, 104L, 105L, 107L, 108L, 109L, 111L, 115L, 116L, 118L, 119L,
121L, 122L, 123L, 124L, 126L, 128L, 129L, 130L, 131L, 132L, 133L,
134L, 135L, 137L, 138L, 139L, 140L, 141L, 144L, 145L, 146L, 147L,
149L, 150L, 153L, 156L, 159L, 160L, 164L, 166L, 167L, 168L, 170L,
172L, 175L, 177L, 178L, 179L, 180L, 181L, 182L, 183L, 185L, 186L,
189L, 194L, 195L, 196L, 199L, 200L, 201L, 202L, 205L, 206L, 207L,
209L, 212L, 216L, 217L, 219L, 220L, 222L, 223L, 224L, 225L, 226L,
228L, 229L, 231L, 233L, 234L, 235L, 239L, 241L, 242L, 243L, 244L,
246L, 248L, 249L, 250L, 251L, 252L, 253L, 254L, 255L, 261L, 263L,
264L, 265L, 267L, 268L, 274L, 278L, 280L, 282L, 283L, 284L, 286L,
288L, 289L, 292L, 293L, 294L, 295L, 296L, 300L, 301L, 303L, 305L,
307L, 310L, 311L, 312L, 314L, 316L, 317L, 319L, 320L, 321L, 322L,
323L, 324L, 325L, 328L, 330L, 334L, 335L, 336L, 338L, 340L, 341L,
342L, 343L, 344L, 345L, 346L, 348L, 350L, 351L, 356L, 358L, 360L,
362L, 363L, 364L, 367L, 368L, 371L, 373L, 375L, 376L, 378L, 380L,
383L, 384L, 386L, 387L, 389L, 391L, 392L, 395L, 396L, 398L), class = "data.frame", na.action = structure(c(3L,
4L, 7L, 16L, 17L, 23L, 27L, 28L, 34L, 36L, 37L, 42L, 43L, 44L,
48L, 49L, 50L, 53L, 55L, 57L, 59L, 61L, 64L, 68L, 71L, 72L, 74L,
75L, 76L, 77L, 79L, 84L, 86L, 91L, 99L, 103L, 106L, 110L, 112L,
113L, 114L, 117L, 120L, 125L, 127L, 136L, 142L, 143L, 148L, 151L,
152L, 154L, 155L, 157L, 158L, 161L, 162L, 163L, 165L, 169L, 171L,
173L, 174L, 176L, 184L, 187L, 188L, 190L, 191L, 192L, 193L, 197L,
198L, 203L, 204L, 208L, 210L, 211L, 213L, 214L, 215L, 218L, 221L,
227L, 230L, 232L, 236L, 237L, 238L, 240L, 245L, 247L, 256L, 257L,
258L, 259L, 260L, 262L, 266L, 269L, 270L, 271L, 272L, 273L, 275L,
276L, 277L, 279L, 281L, 285L, 287L, 290L, 291L, 297L, 298L, 299L,
302L, 304L, 306L, 308L, 309L, 313L, 315L, 318L, 326L, 327L, 329L,
331L, 332L, 333L, 337L, 339L, 347L, 349L, 352L, 353L, 354L, 355L,
357L, 359L, 361L, 365L, 366L, 369L, 370L, 372L, 374L, 377L, 379L,
381L, 382L, 385L, 388L, 390L, 393L, 394L, 397L, 399L, 400L), .Names = c("3",
"4", "7", "16", "17", "23", "27", "28", "34", "36", "37", "42",
"43", "44", "48", "49", "50", "53", "55", "57", "59", "61", "64",
"68", "71", "72", "74", "75", "76", "77", "79", "84", "86", "91",
"99", "103", "106", "110", "112", "113", "114", "117", "120",
"125", "127", "136", "142", "143", "148", "151", "152", "154",
"155", "157", "158", "161", "162", "163", "165", "169", "171",
"173", "174", "176", "184", "187", "188", "190", "191", "192",
"193", "197", "198", "203", "204", "208", "210", "211", "213",
"214", "215", "218", "221", "227", "230", "232", "236", "237",
"238", "240", "245", "247", "256", "257", "258", "259", "260",
"262", "266", "269", "270", "271", "272", "273", "275", "276",
"277", "279", "281", "285", "287", "290", "291", "297", "298",
"299", "302", "304", "306", "308", "309", "313", "315", "318",
"326", "327", "329", "331", "332", "333", "337", "339", "347",
"349", "352", "353", "354", "355", "357", "359", "361", "365",
"366", "369", "370", "372", "374", "377", "379", "381", "382",
"385", "388", "390", "393", "394", "397", "399", "400"), class = "omit"))
I am using data.table instead of dplyr
library(data.table)
setDT(dfAux1) # make dfAux1 as data table by reference
# Calculate the total number of ratings per movie, then compute the percentage of
# `Rating >= 4` per movie, sort by `tot` in descending order, and keep one row per
# movie with `.SD[1]` (the first row within each movie).
dfAux1[, .(Rating, tot = .N), by = movie
       ][Rating >= 4, .(percent = .N/tot, tot), by = movie
       ][order(-tot), .SD[1], by = movie]
# movie percent tot
# 1: Toy Story 0.35294118 17
# 2: The Silence of the Lambs 0.43750000 16
# 3: Star Wars IV - A New Hope 0.53333333 15
# 4: Star Wars VI - Return of the Jedi 0.35714286 14
# 5: Independence Day 0.30769231 13
# 6: Gladiator 0.50000000 12
# 7: Total Recall 0.08333333 12
# 8: Groundhog Day 0.41666667 12
# 9: The Matrix 0.41666667 12
# 10: Schindler's List 0.33333333 12
# 11: The Sixth Sense 0.33333333 12
# 12: Saving Private Ryan 0.36363636 11
# 13: Pulp Fiction 0.36363636 11
# 14: Stand by Me 0.36363636 11
# 15: Shakespeare in Love 0.27272727 11
# 16: Raiders of the Lost Ark 0.27272727 11
# 17: Forrest Gump 0.30000000 10
# 18: The Shawshank Redemption 0.70000000 10
# 19: Babe 0.40000000 10
# 20: Blade Runner 0.44444444 9
A single-line solution using data.table and the OP's data could be:
library(data.table)
setDT(dfAux1)[, .(pct = sum(Rating>=4)/.N), by=movie][order(-pct)][1:5]
movie pct
1: The Shawshank Redemption 0.7000000
2: Star Wars IV - A New Hope 0.5333333
3: Gladiator 0.5000000
4: Blade Runner 0.4444444
5: The Silence of the Lambs 0.4375000
Overview
I used the dplyr package to group your data by the movie column and perform calculations based on the rating column.
In summarise(), I created three new columns:
Total_Review: counts the total number of reviews per movie.
FourPlus_Rating: counts the subset of reviews with a Rating value of 4 or higher.
Per_FourPlus_Rating: divides FourPlus_Rating by Total_Review.
I then arranged the data in descending order based on Per_FourPlus_Rating. Finally, I called head() to specify that I only want the tibble to return the first 5 rows.
Reproducible Example
# install necessary package
install.packages( pkgs = "dplyr" )
# load necessary package
library( dplyr )
# view first six rows
head( x = df )
# Rating movie
# 1 1 Star Wars IV - A New Hope
# 2 5 Star Wars IV - A New Hope
# 5 4 Star Wars IV - A New Hope
# 6 2 Star Wars IV - A New Hope
# 8 4 Star Wars IV - A New Hope
# 9 5 Star Wars IV - A New Hope
# perform calculations using
# dplyr functions
df %>%
group_by( movie ) %>%
summarise( Total_Review = n()
, FourPlus_Rating = length( Rating[ which( Rating >= 4 ) ] )
, Per_FourPlus_Rating = length( Rating[ which( Rating >= 4 ) ] ) / n() ) %>%
arrange( desc( Per_FourPlus_Rating ) ) %>%
head( n = 5 )
# A tibble: 5 x 4
# movie Total_Review FourPlus_Rating Per_FourPlus_Rati…
# <fct> <int> <int> <dbl>
# 1 The Shawshank Rede… 10 7 0.700
# 2 Star Wars IV - A N… 15 8 0.533
# 3 Gladiator 12 6 0.500
# 4 Blade Runner 9 4 0.444
# 5 The Silence of the… 16 7 0.438
# end of script #
This is a dplyr solution:
dfAuxhigh <- filter(dfAux1, Rating >= 4) %>% group_by(movie) %>% summarize(percentHigh = n())
dfAux <- dfAux1 %>% group_by(movie) %>% summarize(percentAll = n())
result <- merge(dfAuxhigh, dfAux, by = "movie") %>% mutate(percentage = percentHigh/percentAll)
result <- result[order(result$percentage, decreasing = TRUE)[1:5], c(1, 4)]
library(tidyverse)
df %>%
group_by(movie, Rating) %>%
summarise(n = n()) %>% #< get freq of movies
mutate(freq = n/sum(n)) %>% #< find perc for each rating, by movie
filter(Rating >=4) %>% #< filter for desired rating (4 or above)
summarise(freq = sum(freq)) %>% #< summarize again
top_n(5) %>%
arrange(desc(freq)) %>%
mutate(freq = paste0(round(freq*100, 2), "%"))
#> movie freq
#> 1 The Shawshank Redemption 70%
#> 2 Star Wars IV - A New Hope 53.33%
#> 3 Gladiator 50%
#> 4 Blade Runner 44.44%
#> 5 The Silence of the Lambs 43.75%
I'm trying to plot facets in ggplot2, but I struggle to get the internal ordering of the different facets right. The data looks like this:
head(THAT_EXT)
ID FILE GENRE NODE
1 CKC_1823_01 CKC Novels better
2 CKC_1824_01 CKC Novels better
3 EW9_192_03 EW9 Popular Science better
4 H0B_265_01 H0B Popular Science sad
5 CS2_231_03 CS2 Academic Prose desirable
6 FED_8_05 FED Academic Prose certain
str(THAT_EXT)
'data.frame': 851 obs. of 4 variables:
$ ID : Factor w/ 851 levels "A05_122_01","A05_277_07",..: 345 346 439 608 402 484 319 395 228 5 ...
$ FILE : Factor w/ 241 levels "A05","A06","A0K",..: 110 110 127 169 120 135 105 119 79 2 ...
$ GENRE: Factor w/ 5 levels "Academic Prose",..: 4 4 5 5 1 1 1 5 1 5 ...
$ NODE : Factor w/ 115 levels "absurd","accepted",..: 14 14 14 89 23 16 59 59 18 66 ...
Part of the problem is that I can't get the sorting right. Here is the code for the sorting of NODE that I use:
THAT_EXT <- within(THAT_EXT,
NODE <- factor(NODE,
levels=names(sort(table(NODE),
decreasing=TRUE))))
When I plot this with the code below, I get a graph in which NODE is not correctly sorted within the individual GENREs, since different NODEs are more frequent in different GENREs:
p1 <-
ggplot(THAT_EXT, aes(x=NODE)) +
geom_bar() +
scale_x_discrete("THAT_EXT", breaks=NULL) + # suppress tick marks on x axis
facet_wrap(~GENRE)
What I want is for every facet to have NODE sorted in decreasing order for that particular GENRE. Can anyone help with this?
structure(list(ID = structure(c(1L, 2L, 3L, 4L, 10L, 133L, 137L,
138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L,
149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L,
160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L,
171L, 172L, 173L, 174L, 175L, 176L, 177L, 178L, 179L, 180L, 181L,
182L, 183L, 184L, 185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L,
193L, 194L, 195L, 196L, 197L, 198L, 199L, 200L, 201L, 202L, 203L,
204L, 205L, 206L, 207L, 208L, 212L, 213L, 214L, 215L, 216L, 217L,
218L, 219L, 220L, 221L, 222L, 223L, 224L, 225L, 226L, 227L, 228L,
229L, 230L, 231L, 232L, 233L, 234L, 235L, 236L, 237L, 238L, 239L,
240L, 241L, 267L, 268L, 269L, 270L, 271L, 272L, 273L, 274L, 275L,
276L, 277L, 278L, 279L, 280L, 281L, 282L, 283L, 284L, 290L, 291L,
298L, 299L, 300L, 303L, 304L, 305L, 306L, 307L, 308L, 309L, 310L,
313L, 314L, 315L, 316L, 317L, 318L, 319L, 327L, 328L, 329L, 330L,
331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L, 339L, 340L, 341L,
342L, 343L, 344L, 345L, 346L, 347L, 348L, 352L, 353L, 354L, 355L,
356L, 357L, 358L, 359L, 360L, 349L, 350L, 351L, 361L, 362L, 363L,
364L, 365L, 366L, 367L, 368L, 369L, 370L, 371L, 372L, 373L, 374L,
375L, 376L, 377L, 378L, 379L, 380L, 381L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 41L, 42L, 43L, 44L, 45L,
46L, 50L, 54L, 72L, 73L, 74L, 75L, 76L, 90L, 91L, 92L, 97L, 98L,
102L, 115L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L, 209L,
210L, 211L, 242L, 243L, 244L, 245L, 246L, 289L, 292L, 293L, 294L,
295L, 296L, 297L, 301L, 302L, 311L, 312L, 320L, 321L, 322L, 323L,
324L, 325L, 326L, 382L, 383L, 384L, 385L, 386L, 387L, 388L, 5L,
6L, 7L, 8L, 9L, 11L, 37L, 38L, 39L, 40L, 47L, 48L, 49L, 51L,
52L, 53L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L,
66L, 67L, 68L, 69L, 70L, 71L, 77L, 78L, 79L, 80L, 81L, 82L, 83L,
84L, 85L, 86L, 87L, 88L, 89L, 93L, 94L, 95L, 96L, 99L, 100L,
101L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L,
113L, 114L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L,
134L, 135L, 136L, 247L, 248L, 249L, 250L, 251L, 252L, 253L, 254L,
255L, 256L, 257L, 258L, 259L, 260L, 261L, 262L, 263L, 264L, 265L,
266L, 285L, 286L, 287L, 288L), .Label = c("A05_122_01", "A05_277_07",
"A05_400_01", "A05_99_01", "A06_1283_02", "A06_1389_01", "A06_1390_01",
"A06_1441_02", "A06_884_03", "A0K_1190_03", "A77_1684_01", "A8K_525_03",
"A8K_582_01", "A8K_645_01", "A8K_799_01", "A90_341_02", "A90_496_01",
"A94_217_01", "A94_472_01", "A94_477_03", "A9M_164_01", "A9M_259_03",
"A9N_199_01", "A9N_489_01", "A9N_591_01", "A9R_173_01", "A9R_425_02",
"A9W_536_02", "AA5_121_01", "AAE_203_01", "AAE_243_01", "AAE_412_01",
"AAW_14_03", "AAW_244_02", "AAW_297_04", "AAW_365_04", "ADG_1398_01",
"ADG_1500_01", "ADG_1507_01", "ADG_1516_01", "AHB_336_01", "AHB_421_01",
"AHJ_1090_02", "AHJ_619_01", "AR3_340_01", "AR3_91_03", "ARF_879_01",
"ARF_985_01", "ARF_991_02", "ARK_1891_01", "ASL_33_04", "ASL_43_01",
"ASL_9_01", "AT7_1031_01", "B09_1162_01", "B09_1475_01", "B09_1493_01",
"B09_1539_01", "B0G_197_01", "B0G_320_01", "B0N_1037_01", "B0N_624_01",
"B0N_645_02", "B0N_683_01", "B3G_313_04", "B3G_320_03", "B3G_398_02",
"B7M_1630_01", "B7M_1913_01", "BNN_746_02", "BNN_895_01", "BP7_2426_01",
"BP7_2777_01", "BP7_2898_01", "BP9_410_01", "BP9_599_01", "BPK_829_01",
"C93_1407_02", "C9A_181_01", "C9A_196_01", "C9A_365_01", "C9A_82_02",
"C9A_9_01", "CB9_306_02", "CB9_63_04", "CB9_86_01", "CBJ_439_01",
"CBJ_702_02", "CBJ_705_01", "CCM_320_01", "CCM_665_01", "CCM_669_02",
"CCN_1036_02", "CCN_1078_01", "CCN_1119_01", "CCN_784_01", "CCW_2284_02",
"CCW_2349_03", "CE7_242_02", "CE7_284_01", "CE7_39_01", "CEB_1675_01",
"CER_145_03", "CER_23_01", "CER_235_02", "CER_378_10", "CET_1056_02",
"CET_680_01", "CET_705_01", "CET_797_01", "CET_838_01", "CET_879_05",
"CET_946_03", "CET_986_01", "CEY_2977_01", "CJ3_107_02", "CJ3_114_03",
"CJ3_20_01", "CJ3_81_01", "CK2_112_01", "CK2_22_01", "CK2_392_01",
"CK2_42_01", "CK2_75_01", "CKC_1776_01", "CKC_1777_01", "CKC_1823_01",
"CKC_1824_01", "CKC_1860_01", "CKC_1883_01", "CKC_1883_02", "CKC_2127_01",
"CMN_1439_02", "CRM_5767_01", "CRM_5770_03", "CRM_5789_01", "CS2_110_01",
"CS2_131_01", "CS2_139_01", "CS2_187_01", "CS2_187_03", "CS2_231_03",
"CS2_249_02", "CS2_301_01", "CS2_35_01", "CS2_58_02", "EV6_16_01",
"EV6_206_02", "EV6_240_01", "EV6_244_02", "EV6_28_01", "EV6_30_01",
"EV6_32_01", "EV6_450_01", "EV6_69_01", "EV6_80_01", "EV6_91_01",
"FAC_1019_01", "FAC_1026_01", "FAC_1027_01", "FAC_1235_01", "FAC_1269_05",
"FAC_1270_05", "FAC_1393_01", "FAC_1406_03", "FAC_933_01", "FAC_950_01",
"FAC_960_01", "FED_105_01", "FED_120_02", "FED_21_02", "FED_281_02",
"FED_302_02", "FED_53_01", "FED_8_05", "FEF_498_03", "FEF_674_03",
"FR2_410_01", "FR2_557_02", "FR2_593_01", "FR2_691_01", "FR4_232_01",
"FR4_331_01", "FR4_346_01", "FS7_818_01", "FS7_919_01", "FU0_368_02",
"FYT_1138_01", "FYT_1183_01", "FYT_901_05", "G08_1336_01", "G1E_385_01",
"G1N_824_01", "G1N_860_01", "G1N_868_01", "G1N_975_01", "GU5_854_01",
"GUJ_423_01", "GUJ_501_01", "GUJ_611_01", "GUJ_629_03", "GUJ_700_01",
"GV0_10_01", "GV0_104_01", "GV0_111_01", "GV0_122_01", "GV0_160_01",
"GV0_232_02", "GV2_1465_01", "GV2_1899_01", "GV6_2683_01", "GW6_297_01",
"GW6_306_05", "GW6_307_01", "GW6_322_01", "GW6_330_02", "GW6_335_01",
"GW6_338_01", "GW6_367_02", "GW6_373_01", "GW6_407_01", "GW6_411_01",
"GW6_413_01", "GW6_421_01", "GW6_423_01", "GW6_424_01", "GW6_428_01",
"GW6_447_01", "GWM_480_01", "GWM_533_02", "GWM_554_02", "GWM_554_03",
"GWM_609_01", "GWM_609_04", "GWM_610_01", "GWM_730_01", "GWM_731_01",
"GWM_738_01", "GWM_804_06", "GWM_815_01", "GWM_832_03", "GVP_179_01",
"GVP_211_01", "GVP_393_02", "GVP_443_02", "GVP_710_01", "H0B_171_04",
"H0B_216_01", "H0B_265_01", "H0B_32_01", "H0B_361_03", "H0B_365_01",
"H0B_369_01", "H0B_74_01", "H0B_93_01", "H10_1002_01", "H10_1032_04",
"H10_653_01", "H10_803_01", "H10_824_01", "H10_825_03", "H10_881_01",
"H10_986_01", "H78_851_04", "H78_891_01", "H78_946_04", "H79_1959_19",
"H7S_110_05", "H7S_130_06", "H7S_131_03", "H7S_131_04", "H7S_146_01",
"H7S_148_01", "H7S_164_01", "H7S_179_01", "H7S_54_01", "H7S_56_05",
"H7S_62_03", "H7S_79_01", "H7S_8_01", "H7S_81_01", "H7S_83_01",
"H7S_87_01", "H7S_92_03", "H7X_1028_02", "H7X_1091_01", "H7X_691_01",
"H7X_695_01", "H8H_2917_01", "H8K_153_01", "H8K_55_01", "H8M_1897_01",
"H8M_2104_02", "H8T_3316_03", "H98_3204_01", "H98_3410_01", "H98_3490_02",
"H9R_130_02", "H9R_39_01", "H9S_1297_01", "HA2_3107_02", "HA2_3284_01",
"HPY_754_04", "HPY_785_09", "HPY_799_03", "HPY_807_04", "HPY_830_04",
"HPY_838_02", "HPY_843_01", "HPY_869_11", "HR7_190_01", "HR7_440_01",
"HTP_540_01", "HTP_585_01", "HTP_588_05", "HTP_593_01", "HTP_601_01",
"HTP_613_01", "HTP_648_02", "HTW_197_01", "HTW_494_01", "HTW_750_01",
"HWL_2770_01", "HWL_2919_01", "HWM_45_01", "HWM_45_02", "HXY_1047_03",
"HXY_701_01", "HXY_781_01", "HXY_783_01", "HXY_784_01", "HXY_836_01",
"HXY_931_01", "HXY_963_01", "HXY_972_01", "HXY_985_03", "HY6_1024_01",
"HY6_1025_01", "HY6_1164_01", "HY6_1223_01", "HY6_988_03", "HY6_989_01",
"HY8_160_01", "HY8_164_01", "HY8_292_03", "HY8_316_01", "HY9_778_03",
"HY9_845_02", "HYX_235_08", "HYX_245_01", "HYX_88_01", "J12_1474_02",
"J12_1492_01", "J12_1571_01", "J12_1845_01", "J14_341_01", "J18_597_04",
"J18_698_02", "J18_759_01", "J18_828_01", "J3R_197_01", "J3R_219_02",
"J3R_277_04", "J3T_267_01", "J3T_269_02", "J3T_57_02", "J41_41_02",
"J41_58_03", "J9B_133_03", "J9B_341_02", "J9B_341_03", "J9D_147_05",
"J9D_218_01", "J9D_411_01", "J9D_616_01", "J9D_616_02", "JNB_563_02",
"JT7_118_01", "JT7_129_02", "JT7_218_02", "JT7_344_02", "JXS_3663_01",
"JXU_407_01", "JXU_468_02", "JXU_559_01", "JXV_1439_04", "JXV_1592_01",
"JY1_100_01"), class = "factor"), GENRE = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L), .Label = c("Academic Prose", "Conversation", "News",
"Novels", "Popular Science"), class = "factor"), NODE = structure(c(9L,
10L, 10L, 10L, 4L, 10L, 71L, 35L, 49L, 6L, 5L, 15L, 28L, 44L,
64L, 64L, 28L, 28L, 18L, 18L, 32L, 18L, 58L, 10L, 72L, 28L, 18L,
10L, 64L, 10L, 35L, 64L, 64L, 69L, 8L, 10L, 50L, 69L, 49L, 49L,
15L, 69L, 10L, 49L, 8L, 64L, 49L, 10L, 69L, 18L, 61L, 67L, 67L,
61L, 57L, 69L, 11L, 10L, 64L, 10L, 59L, 61L, 49L, 10L, 59L, 1L,
61L, 35L, 54L, 54L, 39L, 44L, 61L, 64L, 69L, 1L, 23L, 49L, 49L,
8L, 69L, 49L, 69L, 49L, 49L, 69L, 35L, 49L, 49L, 49L, 35L, 10L,
49L, 48L, 10L, 49L, 11L, 44L, 50L, 11L, 50L, 69L, 49L, 10L, 59L,
68L, 47L, 69L, 49L, 35L, 29L, 8L, 49L, 50L, 35L, 10L, 35L, 8L,
35L, 8L, 10L, 35L, 10L, 10L, 10L, 35L, 44L, 61L, 35L, 44L, 28L,
47L, 39L, 39L, 49L, 61L, 43L, 60L, 19L, 10L, 10L, 10L, 44L, 44L,
62L, 44L, 10L, 59L, 10L, 61L, 1L, 53L, 33L, 10L, 8L, 8L, 64L,
64L, 10L, 57L, 61L, 64L, 66L, 19L, 61L, 64L, 10L, 10L, 8L, 19L,
35L, 28L, 10L, 61L, 35L, 42L, 35L, 28L, 32L, 64L, 10L, 18L, 28L,
25L, 35L, 35L, 10L, 18L, 10L, 22L, 55L, 28L, 10L, 1L, 55L, 51L,
1L, 38L, 28L, 28L, 33L, 10L, 44L, 29L, 16L, 8L, 28L, 69L, 32L,
10L, 61L, 20L, 35L, 10L, 28L, 10L, 32L, 10L, 46L, 59L, 64L, 35L,
66L, 2L, 35L, 28L, 30L, 18L, 69L, 32L, 10L, 28L, 17L, 36L, 64L,
61L, 10L, 64L, 33L, 3L, 37L, 26L, 28L, 64L, 44L, 28L, 64L, 64L,
6L, 6L, 64L, 50L, 32L, 8L, 64L, 50L, 28L, 24L, 18L, 47L, 35L,
40L, 24L, 55L, 44L, 22L, 1L, 49L, 44L, 18L, 45L, 63L, 64L, 35L,
12L, 35L, 10L, 35L, 10L, 10L, 10L, 44L, 44L, 44L, 65L, 44L, 55L,
32L, 49L, 64L, 39L, 69L, 1L, 60L, 7L, 14L, 44L, 33L, 10L, 19L,
10L, 70L, 53L, 8L, 61L, 61L, 44L, 61L, 65L, 28L, 68L, 69L, 27L,
61L, 28L, 72L, 34L, 61L, 32L, 10L, 49L, 35L, 49L, 10L, 10L, 69L,
39L, 40L, 19L, 59L, 53L, 49L, 49L, 44L, 49L, 35L, 49L, 61L, 61L,
1L, 10L, 28L, 49L, 35L, 49L, 61L, 50L, 69L, 35L, 61L, 35L, 50L,
10L, 28L, 69L, 61L, 21L, 69L, 29L, 35L, 35L, 35L, 11L, 69L, 8L,
41L, 56L, 35L, 61L, 69L, 49L, 49L, 49L, 1L, 13L, 64L, 64L, 52L,
44L, 64L, 64L, 50L, 49L, 69L, 11L, 59L, 49L, 31L), .Label = c("apparent",
"appropriate", "awful", "axiomatic", "best", "better", "breathtaking",
"certain", "characteristic", "clear", "conceivable", "convenient",
"crucial", "cruel", "desirable", "disappointing", "emphatic",
"essential", "evident", "expected", "extraordinary", "fair",
"fortunate", "Funny", "good", "great", "imperative", "important",
"impossible", "incredible", "inescapable", "inevitable", "interesting",
"ironic", "likely", "Likely", "lucky", "ludicrous", "natural",
"necessary", "needful", "notable", "noteworthy", "obvious", "odd",
"paradoxical", "plain", "plausible", "possible", "probable",
"proper", "relevant", "remarkable", "revealing", "right", "Sad",
"self-evident", "sensible", "significant", "striking", "surprising",
"symptomatic", "terrible", "true", "typical", "understandable",
"unexpected", "unfortunate", "unlikely", "unreasonable", "untrue",
"vital"), class = "factor")), .Names = c("ID", "GENRE", "NODE"
), class = "data.frame", row.names = c(NA, -388L))
As I mentioned already: facet_wrap is not intended to give each facet its own ordering of a discrete axis, and at least I didn't find a built-in solution. Hence, setting the labels in scale_x_discrete did not bring the desired result.
But this is my workaround:
library(plyr)
library(ggplot2)
nodeCount <- ddply( THAT_EXT, c("GENRE", "NODE"), nrow )
nodeCount$factors <- paste( nodeCount$GENRE, nodeCount$NODE, sep ="." )
nodeCount <- nodeCount[ order( nodeCount$GENRE, nodeCount$V1, decreasing=TRUE ), ]
nodeCount$factors <- factor( nodeCount$factors, levels=nodeCount$factors )
head(nodeCount)
GENRE NODE V1 factors
121 Popular Science possible 14 Popular Science.possible
128 Popular Science surprising 11 Popular Science.surprising
116 Popular Science likely 9 Popular Science.likely
132 Popular Science unlikely 9 Popular Science.unlikely
103 Popular Science clear 7 Popular Science.clear
129 Popular Science true 5 Popular Science.true
g <- ggplot( nodeCount, aes( y=V1, x = factors ) ) +
  geom_bar( stat = "identity" ) +
  scale_x_discrete( breaks=NULL ) + # suppress tick marks on x axis
  facet_wrap( ~GENRE, scale="free_x" ) +
  geom_text( aes( label = NODE, y = V1+2 ), angle = 45, vjust = 0, hjust=0, size=3 )
Which gives a faceted bar chart in which each facet's NODEs are ordered by their frequency within that GENRE.
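For completeness, the same per-facet ordering can also be sketched with the reorder_within() and scale_x_reordered() helpers from the tidytext package (an alternative to the workaround above, assuming that package is acceptable):
library(dplyr)
library(ggplot2)
library(tidytext)   # provides reorder_within() and scale_x_reordered()

THAT_EXT %>%
  count(GENRE, NODE) %>%                               # frequency of each NODE per GENRE
  mutate(NODE = reorder_within(NODE, -n, GENRE)) %>%   # order NODE by decreasing count within each GENRE
  ggplot(aes(x = NODE, y = n)) +
  geom_col() +
  scale_x_reordered() +                                # strip the internal within-group suffix from labels
  facet_wrap(~ GENRE, scales = "free_x")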