R, haven_labelled: Get label as value - r

I have a vector with the following structure:
structure(c(2, 6, 3, 6, 1, 5, 1, 1, 3, 3, 1, 2, 3, 1, 3, 2, 3,
8, 2, 3, 1, 8, 1, 1, 3, 1, 4, 3, 3, 12, 1, 4, 4, 3, 3, 2, 1,
2, 3, 1, 4, 7, 4, 1, 4, 7, 3, 2, 3, 2, 4, 1, 3, 4, 3, 4, 2, 2,
2, 2, 4, 1, 1, 1, 4, 2, 1, 4, 1, 1, 1, 1, 4, 2, 8, 1, 3, 3, 21,
4, 2, 9, 1, 4, 3, 1, 1, 2, 2, 2, 4, 3, 4, 4, 4, 4, 1, 2, 2, 5,
1, 1, 2, 4, 4, 3, 5, 1, 2, 23, 2, NA, NA, NA), label = "Please indicate the year and month of business establishment. - year", format.spss = "F40.0", display_width = 0L, class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(`2021` = 1, `2020` = 2, `2019` = 3,
`2018` = 4, `2017` = 5, `2016` = 6, `2015` = 7, `2014` = 8, `2013` = 9,
`2012` = 10, `2011` = 11, `2010` = 12, `2009` = 13, `2008` = 14,
`2007` = 15, `2006` = 16, `2005` = 17, `2004` = 18, `2003` = 19,
`2002` = 20, `2001` = 21, `2000` = 22, `1999` = 23, `1998` = 24,
`1997` = 25, `1996` = 26, `1995` = 27, `1994` = 28, `1993` = 29,
`1992` = 30, `1991` = 31, `1990` = 32, `1989` = 33, `1988` = 34,
`1987` = 35, `1986` = 36, `1985` = 37, `1984` = 38, `1983` = 39,
`1982` = 40, `1981` = 41, `1980` = 42, `1979` = 43, `1978` = 44,
`1977` = 45, `1976` = 46, `1975` = 47, `1974` = 48, `1973` = 49,
`1972` = 50, `1971` = 51, `1970` = 52, `1969` = 53, `1968` = 54,
`1967` = 55, `1966` = 56, `1965` = 57))
What I would like to have is a new vector with each value replaced by its label. So, for example, the first four entries in the new column would be (instead of 2, 6, 3, 6...) "2020", "2016", "2019", "2016"...
Any ideas on how to solve this economically?
Thank you very much!

Try
# Find each value's position among the label codes, then read off the label
# names at those positions; values with no matching code (including NA)
# come back as NA.
names(attr(df, "labels"))[match(df, attr(df, "labels"))]
[1] "2020" "2016" "2019" "2016" "2021" "2017" "2021" "2021" "2019" "2019" "2021" "2020" "2019"
[14] "2021" "2019" "2020" "2019" "2014" "2020" "2019" "2021" "2014" "2021" "2021" "2019" "2021"
[27] "2018" "2019" "2019" "2010" "2021" "2018" "2018" "2019" "2019" "2020" "2021" "2020" "2019"
[40] "2021" "2018" "2015" "2018" "2021" "2018" "2015" "2019" "2020" "2019" "2020" "2018" "2021"
[53] "2019" "2018" "2019" "2018" "2020" "2020" "2020" "2020" "2018" "2021" "2021" "2021" "2018"
[66] "2020" "2021" "2018" "2021" "2021" "2021" "2021" "2018" "2020" "2014" "2021" "2019" "2019"
[79] "2001" "2018" "2020" "2013" "2021" "2018" "2019" "2021" "2021" "2020" "2020" "2020" "2018"
[92] "2019" "2018" "2018" "2018" "2018" "2021" "2020" "2020" "2017" "2021" "2021" "2020" "2018"
[105] "2018" "2019" "2017" "2021" "2020" "1999" "2020" NA NA NA

Related

GGplot Plotting Each Point Twice

I am trying to make an animated bubble chart for a baseball league I'm in. Once I create the animated graph and convert it into a gif, it plots each team twice, as shown in the picture below. The legend should only hold 14 points/teams, but it shows 28 instead.
My code is the following:
library(ggplot2)
library(gganimate)
library(readxl)
library(gifski)
library(png)
myData <- read_excel("~/Desktop/Dynasty - Fantasy Baseball.xlsx")
# Make a ggplot, but add frame=year: one image per year
g <- ggplot(myData, aes(PF, PA, size = `W%`, color = Team)) +
geom_point() +
theme_bw() +
# gganimate specific bits:
labs(title = 'Period: {frame_time-1900}', x = 'Points For', y = 'Points Against') +
transition_time(Year) +
ease_aes('linear')
# Save as gif:
anim_save(filename = "~/Desktop/FantasyBaseballAnimated.gif", animation = g)
My data is stored in the following:
structure(list(Team = c("Houston Astros", "Miami Marlins", "New York Mets",
"Atlanta Braves", "St. Louis Cardinals", "Cincinatti Reds", "Philadelphia Reds",
"Baltimore Orioles", "Milwaukee Brewers", "Washington Nationals",
"Montreal Expos", "Tampa Bay Rays", "Seattle Mariners", "Brooklyn Dodgers",
"Houston Astros", "Miami Marlins", "New York Mets", "Atlanta Braves",
"St. Louis Cardinals", "Cincinatti Reds", "Philadelphia Reds",
"Baltimore Orioles", "Milwaukee Brewers", "Washington Nationals",
"Montreal Expos", "Tampa Bay Rays", "Seattle Mariners", "Brooklyn Dodgers",
"New York Mets ", "St. Louis Cardinals ", "Cincinatti Reds ",
"Washington Nationals ", "Atlanta Braves ", "Miami Marlins ",
"Philadelphia Phillies ", "Tampa Bay Rays ", "Houston Astros ",
"Montreal Expos ", "Baltimore Orioles ", "Milwaukee Brewers ",
"Seattle Mariners ", "Brooklyn Dodgers ", "St. Louis Cardinals ",
"Washington Nationals ", "Miami Marlins ", "Cincinatti Reds ",
"New York Mets ", "Atlanta Braves ", "Tampa Bay Rays ", "Houston Astros ",
"Milwaukee Brewers ", "Philadelphia Phillies ", "Baltimore Orioles ",
"Montreal Expos ", "Seattle Mariners ", "Brooklyn Dodgers ",
"Washington Nationals ", "St. Louis Cardinals ", "Atlanta Braves ",
"Cincinatti Reds ", "New York Mets ", "Houston Astros ", "Miami Marlins ",
"Philadelphia Phillies ", "Tampa Bay Rays ", "Milwaukee Brewers ",
"Baltimore Orioles ", "Montreal Expos ", "Seattle Mariners ",
"Brooklyn Dodgers ", "St. Louis Cardinals ", "Washington Nationals ",
"Philadelphia Phillies ", "Miami Marlins ", "Atlanta Braves ",
"New York Mets ", "Houston Astros ", "Milwaukee Brewers ",
"Cincinatti Reds ", "Tampa Bay Rays ", "Montreal Expos ",
"Baltimore Orioles ", "Seattle Mariners ", "Brooklyn Dodgers ",
"New York Mets ", "St. Louis Cardinals ", "Washington Nationals ",
"Philadelphia Phillies ", "Miami Marlins ", "Houston Astros ",
"Atlanta Braves ", "Milwaukee Brewers ", "Cincinatti Reds ",
"Tampa Bay Rays ", "Montreal Expos ", "Baltimore Orioles ",
"Seattle Mariners ", "Brooklyn Dodgers ", "St. Louis Cardinals ",
"Washington Nationals ", "Houston Astros ", "New York Mets ",
"Philadelphia Phillies ", "Milwaukee Brewers ", "Atlanta Braves ",
"Miami Marlins ", "Cincinatti Reds ", "Tampa Bay Rays ", "Baltimore Orioles ",
"Montreal Expos ", "Seattle Mariners ", "Brooklyn Dodgers "
), W = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 9, 8,
7, 6, 6, 5, 6, 5, 4, 3, 2, 2, 2, 17, 17, 16, 14, 14, 14, 12,
11, 13, 7, 7, 6, 3, 3, 25, 24, 22, 21, 20, 20, 18, 19, 16, 14,
12, 9, 8, 5, 33, 32, 27, 27, 25, 26, 25, 23, 21, 21, 16, 15,
11, 7, 37, 37, 35, 34, 33, 32, 32, 29, 29, 27, 21, 19, 17, 7,
44, 43, 43, 40, 38, 40, 37, 37, 35, 32, 25, 23, 20, 7, 52, 50,
50, 48, 48, 43, 42, 40, 41, 38, 34, 28, 25, 8), L = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, 4, 6, 5, 6, 5, 6,
7, 8, 9, 10, 5, 5, 7, 7, 8, 9, 9, 9, 11, 14, 15, 15, 19, 21,
8, 9, 11, 13, 13, 13, 14, 16, 17, 19, 21, 22, 26, 31, 11, 12,
16, 19, 18, 19, 20, 22, 21, 22, 28, 28, 33, 40, 18, 18, 22, 22,
22, 22, 25, 25, 28, 27, 34, 36, 38, 52, 22, 22, 22, 28, 27, 29,
28, 28, 33, 31, 42, 42, 46, 64, 25, 27, 31, 30, 32, 33, 34, 37,
39, 37, 43, 51, 53, 75), T = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 1, 0, 2, 2, 1,
3, 2, 1, 3, 4, 0, 3, 2, 3, 2, 0, 3, 3, 3, 2, 3, 3, 4, 1, 3, 3,
3, 5, 2, 0, 4, 4, 5, 2, 5, 3, 3, 3, 6, 5, 4, 5, 4, 1, 5, 5, 3,
4, 5, 6, 3, 6, 3, 6, 5, 5, 5, 1, 6, 7, 7, 4, 7, 3, 7, 7, 4, 9,
5, 7, 6, 1, 7, 7, 3, 6, 4, 8, 8, 7, 4, 9, 7, 5, 6, 1), `W%` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.833, 0.792, 0.75, 0.667,
0.583, 0.5, 0.5, 0.5, 0.5, 0.417, 0.333, 0.25, 0.208, 0.167,
0.75, 0.75, 0.688, 0.646, 0.625, 0.604, 0.562, 0.542, 0.542,
0.354, 0.333, 0.312, 0.167, 0.125, 0.736, 0.708, 0.653, 0.611,
0.597, 0.597, 0.556, 0.542, 0.486, 0.431, 0.375, 0.319, 0.25,
0.139, 0.729, 0.708, 0.615, 0.583, 0.573, 0.573, 0.552, 0.51,
0.5, 0.49, 0.375, 0.365, 0.271, 0.156, 0.658, 0.658, 0.608, 0.6,
0.592, 0.583, 0.558, 0.533, 0.508, 0.5, 0.392, 0.358, 0.325,
0.125, 0.653, 0.646, 0.646, 0.583, 0.576, 0.576, 0.562, 0.562,
0.514, 0.507, 0.382, 0.368, 0.319, 0.104, 0.661, 0.637, 0.613,
0.607, 0.595, 0.56, 0.548, 0.518, 0.512, 0.506, 0.446, 0.363,
0.333, 0.101), `Div Rec` = c("0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0-0-0", "0-0-0", "37470",
"0-0-0", "0-0-0", "36683", "0-0-0", "36683", "0-0-0", "0-0-0",
"0-0-0", "37295", "0-0-0", "0-0-0", "17-5-2", "0-0-0", "36683",
"0-0-0", "36712", "36653", "0-0-0", "37295", "36594", "0-0-0",
"36683", "0-0-0", "0-0-0", "0-0-0", "37106", "36801", "36653",
"37207", "20-13-3", "13-10-1", "37512", "36594", "0-0-0", "36566",
"36683", "0-0-0", "36653", "0-0-0", "19-4-1", "37106", "13-10-1",
"37207", "25-18-5", "37541", "36754", "36843", "37512", "37381",
"36683", "0-0-0", "37482", "36931", "13-9-2", "19-4-1", "23-13-0",
"17-18-1", "13-10-1", "25-18-5", "37541", "37381", "13-21-2",
"15-19-2", "36683", "36683", "14-19-3", "36943", "25-18-5", "13-9-2",
"25-8-3", "28-19-1", "17-18-1", "18-16-2", "13-10-1", "13-8-3",
"19-26-3", "15-19-2", "36813", "37541", "17-27-4", "36943", "22-12-2",
"25-8-3", "18-16-2", "25-18-5", "28-19-1", "13-8-3", "13-10-1",
"17-18-1", "19-26-3", "15-19-2", "21-13-2", "13-23-0", "17-27-4",
"3-32-1"), GB = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0.5, 1, 2, 3, 4, 4, 4, 4, 5, 6, 7, 7.5, 8, 0, 0, 1.5, 2.5, 3,
3.5, 4.5, 5, 5, 9.5, 10, 10.5, 14, 15, 0, 1, 3, 4.5, 5, 5, 6.5,
7, 9, 11, 13, 15, 17.5, 21.5, 0, 1, 5.5, 7, 7.5, 7.5, 8.5, 10.5,
11, 11.5, 17, 17.5, 22, 27.5, 0, 0, 3, 3.5, 4, 4.5, 6, 7.5, 9,
9.5, 16, 18, 20, 32, 0, 0.5, 0.5, 5, 5.5, 5.5, 6.5, 6.5, 10,
10.5, 19.5, 20.5, 24, 39.5, 0, 2, 4, 4.5, 5.5, 8.5, 9.5, 12,
12.5, 13, 18, 25, 27.5, 47), PF = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 10, 9.5, 9, 8, 7, 6, 6, 6, 6, 5, 4, 3, 2.5, 2,
18, 18, 16.5, 15.5, 15, 14.5, 13.5, 13, 13, 8.5, 8, 7.5, 4, 3,
26.5, 25.5, 23.5, 22, 21.5, 21.5, 20, 19.5, 17.5, 15.5, 13.5,
11.5, 9, 5, 35, 34, 29.5, 28, 27.5, 27.5, 26.5, 24.5, 24, 23.5,
18, 17.5, 13, 7.5, 39.5, 39.5, 36.5, 36, 35.5, 35, 33.5, 32,
30.5, 30, 23.5, 21.5, 19.5, 7.5, 47, 46.5, 46.5, 42, 41.5, 41.5,
40.5, 40.5, 37, 36.5, 27.5, 26.5, 23, 7.5, 55.5, 53.5, 51.5,
51, 50, 47, 46, 43.5, 43, 42.5, 37.5, 30.5, 28, 8.5), PA = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2.5, 3, 4, 5, 6, 6,
6, 6, 7, 8, 9, 9.5, 10, 6, 6, 7.5, 8.5, 9, 9.5, 10.5, 11, 11,
15.5, 16, 16.5, 20, 21, 9.5, 10.5, 12.5, 14, 14.5, 14.5, 16,
16.5, 18.5, 20.5, 22.5, 24.5, 27, 31, 13, 14, 18.5, 20, 20.5,
20.5, 21.5, 23.5, 24, 24.5, 30, 30.5, 35, 40.5, 20.5, 20.5, 23.5,
24, 24.5, 25, 26.5, 28, 29.5, 30, 36.5, 38.5, 40.5, 52.5, 25,
25.5, 25.5, 30, 30.5, 30.5, 31.5, 31.5, 35, 35.5, 44.5, 45.5,
49, 64.5, 28.5, 30.5, 32.5, 33, 34, 37, 38, 40.5, 41, 41.5, 46.5,
53.5, 56, 75.5), Period = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), Place = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14), Year = c(1900,
1900, 1900, 1900, 1900, 1900, 1900, 1900, 1900, 1900, 1900, 1900,
1900, 1900, 1901, 1901, 1901, 1901, 1901, 1901, 1901, 1901, 1901,
1901, 1901, 1901, 1901, 1901, 1902, 1902, 1902, 1902, 1902, 1902,
1902, 1902, 1902, 1902, 1902, 1902, 1902, 1902, 1903, 1903, 1903,
1903, 1903, 1903, 1903, 1903, 1903, 1903, 1903, 1903, 1903, 1903,
1904, 1904, 1904, 1904, 1904, 1904, 1904, 1904, 1904, 1904, 1904,
1904, 1904, 1904, 1905, 1905, 1905, 1905, 1905, 1905, 1905, 1905,
1905, 1905, 1905, 1905, 1905, 1905, 1906, 1906, 1906, 1906, 1906,
1906, 1906, 1906, 1906, 1906, 1906, 1906, 1906, 1906, 1907, 1907,
1907, 1907, 1907, 1907, 1907, 1907, 1907, 1907, 1907, 1907, 1907,
1907)), row.names = c(NA, -112L), class = c("tbl_df", "tbl",
"data.frame"))
I thought factoring it would work, and also parsing it but neither worked:
#first thought
myData$Team <- factor(myData$Team)
summary(myData)
#second thought
myData$Team <- eval(parse(text = myData$Team))
Am I just missing something obvious? I'm drawing a blank at how I could fix this. Any help would be greatly appreciated!
It looks like you need to do some data cleaning:
data %>% group_by(Team) %>%
summarise(count = n())
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 28 x 2
Team count
<chr> <int>
1 "Atlanta Braves" 2
2 "Atlanta Braves " 6
3 "Baltimore Orioles" 2
4 "Baltimore Orioles " 6
5 "Brooklyn Dodgers" 2
6 "Brooklyn Dodgers " 6
7 "Cincinatti Reds" 2
8 "Cincinatti Reds " 6
9 "Houston Astros" 2
10 "Houston Astros " 6
# ... with 18 more rows
Using stringr:
data <- data %>%
mutate(Team = str_trim(Team, side = c("both")))
Answer
Remove the whitespace around the names:
myData$Team <- trimws(myData$Team)
Rationale
You actually have each team in there twice. Half just contain a single space at the end of their name. You may want to look into WHY that is happening.
table(myData$Team, myData$Year)[1:2, ]
# 1900 1901 1902 1903 1904 1905 1906 1907
# Atlanta Braves 1 1 0 0 0 0 0 0
# Atlanta Braves 0 0 1 1 1 1 1 1
sort(unique(myData$Team))[1:2]
#[1] "Atlanta Braves" "Atlanta Braves "

Stargazer descriptive table does not calculate data in R

I am trying to create a descriptive table (to export to LaTeX) with Stargazer. For some reason, in some datasets, like the one below, it only yields the first line (the column titles of the table).
I am not sure why this happens. It seems there is something wrong with my data. I saw a similar problem here, but the desired table was not of summary statistics. In my case, if I apply the solution presented there (turn the dataset into a matrix), the table yielded is of the entire dataset, not descriptive stats.
Below is a code for a sample data and what I tried to produce the tables.
df <- structure(list(country = structure(c(26L, 5L, 26L, 25L, 25L,
3L, 27L, 27L, 27L, 6L, 27L, 26L, 6L, 10L, 1L, 5L, 27L, 20L, 27L,
10L, 26L, 1L, 2L, 15L, 10L, 1L, 26L, 17L, 9L, 7L, 11L, 1L, 26L,
20L, 27L, 6L, 1L, 25L, 9L, 2L, 22L, 15L, 23L, 27L, 9L, 27L, 8L,
27L, 19L, 24L, 20L, 19L, 22L, 26L, 26L, 10L, 10L, 16L, 20L, 25L,
2L, 26L, 1L, 27L, 11L, 1L, 2L, 8L, 25L, 27L, 26L, 9L, 10L, 4L,
9L, 27L, 7L, 19L, 27L, 26L, 6L, 17L, 11L, 16L, 16L, 26L, 27L,
26L, 27L, 14L, 23L, 27L, 23L, 10L, 9L, 23L, 22L, 26L, 26L, 27L
), .Label = c("Austria", "Belgium", "Bulgaria", "Cyprus", "Czech Republic",
"Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
"Hungary", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg",
"Malta", "Netherlands", "Poland", "Portugal", "Romania", "Slovakia",
"Slovenia", "Spain", "Sweden", "United Kingdom"), class = "factor"),
ptyname_eng = structure(c("Centre Party", "Green Party",
"Moderate Party", "Europe of the Peoples-The Greens", "Europe of the Peoples-The Greens",
"Order, Lawfulness, and Justice (RZS)", "Labour", "Green Party",
"Labour", "Liberal Party", "Independent", "Liberal People's Party",
"Danish People's Party", "Free Democratic Party", "Austrian People's Party",
"Christian and Democratic Union-Czechoslovak People's Party",
"Green Party", "Coalition Agreement for the Future - CenterLeft",
"UK Independence Party (UKIP)", "Free Democratic Party",
"Centre Party", "The Greens", "Ecolo", "Libertas", "Free Voters",
"The Greens", "Centre Party", "Christian Social People's Party",
"Europe Écologie", "People's Union of Estonia", "Action",
"Communist Party of Austria", "Centre Party", "Coalition Agreement for the Future - CenterLeft",
"Jury Team", "Social Democrats", "Austrian People's Party",
"Union, Progress, and Democracy", "The Left Front (Left Party + French Communist Party)",
"Ecolo", "Greater Romania Party", "Harmony Centre", "Green Party",
"Green Party", "Socialist Party", "No2EU: Yes to Democracy",
"Social Democratic Party of Finland", "Conservatives", "Libertas",
"For Real", "Civic Platform", "Christian Union-Reformed Political Party",
"Democratic Liberal Party", "Sweden Democrats", "Green Party",
"Free Democratic Party", "Christian Democratic Union/Christian Social Union",
"Civic Democratic Party", "Civic Platform", "Union, Progress, and Democracy",
"Christian Democratic and Flemish Party", "Pirate Party",
"The Greens", "Socialist Labour Party SLP", "New Democracy",
"The Greens", "Christian Democratic and Flemish Party", "Left Alliance",
"Union, Progress, and Democracy", "British National Party (BNP)",
"Left Party", "The Left Front (Left Party + French Communist Party)",
"Christian Social Democrats", "Progressive Party of Working People",
"Socialist Party", "UK Independence Party (UKIP)", "Social Democratic Party",
"Democrats 66", "Mebyon Kernow", "June List", "Socialist People's Party",
"Christian Social People's Party", "New Democracy", "Frontas Party",
"Homeland Union - Lithuanian Christian Democrats", "Liberal People's Party",
"No2EU: Yes to Democracy", "Centre Party", "Pro Democracy: Libertas.eu",
"Anticapitalist List", "Conservative Democrats of Slovakia",
"Christian Party - Christian People's Alliance", "Slovak Democratic and Christian Union",
"Free Democratic Party", "Europe Écologie", "Direction - Social Democracy",
"Democratic Union of Hungarians in Romania", "Centre Party",
"Liberal People's Party", "Conservatives"), label = "Party name (in English)", format.stata = "%75s"),
votes = c(5.47, 2.06, 18.83, 2.49, 2.49, 4.67, 15.7, 8.6,
15.7, 20.23, NA, 13.58, 15.28, 11, 29.98, 7.64, 8.6, 2.44,
16.5, 11, 5.47, 9.93, 8.64, 4.3, 1.7, 9.93, 5.47, 31.32,
16.28, 2.2, 0.76, 0.66, 5.47, 2.44, 0.5, 21.49, 29.98, 2.85,
6.05, 8.64, 8.65, 19.57, 2.11, 8.6, 16.48, 1.01, 17.5, 27.7,
0.32, 9.76, 44.43, 6.82, 29.71, 3.27, 11.02, 11, 30.7, 1.35,
44.43, 2.85, 14.43, 7.13, 9.93, 1.1, 32.3, 9.93, 14.43, 5.9,
2.85, 6.2, 5.66, 6.05, 7.2, 34.9, 16.48, 16.5, 8.7, 11.32,
0.1, 3.55, 15.86, 31.32, 32.3, 2.43, 26.86, 13.58, 1.01,
5.47, 0.5, 3.39, 2.1, 1.6, 16.98, 11, 16.28, 32.01, 8.92,
5.47, 13.58, 27.7), seats = c(1, 0, 4, 1, 1, 0, 13, 2, 13,
3, NA, 3, 2, 12, 6, 2, 2, 0, 13, 12, 1, 2, 2, 0, 0, 2, 1,
3, 14, 0, 0, 0, 1, 0, 0, 4, 6, 1, 4, 2, 3, 2, 0, 2, 14, 0,
2, 25, 0, 1, 25, 2, 10, 0, 2, 12, 34, 0, 25, 1, 3, 1, 2,
0, 8, 2, 3, 0, 1, 2, 1, 4, 8, 2, 14, 13, 1, 3, 0, 0, 2, 3,
8, 0, 4, 3, 0, 1, 0, 0, 0, 0, 2, 12, 14, 5, 3, 1, 3, 25),
lsq = c(5.65121280548163, 11.0409569967897, 5.65121280548163,
2.09070598337411, 2.09070598337411, 18.4291883786975, 7.64222053188085,
7.64222053188085, 7.64222053188085, 8.49009926343377, NA,
5.65121280548163, 8.49009926343377, 4.03227351136326, 5.21894849144935,
11.0409569967897, 7.64222053188085, 4.87412556260095, 7.64222053188085,
4.03227351136326, 5.65121280548163, 5.21894849144935, 4.16813185258047,
9.99651439252703, 4.03227351136326, 5.21894849144935, 5.65121280548163,
14.683991850538, 10.16545, 10.829844309951, 3.4795321239576,
5.21894849144935, 5.65121280548163, 4.87412556260095, 7.64222053188085,
8.49009926343377, 5.21894849144935, 2.09070598337411, 10.1654489717407,
4.16813185258047, 2.66691671130863, 9.99651439252703, 7.88621704182489,
7.64222053188085, 10.1654489717407, 7.64222053188085, 6.48086623985829,
7.64222053188085, 3.63340749159794, 10.4201368043039, 4.87412556260095,
3.63340749159794, 2.66691671130863, 5.65121280548163, 5.65121280548163,
4.03227351136326, 4.03227351136326, 8.7655582315738, 4.87412556260095,
2.09070598337411, 4.16813185258047, 5.65121280548163, 5.21894849144935,
7.64222053188085, 3.4795321239576, 5.21894849144935, 4.16813185258047,
6.48086623985829, 2.09070598337411, 7.64222053188085, 5.65121280548163,
10.1654489717407, 4.03227351136326, 6.81012399952372, 10.1654489717407,
7.64222053188085, 10.829844309951, 3.63340749159794, 7.64222053188085,
5.65121280548163, 8.49009926343377, 14.683991850538, 3.4795321239576,
8.7655582315738, 8.7655582315738, 5.65121280548163, 7.64222053188085,
5.65121280548163, 7.64222053188085, 6.15555836341693, 7.88621704182489,
7.64222053188085, 7.88621704182489, 4.03227351136326, 10.16545,
7.88621704182489, 2.66691671130863, 5.65121280548163, 5.65121280548163,
7.64222053188085), v020_03 = c(5, 5, 3, 5, 5, 3, 5, 5, 3,
NA, 5, 5, 3, 4, 3, 2, 5, NA, 4, 5, 5, 5, 5, NA, 2, 5, 4,
5, 5, 2, NA, NA, 5, 5, 2, 5, 3, 4, 5, 5, 3, 5, 5, 5, 5, 5,
5, 3, 3, NA, 2, 2, 3, 1, 5, 5, 1, 3, 3, NA, 1, 5, 5, 5, 3,
5, 5, 5, 4, 2, NA, 5, 1, 3, 2, 3, NA, 5, 5, 4, NA, 4, 2,
NA, 2, 5, 5, 5, 3, 5, 2, 1, 3, 3, 5, NA, 4, 5, 5, 1), v020_04 = c(5,
4, NA, 1, 3, 5, 3, 2, 2, NA, 2, 4, 4, 5, 5, 2, 1, NA, 4,
5, 5, 2, 5, NA, 3, 3, 4, 4, 3, 4, NA, NA, 3, 3, 4, 1, 5,
4, 2, 5, 1, 5, 3, 4, 2, 1, 4, 5, 3, NA, 3, 4, 4, 4, 3, 5,
5, 2, 4, NA, 1, 1, 3, 1, 3, 2, 2, 2, 3, 3, NA, 1, 1, 3, 2,
4, NA, 5, 2, 3, NA, 5, 2, NA, 2, 2, 1, 4, 5, 3, 4, 3, 4,
5, 1, NA, 3, 2, 4, 5), v020_08 = c(5, 3, 3, 1, 3, 5, 3, 1,
2, NA, 2, 3, 4, 5, 5, 5, 1, NA, 5, 5, NA, 2, 3, NA, 4, 2,
3, 4, 1, 3, NA, NA, 3, 2, 3, 1, 4, 2, 1, 3, 1, 2, 3, 2, 1,
1, 3, 3, 3, NA, 4, 2, 4, 3, 2, 2, 5, 3, 4, NA, 2, 3, 1, 1,
3, 3, 2, 2, 3, 3, NA, 1, 5, 4, 1, 3, NA, 4, 2, 3, NA, 2,
3, NA, 2, 4, 1, 4, 1, 1, 4, 3, 4, 4, 1, NA, 5, 5, 3, 3),
v018_1 = c(8, 5, 5, 1, 3, 9, 3, 2, 2, NA, NA, 5, 5, 5, 7,
7, 1, NA, 7, 5, 6, 4, 3, NA, 8, 4, 5, 6, 0, 5, NA, NA, 5,
2, 7, 3, 8, 4, 0, 4, 0, 7, 7, NA, 0, 0, 3, 8, 5, NA, 7, 5,
10, 8, 4, 5, 8, 5, 8, NA, 5, 2, 0, 10, 6, 3, 3, 2, 4, 6,
NA, 0, 8, 4, 0, 6, NA, 4, 3, 5, NA, 7, 6, NA, 9, 8, 0, 6,
5, 0, 8, 6, 8, 5, 2, NA, 6, 5, 7, 7), v020_05 = c(1, 1, 1,
1, 3, 1, 1, 1, 2, NA, 1, 2, 4, 2, 4, 3, 1, NA, 3, 2, 1, 1,
4, NA, 2, 1, 2, 3, 1, 1, NA, NA, 2, 1, 1, 1, 2, 4, 1, 2,
1, 1, 1, 1, 1, 1, 1, 1, 3, NA, 5, 4, 1, 4, 2, 2, 1, 4, 2,
NA, 3, 1, 1, 1, 1, 1, 2, 1, 3, 4, NA, 1, 4, 3, 1, 3, NA,
1, 1, 2, NA, 4, 1, NA, 4, 2, 1, 1, 3, 1, 4, 2, 2, 4, 1, NA,
1, 2, 1, 5), v020_02 = c(1, 3, 1, 5, 4, 1, 3, 5, 4, NA, 3,
2, 3, 2, 1, 3, 5, NA, 1, 1, 1, 5, 4, NA, 1, 2, 2, 1, 5, 2,
NA, NA, 3, 3, 1, 5, 3, 2, 5, 4, 1, 2, 2, 5, 5, 5, 3, 3, 1,
NA, 1, 4, 1, 2, 4, 1, 1, 3, 1, NA, 3, 4, 4, 4, 3, 4, 5, 4,
2, 3, NA, 5, 1, 2, 2, 1, NA, 2, 3, 2, NA, 1, 2, NA, 2, 2,
5, 2, 1, 4, 2, 4, 2, 2, NA, NA, 2, 3, 1, 1)), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
# Attempt 1: pass the tibble straight to stargazer.
stargazer(df)
# Attempt 2: coerce to a matrix inline.
stargazer(as.matrix(df))
# Attempt 3: coerce to a matrix first, then call stargazer on the result.
df <- as.matrix(df)
stargazer(df)
Convert the data to dataframe.
stargazer::stargazer(data.frame(df))
With type = 'text'
stargazer::stargazer(data.frame(df), type = 'text')
===========================================================
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
-----------------------------------------------------------
votes 99 11.784 10.443 0.100 3.330 16.380 44.430
seats 99 4.515 6.649 0.000 0.500 4.000 34.000
lsq 99 6.627 2.903 2.091 4.521 7.886 18.429
v020_03 88 3.852 1.352 1.000 3.000 5.000 5.000
v020_04 87 3.172 1.340 1.000 2.000 4.000 5.000
v020_08 87 2.828 1.278 1.000 2.000 4.000 5.000
v018_1 86 4.767 2.660 0.000 3.000 7.000 10.000
v020_05 88 1.966 1.198 1.000 1.000 3.000 5.000
v020_02 87 2.667 1.403 1.000 1.000 4.000 5.000
-----------------------------------------------------------
We can also convert to tibble
stargazer::stargazer(as_tibble(df))

Tried code in R with mutate_at and max() functions with own data. Warning messages come up: no non-missing arguments to max

I'm currently learning R with a book and was trying a mutate_at function from dplyr. In this example I want to standardize the survey items on a scale from 0 to 1. To do this, we can divide each value by the (theoretical) maximum value of the scale.
The book example stats_test from the package "pradadata" works perfectly fine:
data(stats_test, package = "pradadata")
stats_test %>%
drop_na() %>%
mutate_at(.vars = vars(study_time, self_eval, interest),
.funs = funs(prop = ./max(.))) %>%
select(contains("_prop"))
Output:
study_time_prop self_eval_prop interest_prop
<dbl> <dbl> <dbl>
1 0.6 0.7 0.667
2 0.8 0.8 0.833
3 0.6 0.4 0.167
4 0.8 0.7 0.833
5 0.4 0.6 0.5
6 0.4 0.6 0.667
7 0.8 0.6 0.5
8 0.2 0.7 0.667
9 0.6 0.8 0.833
10 0.6 0.7 0.833
# ... with 1,617 more rows
Tried the same code with my own data but it doesn't work and I can't figure out why. The variable RG04 from my data has a range from 1-5. I tried to transform the variable from numeric to integer, because the variables from the data stats_test are integer too:
df_literacy_2 <- transform(df_literacy, RG04 = as.integer(RG04))
df_literacy_2 <- tibble(df_literacy_2)
df_literacy_2 %>%
drop_na() %>%
mutate_at(.vars = vars(RG04),
.funs = funs(prop = ./max(.))) %>%
select(contains("_prop"))
Output:
# A tibble: 0 x 0
Warning messages:
1: Problem with `mutate()` input `prop`.
i no non-missing arguments to max; returning -Inf
i Input `prop` is `RG04/max(RG04)`.
2: In base::max(x, ..., na.rm = na.rm) :
no non-missing arguments to max; returning -Inf
str(df_literacy_2$RG04)
int [1:630] 2 4 2 1 2 2 1 3 1 3 ...
Why doesn't it work on my data?
Thank you for your help.
Edit with sample of df_literacy:
> dput(head(df_literacy,20))
structure(list(CASE = c(40, 41, 44, 45, 48, 49, 54, 55, 56, 57,
58, 61, 62, 63, 64, 65, 66, 67, 68, 69), SERIAL = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), REF = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), QUESTNNR = c("base", "base",
"base", "base", "base", "base", "base", "base", "base", "base",
"base", "base", "base", "base", "base", "base", "base", "base",
"base", "base"), MODE = c("interview", "interview", "interview",
"interview", "interview", "interview", "interview", "interview",
"interview", "interview", "interview", "interview", "interview",
"interview", "interview", "interview", "interview", "interview",
"interview", "interview"), STARTED = structure(c(1607290462,
1607290608, 1607291086, 1607291118, 1607291265, 1607291793, 1607294071,
1607294336, 1607294337, 1607294419, 1607294814, 1607296474, 1607301809,
1607329348, 1607333933, 1607335996, 1607336207, 1607336378, 1607343194,
1607343414), tzone = "UTC", class = c("POSIXct", "POSIXt")),
EI01 = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("Ja",
"Nein", "Nicht beantwortet"), class = "factor"), EI02 = c(2,
2, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 3),
RF01 = c(4, 2, 4, 3, 4, 4, 1, 3, 2, 3, 4, 3, 2, 3, 2, 2,
4, 2, 5, 3), RF02 = c(1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1,
1, 1, 2, 2, 2, 2, 2, 2), RF03 = c(1, 2, 2, 2, 1, 2, 1, 1,
1, 1, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2), RG01 = c(2, 2, 2, 2,
2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2), RG02 = c(3,
3, 3, 3, 4, 3, 4, 2, 4, 2, 3, 4, 4, 2, 4, 3, 4, 3, 4, 4),
RG03 = c(3, 2, 2, 3, 3, 3, 1, 3, 1, 2, 3, 1, 2, 2, 1, 3,
2, 3, 2, 2), RG04 = c(2, 4, 2, 1, 2, 2, 1, 3, 1, 3, 2, 4,
1, 1, 1, 1, 1, 2, 4, 1), RG05 = c(1, 1, 1, 1, 1, 1, 1, 2,
1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1), SD01 = structure(c(2L,
1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L), .Label = c("weiblich", "männlich", "divers",
"nicht beantwortet"), class = "factor"), SD03 = c(4, 3, 2,
2, 1, 2, 4, 4, 1, 4, 3, 1, 2, 3, 2, 4, 2, 3, 1, 3), SD05_01 = c(23,
22, 22, 21, 18, 22, 21, 27, 17, 22, 17, 21, 21, 22, 50, 25,
23, 20, 23, 23), TIME001 = c(2, 3, 23, 73, 29, 2, 3, 3, 29, 7,
50, 55, 3, 2, 10, 2, 1, 5, 7, 35), TIME002 = c(2, 2, 16,
34, 12, 14, 2, 2, 21, 2, 30, 24, 21, 3, 3, 2, 3, 2, 3, 22
), TIME003 = c(34, 8, 12, 15, 13, 12, 12, 7, 13, 11, 16,
10, 11, 16, 8, 8, 7, 8, 11, 14), TIME004 = c(60, 33, 25,
31, 45, 25, 14, 13, 38, 35, 50, 50, 37, 32, 32, 25, 72, 55,
28, 29), TIME005 = c(84, 21, 29, 41, 54, 33, 30, 22, 32,
42, 44, 23, 65, 30, 28, 32, 51, 31, 27, 44), TIME006 = c(14,
9, 27, 11, 24, 8, 8, 9, 18, 12, 35, 33, 27, 46, 11, 15, 8,
14, 12, 14), TIME007 = c(3, 18, 3, 5, 6, 2, 9, 2, 3, 3, 6,
7, 3, 13, 4, 4, 378, 3, 4, 10), TIME_SUM = c(199, 94, 135,
142, 183, 96, 78, 58, 154, 112, 186, 152, 167, 142, 96, 88,
146, 118, 92, 168), MAILSENT = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
LASTDATA = structure(c(1607290661, 1607290702, 1607291221,
1607291328, 1607291448, 1607291889, 1607294149, 1607294394,
1607294491, 1607294531, 1607295045, 1607296676, 1607301976,
1607329490, 1607334030, 1607336084, 1607336727, 1607336496,
1607343286, 1607343582), tzone = "UTC", class = c("POSIXct",
"POSIXt")), FINISHED = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1), Q_VIEWER = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), LASTPAGE = c(7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7),
MAXPAGE = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7), MISSING = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 0, 7, 7, 7), MISSREL = c(1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1), TIME_RSI = c("46023",
"14246", "0.75", "0.63", "0.54", "12055", "17533", "30682",
"0.7", "44197", "0.45", "0.58", "0.83", "44378", "44501",
"18629", "46753", "46388", "44197", "0.57"), DEG_TIME = c(27,
27, 3, 1, 0, 23, 30, 42, 2, 17, 0, 2, 7, 18, 10, 27, 43,
18, 8, 0)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
Edit with TRUE and FALSE NAs:
> sapply(df_literacy, function(a) table(c(T,F,is.na(a)))-1)
CASE SERIAL REF QUESTNNR MODE STARTED EI01 EI02 RF01 RF02 RF03 RG01 RG02 RG03 RG04 RG05 SD01 SD03 SD05_01 TE03_01 TIME001 TIME002 TIME003
FALSE 630 0 0 630 630 630 630 630 630 630 630 630 630 630 630 630 629 629 615 99 630 630 630
TRUE 0 630 630 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 15 531 0 0 0
TIME004 TIME005 TIME006 TIME007 TIME_SUM MAILSENT LASTDATA FINISHED Q_VIEWER LASTPAGE MAXPAGE MISSING MISSREL TIME_RSI DEG_TIME
FALSE 630 630 629 625 630 0 630 630 630 630 630 630 630 630 630
TRUE 0 0 1 5 0 630 0 0 0 0 0 0 0 0 0
There are a few things to correct here.
drop_na() is removing all of your data.
drop_na(df_literacy)
# # A tibble: 0 x 37
# # ... with 37 variables: CASE <dbl>, SERIAL <lgl>, REF <lgl>, QUESTNNR <chr>,
# # MODE <chr>, STARTED <dttm>, EI01 <fct>, EI02 <dbl>, RF01 <dbl>, RF02 <dbl>,
# # RF03 <dbl>, RG01 <dbl>, RG02 <dbl>, RG03 <dbl>, RG04 <dbl>, RG05 <dbl>,
# # SD01 <fct>, SD03 <dbl>, SD05_01 <dbl>, TIME001 <dbl>, TIME002 <dbl>,
# # TIME003 <dbl>, TIME004 <dbl>, TIME005 <dbl>, TIME006 <dbl>, TIME007 <dbl>,
# # TIME_SUM <dbl>, MAILSENT <lgl>, LASTDATA <dttm>, FINISHED <dbl>,
# # Q_VIEWER <dbl>, LASTPAGE <dbl>, MAXPAGE <dbl>, MISSING <dbl>,
# # MISSREL <dbl>, TIME_RSI <chr>, DEG_TIME <dbl>
The problem is that you have several columns that are completely NA, namely SERIAL, REF, and MAILSENT.
# Per column, count non-missing (FALSE) and missing (TRUE) values. One TRUE
# and one FALSE are prepended so both levels always appear in the table,
# then subtracted again with `- 1`.
sapply(df_literacy, function(a) table(c(TRUE, FALSE, is.na(a))) - 1)
# CASE SERIAL REF QUESTNNR MODE STARTED EI01 EI02 RF01 RF02 RF03 RG01 RG02
# FALSE 20 0 0 20 20 20 20 20 20 20 20 20 20
# TRUE 0 20 20 0 0 0 0 0 0 0 0 0 0
# RG03 RG04 RG05 SD01 SD03 SD05_01 TIME001 TIME002 TIME003 TIME004 TIME005
# FALSE 20 20 20 20 20 20 20 20 20 20 20
# TRUE 0 0 0 0 0 0 0 0 0 0 0
# TIME006 TIME007 TIME_SUM MAILSENT LASTDATA FINISHED Q_VIEWER LASTPAGE
# FALSE 20 20 20 0 20 20 20 20
# TRUE 0 0 0 20 0 0 0 0
# MAXPAGE MISSING MISSREL TIME_RSI DEG_TIME
# FALSE 20 20 20 20 20
# TRUE 0 0 0 0 0
Drop the drop_na(), or at least drop_na(-SERIAL, -REF, -MAILSENT).
Your code is using funs, which has been deprecated since dplyr-0.8.0.
# Warning: `funs()` is deprecated as of dplyr 0.8.0.
# Please use a list of either functions or lambdas:
# # Simple named list:
# list(mean = mean, median = median)
# # Auto named with `tibble::lst()`:
# tibble::lst(mean, median)
# # Using lambdas
# list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
While this isn't causing an error, it is causing a warning (and will likely stop working at some point). Change your mutate_at to be:
mutate_at(.vars = vars(RG04, RF02),
.funs = list(prop = ~ . / max(.)))
You are using a single variable within .vars and a single function within .funs, so the column names are preserved as-is (and you will not see a _prop column). From ?mutate_at:
The names of the new columns are derived from the names of the
input variables and the names of the functions.
• if there is only one unnamed function (i.e. if '.funs' is an
unnamed list of length one), the names of the input variables
are used to name the new columns;
• for _at functions, if there is only one unnamed variable
(i.e., if '.vars' is of the form 'vars(a_single_column)') and
'.funs' has length greater than one, the names of the
functions are used to name the new columns;
• otherwise, the new names are created by concatenating the
names of the input variables and the names of the functions,
separated with an underscore '"_"'.
If you aren't going to add more variables and functions, then you need to self-name it in the call, as in mutate_at(.vars = vars(RG04 = RG04), ...). Oddly enough, this causes it to produce RG04_prop.
If we fix all of those, then it works.
# Corrected pipeline: SERIAL, REF and MAILSENT are entirely NA, so they are
# excluded from the NA check instead of letting drop_na() remove every row.
df_literacy %>%
drop_na(-SERIAL, -REF, -MAILSENT) %>%
# Naming the variable inside vars() makes the new column come out as RG04_prop.
mutate_at(.vars = vars(RG04 = RG04),
.funs = list(prop = ~ ./max(.))) %>%
select(contains("_prop")) %>%
head(3)
# A tibble: 3 x 1
# RG04_prop
# <dbl>
# 1 0.5
# 2 1
# 3 0.5

mutate_at & vars: Can I tell r / dplyr to overwrite existing features instead of creating new ones?

[If downvoting, feedback would be helpful so I can try to update the post.]
library(tidyverse)
# Convert three numeric columns to character columns.
example_mtcars <- mtcars %>% mutate_at(vars(disp, wt, qsec), funs(as.character(.)))
# Overwrite a few entries in each of those columns with the literal string "NULL".
example_mtcars$disp[c(2,4,8)] <- "NULL"
example_mtcars$wt[c(10, 12)] <- "NULL"
example_mtcars$qsec[c(2,3,4)] <- "NULL"
# Apply two separate functions to the same columns; each one is evaluated on the
# original column and yields its own suffixed output column.
processed_mtcars <- example_mtcars %>%
mutate_at(vars(c(disp, wt:qsec)),
funs(str_replace(., "NULL", "0"),
as.numeric))
The new data frame processed_mtcars has new features where I would like the feature names to be the exact same as example_mtcars but with the transformations applied.
> glimpse(processed_mtcars)
Observations: 32
Variables: 17
$ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, ...
$ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4
$ disp <chr> "160", "NULL", "108", "NULL", "360", "225", "360", "NULL", "140.8", "167.6", "167.6", "275.8", "...
$ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97...
$ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3.00, ...
$ wt <chr> "2.62", "2.875", "2.32", "3.215", "3.44", "3.46", "3.57", "3.19", "3.15", "NULL", "3.44", "NULL"...
$ qsec <chr> "16.46", "NULL", "NULL", "NULL", "17.02", "20.22", "15.84", "20", "22.9", "18.3", "18.9", "17.4"...
$ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1
$ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1
$ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4
$ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2
$ disp_str_replace <chr> "160", "0", "108", "0", "360", "225", "360", "0", "140.8", "167.6", "167.6", "275.8", "275.8", "...
$ wt_str_replace <chr> "2.62", "2.875", "2.32", "3.215", "3.44", "3.46", "3.57", "3.19", "3.15", "0", "3.44", "0", "3.7...
$ qsec_str_replace <chr> "16.46", "0", "0", "0", "17.02", "20.22", "15.84", "20", "22.9", "18.3", "18.9", "17.4", "17.6",...
$ disp_as.numeric <dbl> 160.0, NA, 108.0, NA, 360.0, 225.0, 360.0, NA, 140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 472.0, ...
$ wt_as.numeric <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, NA, 3.440, NA, 3.730, 3.780, 5.25...
$ qsec_as.numeric <dbl> 16.46, NA, NA, NA, 17.02, 20.22, 15.84, 20.00, 22.90, 18.30, 18.90, 17.40, 17.60, 18.00, 17.98, ...
For example, the second function passed to funs is as.numeric. However, it's referencing the original, untransformed features, not the ones where "NULL" has just been replaced with "0". So there are multiple NA values.
Perhaps one of the following is what you're after:
# Replace "NULL" with "0" and convert to numeric in a single step, so the
# selected columns are overwritten in place rather than gaining suffixed copies.
# A formula lambda is used because funs() is deprecated as of dplyr 0.8.0.
example_mtcars %>%
  mutate_at(
    vars(c(disp, wt:qsec)),
    ~ as.numeric(str_replace(., "NULL", "0"))
  )
or this:
# Same single-step transformation, but "NULL" entries become NA instead of 0.
# A formula lambda is used because funs() is deprecated as of dplyr 0.8.0.
example_mtcars %>%
  mutate_at(
    vars(c(disp, wt:qsec)),
    ~ as.numeric(str_replace(., "NULL", NA_character_))
  )

dplyr: How does bind_rows() change the original dataframe

hth1 is a data frame that I already have.
> hth1
Source: local data frame [13 x 14]
Groups: team [13]
team CSK DC DD GL KKR KTK KXIP MI PW RCB RPSG
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CSK 0 8 11 0 11 2 9 10 4 10 0
2 DC 2 0 8 0 2 1 7 5 3 8 0
3 DD 5 3 0 0 7 2 8 5 2 10 2
4 GL 0 0 2 0 0 0 0 0 0 1 0
5 KKR 5 7 10 2 0 0 5 10 3 15 0
6 KTK 0 0 0 0 2 0 1 0 1 2 0
7 KXIP 8 3 10 2 14 0 0 11 2 6 1
8 MI 12 5 13 2 8 1 7 0 3 11 1
9 PW 2 1 4 0 2 0 4 3 0 1 0
10 RCB 9 3 7 2 3 0 12 8 4 0 1
11 RPSG 0 0 0 2 2 0 1 1 0 1 0
12 RR 8 2 7 0 14 1 7 6 2 7 0
13 SH 3 0 4 0 5 0 4 5 2 5 2
# ... with 2 more variables: RR <dbl>, SH <dbl>
Why do the data frame returned by bind_rows() and the original data frame differ?
> h <- list(hth1)
> hth_b1 <- bind_rows(h)
> identical(hth1, hth_b1)
[1] FALSE
> class(hth_b1)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
> class(hth1)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
> setequal(hth1, hth_b1)
TRUE
> anti_join(hth1, hth_b1)
Joining, by = c("team", "CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH")
Source: local data frame [0 x 14]
Groups: team [13]
# ... with 14 variables: team <chr>, CSK <dbl>, DC <dbl>, DD <dbl>, GL <dbl>,
# KKR <dbl>, KTK <dbl>, KXIP <dbl>, MI <dbl>, PW <dbl>, RCB <dbl>,
# RPSG <dbl>, RR <dbl>, SH <dbl>
What am I missing? I have been stuck here for a long time.
Update 1:
As requested by Benjamin, I ran the dput() function on both data frames. Here is the output.
> dput(hth_b1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), row.names = c(NA, -13L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = list(team), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), row.names = c(NA, -13L
), class = "data.frame", vars = list(team), .Names = "team"))
>
> dput(hth1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L), vars = list(team), labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), class = "data.frame", row.names = c(NA,
-13L), vars = list(team), drop = TRUE, .Names = "team"), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
There is a difference between the two outputs: hth1 has an extra drop = TRUE.
I don't understand why it is not there in the other one.
A reproducible example:
library(tidyverse)
# Grouped copy of mtcars.
test1 <- mtcars %>% group_by(cyl)
# Pass the same data through bind_rows() as a one-element list.
test2 <- bind_rows(list(test1))
identical(test1, test2) # FALSE
all_equal(test1, test2) # TRUE
You can check both their attributes and you can see the rownames differ:
rownames(test1)
[1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710"
[4] "Hornet 4 Drive" "Hornet Sportabout" "Valiant"
[7] "Duster 360" "Merc 240D" "Merc 230"
[10] "Merc 280" "Merc 280C" "Merc 450SE"
[13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood"
[16] "Lincoln Continental" "Chrysler Imperial" "Fiat 128"
[19] "Honda Civic" "Toyota Corolla" "Toyota Corona"
[22] "Dodge Challenger" "AMC Javelin" "Camaro Z28"
[25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2"
[28] "Lotus Europa" "Ford Pantera L" "Ferrari Dino"
[31] "Maserati Bora" "Volvo 142E"
rownames(test2)
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13"
[14] "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26"
[27] "27" "28" "29" "30" "31" "32"
Never expect tibbles to treat your rownames with respect; they may be silently dropped at any time.
Forgive the formatting on this answer, but it would appear that you have labels attached to one object, and not in the other. Where the labels got attached or removed isn't something I can know without looking at code that generates the objects. I've bolded the difference in your objects below.
Note: not formatting this as code is a deliberate choice. Formatting as code prevents me from marking the difference in the structure in bold text
dput(hth_b1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), row.names = c(NA, -13L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = list(team), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L , labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), row.names = c(NA, -13L
), class = "data.frame", vars = list(team), .Names = "team"))
dput(hth1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L), vars = list(team), labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), class = "data.frame", row.names = c(NA,
-13L), vars = list(team), drop = TRUE, .Names = "team"), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
In the example below, I will add labels to the mtcars data frame, then run it through bind_rows, and you'll see that the labels are no longer present. This is what I believe is happening to your data.
# Demonstration: labels added with Hmisc do not survive bind_rows().
library(Hmisc)
mtcars2 <- mtcars
# Label each variable of mtcars2 with its upper-cased column name
# (self = FALSE targets the columns rather than the data frame itself).
label(mtcars2, self = FALSE) <- toupper(names(mtcars))
library(dplyr)
# Pass the labelled data frame through bind_rows().
mtcars3 <- bind_rows(mtcars2)
# Compare the labelled original with the bind_rows() result.
identical(mtcars2, mtcars3)
# Inspect which labels remain on the result.
label(mtcars3)

Resources