Read CSV file up to line with unique marker - r

I have many data sets that have extra information beyond a certain line. The files are all csv. I would be able to loop through them and use read.csv with the "skip" argument to clean the top of the data, but the lengths of the data frames are all different. The only commonality is the "--------------- ---------------- ------ -----" line in the Total column that separates the meaningful data from the summaries and extraneous info below it.
Here's how I'm reading in the data without skip = 14 (which is standard across everything).
before<-read.csv("Example.csv", header = FALSE,
col.names = c("CountryID","Name","Type","Symbol","Code","Unit",
"Total", "Measurement", "Value", "Percent", "CO2" ))
However, the ----- marker may be on a different row in each file; the cutoff is the first one that is hit. Here's the data before:
structure(list(CountryID = structure(c(26L, 19L, 21L, 23L, 21L,
7L, 1L, 1L, 1L, 22L, 3L, 1L, 19L, 2L, 8L, 14L, 15L, 13L, 9L,
12L, 18L, 17L, 8L, 13L, 15L, 10L, 8L, 8L, 11L, 16L, 1L, 1L, 1L,
20L, 4L, 6L, 1L, 25L, 5L, 1L, 1L, 1L, 24L, 1L), .Label = c("",
"------------", "-------------", "---------------", "------------------",
" ", "08.15.1997", "10000", "15000", "200", "2000", "2500", "3000",
"45000", "5000", "7000", "8000", "8300", "Country", "Output",
"Production", "Quantity", "Serial Output", "TOTAL SUM", "Unaccounted",
"United Nations Data"), class = "factor"), Name = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 20L, 2L, 1L, 1L, 1L, 21L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 19L, 1L, 1L, 1L, 1L), .Label = c("",
"--------------------", " ", "Bahrain", "Bangladesh", "Barbados",
"Belarus", "Belgium", "Belize", "Benin", "Bhutan", "Bolivia",
"Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria",
"Burkina Faso", "Chad", "Name", "The Bahamas"), class = "factor"),
Type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 4L,
2L, 1L, 1L, 1L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("", "----", " ", "Code", "Type",
"Unit"), class = "factor"), Symbol = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 20L, 22L, 2L, 1L, 1L, 1L, 4L, 5L,
6L, 7L, 9L, 8L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 21L, 1L, 1L, 1L,
1L), .Label = c("", "------------", " ", "BAHM", "BAHR",
"BANG", "BARB", "BELGM", "BELS", "BELZ", "BEN", "BHUT", "BOL",
"BOSHER", "BOTS", "BRAZ", "BRUN", "BULG", "BURKF", "Country",
"private", "Symbol"), class = "factor"), Code = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 19L, 2L, 1L, 1L, 1L, 12L,
15L, 11L, 17L, 4L, 13L, 14L, 9L, 18L, 10L, 5L, 16L, 3L, 7L,
8L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "------------", "1504944270", "2287368539",
"2388991307", "2453202442", "2561470743", "3205402223", "3221488867",
"3230369605", "3247578406", "3712013344", "4307638090", "462793263",
"4835205752", "4854959101", "5842098895", "5932776587", "Code"
), class = "factor"), Unit = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 17L, 16L, 2L, 1L, 1L, 1L, 7L, 9L, 10L, 14L,
12L, 15L, 15L, 11L, 13L, 3L, 8L, 13L, 15L, 6L, 5L, 9L, 1L,
1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"-------------", "100", "1109", "27", "35", "40", "45", "58",
"70", "74", "77", "79", "82", "95", "Output", "Per Unit"), class = "factor"),
Total = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 25L,
24L, 2L, 1L, 1L, 1L, 18L, 5L, 17L, 8L, 23L, 20L, 6L, 9L,
7L, 11L, 12L, 13L, 19L, 15L, 14L, 10L, 3L, 16L, 1L, 1L, 1L,
16L, 1L, 1L, 1L, 21L, 1L, 3L, 22L, 4L), .Label = c("", "---------------",
"--------------- ---------------- ------ -----",
"=============== ================ ====== =====",
"126912", "147431", "170553", "175973", "203728", "230761",
"293789", "304471", "376281", "386526", "399160", "4417002",
"476025", "478030", "502999", "51012", "5610654", "56406056",
"93351", "Output", "Total"), class = "factor"), Measurement = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 2L, 1L, 1L, 1L, 3L,
9L, 3L, 4L, 10L, 9L, 6L, 4L, 5L, 10L, 7L, 9L, 4L, 8L, 10L,
9L, 1L, 1L, 1L, 1L, 1L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "--------", "20", "23", "24", "26", "27",
"28", "29", "30", "420", "Measurement"), class = "factor"),
Value = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 22L,
23L, 2L, 1L, 1L, 1L, 5L, 19L, 11L, 8L, 3L, 18L, 13L, 6L,
4L, 9L, 14L, 17L, 7L, 10L, 12L, 15L, 1L, 16L, 1L, 1L, 1L,
16L, 1L, 1L, 1L, 20L, 1L, 1L, 21L, 1L), .Label = c("", "----------------",
"15150240", "15891735", "16083459", "16959919", "20350968",
"20909501", "21770264", "25121096", "27726279", "30024743",
"34069742", "34841369", "38498281", "468004111", "49524999",
"50512814", "50568702", "540650", "64506", "Country", "Value"
), class = "factor"), Percent = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 23L, 2L, 1L, 1L, 1L, 11L, 12L, 8L, 3L,
17L, 16L, 5L, 10L, 20L, 9L, 6L, 7L, 4L, 15L, 14L, 22L, 1L,
13L, 1L, 1L, 1L, 21L, 1L, 1L, 1L, 19L, 1L, 1L, 18L, 1L), .Label = c("",
"------", "102", "104", "106", "112", "126", "129", "142",
"15", "160", "177", "1775", "180", "191", "24", "25", "5640645",
"650163", "87", "887.5", "95", "Production Percent"), class = "factor"),
CO2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 15L, 14L,
2L, 1L, 1L, 1L, 9L, 4L, 9L, 7L, 4L, 5L, 4L, 7L, 4L, 9L, 4L,
11L, 4L, 12L, 10L, 4L, 1L, 6L, 1L, 1L, 1L, 8L, 1L, 1L, 1L,
3L, 1L, 1L, 13L, 1L), .Label = c("", "-----", "?", "0", "0.2",
"0.6", "1", "19.4", "2", "2.2", "4", "5", "564065", "CO2",
"Cur."), class = "factor")), class = "data.frame", row.names = c(NA,
-44L))
And here's how I'm hoping it could look:
structure(list(CountryID = c(10000L, 45000L, 5000L, 3000L, 15000L,
2500L, 8300L, 8000L, 10000L, 3000L, 5000L, 200L, 10000L, 10000L,
2000L, 7000L), Name = structure(c(16L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L), .Label = c("Bahrain",
"Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin",
"Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil",
"Brunei", "Bulgaria", "Burkina Faso", "The Bahamas"), class = "factor"),
Type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "Unit", class = "factor"),
Symbol = structure(c(1L, 2L, 3L, 4L, 6L, 5L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L), .Label = c("BAHM", "BAHR",
"BANG", "BARB", "BELGM", "BELS", "BELZ", "BEN", "BHUT", "BOL",
"BOSHER", "BOTS", "BRAZ", "BRUN", "BULG", "BURKF"), class = "factor"),
Code = c(3712013344, 4835205752, 3247578406, 5842098895,
2287368539, 4307638090, 462793263, 3221488867, 5932776587,
3230369605, 2388991307, 4854959101, 1504944270, 2561470743,
3205402223, 2453202442), Unit = c(40L, 58L, 70L, 82L, 77L,
95L, 95L, 74L, 79L, 100L, 45L, 79L, 95L, 35L, 27L, 58L),
Total = c(478030L, 126912L, 476025L, 175973L, 93351L, 51012L,
147431L, 203728L, 170553L, 293789L, 304471L, 376281L, 502999L,
399160L, 386526L, 230761L), Measurement = c(20L, 29L, 20L,
23L, 30L, 29L, 26L, 23L, 24L, 30L, 27L, 29L, 23L, 28L, 30L,
29L), Value = c(16083459L, 50568702L, 27726279L, 20909501L,
15150240L, 50512814L, 34069742L, 16959919L, 15891735L, 21770264L,
34841369L, 49524999L, 20350968L, 25121096L, 30024743L, 38498281L
), Percent = c(160L, 177L, 129L, 102L, 25L, 24L, 106L, 15L,
87L, 142L, 112L, 126L, 104L, 191L, 180L, 95L), CO2 = c(2,
0, 2, 1, 0, 0.2, 0, 1, 0, 2, 0, 4, 0, 5, 2.2, 0)), class = "data.frame", row.names = c(NA,
-16L))
Can this be integrated into the read.csv call, or is it easier to clean the bottom of the data some other way?

Three thoughts:
Use readLines (as @user2554330 suggested), find/remove the specific row, filter it, then parse the text vector with read.csv(text = ...), the least of the three.
before[seq_len(min(head(which(!grepl("^[^- ]+$", before$Total)),1)-1L,nrow(before))),]; a bit complicated, granted, but it does what you need (assuming that you've already filtered the first 14 rows with skip=).
Use an external script such as sed -e '1,14d;/^[ -]\+$/{g;q;}' in a pipe(...)-type thing.

Read it twice. The first time, use readLines("Example.csv"), and look through the lines for the marker of the end of data. Say it's on line n. Then in the second read, use
read.csv("Example.csv", header = FALSE,
col.names = c("CountryID","Name","Type","Symbol","Code","Unit",
"Total", "Measurement", "Value", "Percent", "CO2" ), nrows = n - 1)
(or maybe nrows will need to be a different value, if you're skipping some).

Related

Editing a Row Value in R

I have a data frame that looks like this
Pick Name Team Round Player Position Position..
1 1 Javi Texans 1 Patrick Mahomes QB QB1
2 2 Justin Chiefs 1 Russell Wilson QB QB2
3 3 Blake Titans 1 Lamar Jackson QB QB3
4 4 Connor Dolphins 1 Deshaun Watson QB QB4
5 5 Isaac Jaguars 1 Carson Wentz QB QB5
6 6 Fitz Rams 1 Dak Prescott QB QB6
with more rows of course, and some of the rows in the Player, Position and Position.. columns are empty because those players haven't been drafted yet. Is there a way to just manually insert the names, pos, pos.. of the newly drafted players?
I tried
Redraft[112, "Player"] <- "Calvin Ridley"; Redraft
since the empty cells start on row 112, but the cell just came up as NA.
When I do that I also get a warning message:
Warning message:
In `[<-.factor`(`*tmp*`, iseq, value = "Calvin Ridley") :
invalid factor level, NA generated
and the data frame looks like
08 108 Jack Packers 4 TE3 Darren Waller TE
109 109 Justin Saints 4 LT6 Taylor Lewan LT
110 110 Sam Steelers 4 FS5 Kevin Byard FS
111 111 Jeremy Falcons 4 LB7 Isaiah Simmons LB
112 112 Will Bills 4 1 <NA>
113 113 Jeremy Colts 4 1
And here's the whole data frame:
structure(list(Pick = 1:384, Name = structure(c(12L, 14L, 1L,
2L, 7L, 5L, 8L, 6L, 9L, 12L, 9L, 2L, 10L, 16L, 11L, 13L, 20L,
13L, 17L, 14L, 8L, 3L, 3L, 19L, 5L, 19L, 7L, 1L, 6L, 4L, 18L,
15L, 15L, 18L, 4L, 6L, 1L, 7L, 19L, 5L, 19L, 3L, 3L, 8L, 14L,
17L, 13L, 20L, 13L, 11L, 16L, 10L, 2L, 9L, 12L, 9L, 6L, 8L, 5L,
7L, 2L, 1L, 14L, 12L, 12L, 14L, 1L, 2L, 7L, 5L, 8L, 6L, 9L, 12L,
9L, 2L, 10L, 16L, 11L, 13L, 20L, 13L, 17L, 14L, 8L, 3L, 3L, 19L,
5L, 19L, 7L, 1L, 6L, 4L, 18L, 15L, 15L, 18L, 4L, 6L, 1L, 7L,
19L, 5L, 19L, 3L, 3L, 8L, 14L, 17L, 13L, 20L, 13L, 11L, 16L,
10L, 2L, 9L, 12L, 9L, 6L, 8L, 5L, 7L, 2L, 1L, 14L, 12L, 12L,
14L, 1L, 2L, 7L, 5L, 8L, 6L, 9L, 12L, 9L, 2L, 10L, 16L, 11L,
13L, 20L, 13L, 17L, 14L, 8L, 3L, 3L, 19L, 5L, 19L, 7L, 1L, 6L,
4L, 18L, 15L, 15L, 18L, 4L, 6L, 1L, 7L, 19L, 5L, 19L, 3L, 3L,
8L, 14L, 17L, 13L, 20L, 13L, 11L, 16L, 10L, 2L, 9L, 12L, 9L,
6L, 8L, 5L, 7L, 2L, 1L, 14L, 12L, 12L, 14L, 1L, 2L, 7L, 5L, 8L,
6L, 9L, 12L, 9L, 2L, 10L, 16L, 11L, 13L, 20L, 13L, 17L, 14L,
8L, 3L, 3L, 19L, 5L, 19L, 7L, 1L, 6L, 4L, 18L, 15L, 15L, 18L,
4L, 6L, 1L, 7L, 19L, 5L, 19L, 3L, 3L, 8L, 14L, 17L, 13L, 20L,
13L, 11L, 16L, 10L, 2L, 9L, 12L, 9L, 6L, 8L, 5L, 7L, 2L, 1L,
14L, 12L, 12L, 14L, 1L, 2L, 7L, 5L, 8L, 6L, 9L, 12L, 9L, 2L,
10L, 16L, 11L, 13L, 20L, 13L, 17L, 14L, 8L, 3L, 3L, 19L, 5L,
19L, 7L, 1L, 6L, 4L, 18L, 15L, 15L, 18L, 4L, 6L, 1L, 7L, 19L,
5L, 19L, 3L, 3L, 8L, 14L, 17L, 13L, 20L, 13L, 11L, 16L, 10L,
2L, 9L, 12L, 9L, 6L, 8L, 5L, 7L, 2L, 1L, 14L, 12L, 12L, 14L,
1L, 2L, 7L, 5L, 8L, 6L, 9L, 12L, 9L, 2L, 10L, 16L, 11L, 13L,
20L, 13L, 17L, 14L, 8L, 3L, 3L, 19L, 5L, 19L, 7L, 1L, 6L, 4L,
18L, 15L, 15L, 18L, 4L, 6L, 1L, 7L, 19L, 5L, 19L, 3L, 3L, 8L,
14L, 17L, 13L, 20L, 13L, 11L, 16L, 10L, 2L, 9L, 12L, 9L, 6L,
8L, 5L, 7L, 2L, 1L, 14L, 12L), .Label = c("Blake", "Connor",
"Dakota", "FFB", "Fitz", "Haydon", "Isaac", "Jack", "Jackson",
"Jacob", "Jacob H", "Javi", "Jeremy", "Justin", "Nick", "Pete",
"Sam", "Simon", "Tucker", "Will"), class = "factor"), Team = structure(c(30L,
10L, 31L, 13L, 17L, 24L, 18L, 6L, 3L, 8L, 7L, 28L, 9L, 21L, 14L,
11L, 4L, 15L, 29L, 27L, 20L, 1L, 25L, 5L, 23L, 26L, 32L, 19L,
12L, 16L, 22L, 2L, 2L, 22L, 16L, 12L, 19L, 32L, 26L, 23L, 5L,
25L, 1L, 20L, 27L, 29L, 15L, 4L, 11L, 14L, 21L, 9L, 28L, 7L,
8L, 3L, 6L, 18L, 24L, 17L, 13L, 31L, 10L, 30L, 30L, 10L, 31L,
13L, 17L, 24L, 18L, 6L, 3L, 8L, 7L, 28L, 9L, 21L, 14L, 11L, 4L,
15L, 29L, 27L, 20L, 1L, 25L, 5L, 23L, 26L, 32L, 19L, 12L, 16L,
22L, 2L, 2L, 22L, 16L, 12L, 19L, 32L, 26L, 23L, 5L, 25L, 1L,
20L, 27L, 29L, 15L, 4L, 11L, 14L, 21L, 9L, 28L, 7L, 8L, 3L, 6L,
18L, 24L, 17L, 13L, 31L, 10L, 30L, 30L, 10L, 31L, 13L, 17L, 24L,
18L, 6L, 3L, 8L, 7L, 28L, 9L, 21L, 14L, 11L, 4L, 15L, 29L, 27L,
20L, 1L, 25L, 5L, 23L, 26L, 32L, 19L, 12L, 16L, 22L, 2L, 2L,
22L, 16L, 12L, 19L, 32L, 26L, 23L, 5L, 25L, 1L, 20L, 27L, 29L,
15L, 4L, 11L, 14L, 21L, 9L, 28L, 7L, 8L, 3L, 6L, 18L, 24L, 17L,
13L, 31L, 10L, 30L, 30L, 10L, 31L, 13L, 17L, 24L, 18L, 6L, 3L,
8L, 7L, 28L, 9L, 21L, 14L, 11L, 4L, 15L, 29L, 27L, 20L, 1L, 25L,
5L, 23L, 26L, 32L, 19L, 12L, 16L, 22L, 2L, 2L, 22L, 16L, 12L,
19L, 32L, 26L, 23L, 5L, 25L, 1L, 20L, 27L, 29L, 15L, 4L, 11L,
14L, 21L, 9L, 28L, 7L, 8L, 3L, 6L, 18L, 24L, 17L, 13L, 31L, 10L,
30L, 30L, 10L, 31L, 13L, 17L, 24L, 18L, 6L, 3L, 8L, 7L, 28L,
9L, 21L, 14L, 11L, 4L, 15L, 29L, 27L, 20L, 1L, 25L, 5L, 23L,
26L, 32L, 19L, 12L, 16L, 22L, 2L, 2L, 22L, 16L, 12L, 19L, 32L,
26L, 23L, 5L, 25L, 1L, 20L, 27L, 29L, 15L, 4L, 11L, 14L, 21L,
9L, 28L, 7L, 8L, 3L, 6L, 18L, 24L, 17L, 13L, 31L, 10L, 30L, 30L,
10L, 31L, 13L, 17L, 24L, 18L, 6L, 3L, 8L, 7L, 28L, 9L, 21L, 14L,
11L, 4L, 15L, 29L, 27L, 20L, 1L, 25L, 5L, 23L, 26L, 32L, 19L,
12L, 16L, 22L, 2L, 2L, 22L, 16L, 12L, 19L, 32L, 26L, 23L, 5L,
25L, 1L, 20L, 27L, 29L, 15L, 4L, 11L, 14L, 21L, 9L, 28L, 7L,
8L, 3L, 6L, 18L, 24L, 17L, 13L, 31L, 10L, 30L), .Label = c("49ers",
"Bears", "Bengals", "Bills", "Broncos", "Browns", "Buccaneers",
"Cardinals", "Chargers", "Chiefs", "Colts", "Cowboys", "Dolphins",
"Eagles", "Falcons", "Giants", "Jaguars", "Jets", "Lions", "Packers",
"Panthers", "Patriots", "Raiders", "Rams", "Ravens", "Redskins",
"Saints", "Seahawks", "Steelers", "Texans", "Titans", "Vikings"
), class = "factor"), Round = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L),
Pos.. = structure(c(49L, 60L, 70L, 71L, 72L, 73L, 74L, 75L,
27L, 76L, 43L, 50L, 51L, 92L, 52L, 53L, 54L, 55L, 77L, 56L,
57L, 58L, 89L, 59L, 85L, 61L, 103L, 3L, 106L, 35L, 62L, 36L,
63L, 42L, 10L, 107L, 18L, 64L, 108L, 65L, 109L, 19L, 11L,
110L, 86L, 37L, 111L, 12L, 20L, 112L, 66L, 21L, 38L, 13L,
90L, 78L, 81L, 30L, 14L, 15L, 82L, 39L, 16L, 17L, 93L, 94L,
4L, 22L, 95L, 96L, 2L, 97L, 67L, 5L, 68L, 87L, 83L, 84L,
6L, 31L, 44L, 98L, 99L, 100L, 7L, 28L, 101L, 32L, 29L, 8L,
33L, 88L, 69L, 79L, 102L, 9L, 104L, 40L, 23L, 24L, 105L,
25L, 45L, 80L, 46L, 26L, 47L, 91L, 48L, 34L, 41L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("1", "C1", "CB1", "CB10", "CB11", "CB12",
"CB13", "CB14", "CB15", "CB2", "CB3", "CB4", "CB5", "CB6",
"CB7", "CB8", "CB9", "DE1", "DE2", "DE3", "DE4", "DE5", "DE6",
"DE7", "DE8", "DE9", "DT1", "DT2", "DT3", "FS1", "FS2", "FS3",
"FS4", "FS5", "LB1", "LB2", "LB3", "LB4", "LB5", "LB6", "LB7",
"LG1", "LT1", "LT2", "LT3", "LT4", "LT5", "LT6", "QB1", "QB10",
"QB11", "QB12", "QB13", "QB14", "QB15", "QB16", "QB17", "QB18",
"QB19", "QB2", "QB20", "QB21", "QB22", "QB23", "QB24", "QB25",
"QB26", "QB27", "QB28", "QB3", "QB4", "QB5", "QB6", "QB7",
"QB8", "QB9", "RB1", "RB2", "RB3", "RB4", "RG1", "RT1", "RT2",
"RT3", "SS1", "SS2", "SS3", "SS4", "TE1", "TE2", "TE3", "WR1",
"WR10", "WR11", "WR12", "WR13", "WR14", "WR15", "WR16", "WR17",
"WR18", "WR19", "WR2", "WR20", "WR21", "WR3", "WR4", "WR5",
"WR6", "WR7", "WR8", "WR9"), class = "factor"), Player = structure(c(87L,
91L, 72L, 38L, 14L, 24L, 79L, 78L, 3L, 57L, 90L, 70L, 107L,
31L, 39L, 10L, 56L, 68L, 20L, 4L, 94L, 93L, 45L, 52L, 51L,
44L, 80L, 97L, 62L, 67L, 40L, 98L, 101L, 89L, 50L, 9L, 85L,
104L, 19L, 41L, 109L, 58L, 106L, 81L, 37L, 26L, 29L, 27L,
18L, 86L, 60L, 84L, 17L, 74L, 105L, 95L, 111L, 63L, 76L,
55L, 92L, 110L, 12L, 15L, 64L, 96L, 34L, 25L, 61L, 103L,
54L, 21L, 53L, 49L, 59L, 108L, 71L, 83L, 77L, 82L, 102L,
7L, 65L, 2L, 69L, 32L, 22L, 75L, 43L, 5L, 8L, 46L, 35L, 42L,
23L, 88L, 6L, 11L, 60L, 48L, 16L, 13L, 30L, 36L, 73L, 33L,
99L, 28L, 100L, 66L, 47L, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "A.J. Brown",
"Aaron Donald", "Aaron Rodgers", "Adoree' Jackson", "Allen Robinson",
"Amari Cooper", "Anthony Harris", "Antonio Brown", "Baker Mayfield",
"Bobby Wagner", "Byron Jones ", "Cameron Jordan", "Carson Wentz",
"Casey Hayward", "CeDee Lamb", "Chandler Jones", "Chase Young",
"Chris Godwin", "Christian McCaffrey", "Cooper Kupp", "Courtland Sutton",
"D.J. Moore", "Dak Prescott", "Danielle Hunter", "Darius Leonard",
"Darius Slay", "Darren Waller", "DaVante Adams", "David Bakhtiari",
"DeAndre Hopkins", "Deforest Buckner", "Demarcus Lawrence",
"Denzel Ward", "Derek Carr", "Derrick Henry", "Derwin James",
"Deshaun Watson", "Drew Brees", "Drew Lock", "Dwayne Haskins",
"Ezekiel Elliott", "Fletcher Cox", "Gardner Minshew", "George Kittle",
"Harrison Smith", "Isaiah Simmons", "J.J. Watt", "Jaire Alexander",
"Jalen Ramsey", "Jamal Adams", "Jared Goff", "Jarrett Stidham",
"Jason Kelce", "Jeffrey Okudah", "Jimmy Garappolo", "Joe Burrow",
"Joey Bosa", "Jordan Love", "Josh Allen", "Juju Smith-Schuster",
"Julio Jones", "Justin Simmons", "Keenan Allen", "Kenny Golladay",
"Kevin Byard", "Khalil Mack", "Kirk Cousins", "Kyle Fuller",
"Kyler Murray ", "La'el Collins", "Lamar Jackson ", "Laremy Tunsil",
"Marcus Peters", "Marcus Williams", "Marlon Humphrey", "Marshon Lattimore",
"Matt Ryan", "Matthew Stafford", "Michael Thomas", "Mike Evans",
"Minkah Fitzpatrick", "Mitchell Schwartz ", "Myles Garrett",
"Nick Bosa", "Odell Beckham Jr.", "Patrick Mahomes ", "Patrick Peterson",
"Quenton Nelson", "Ronnie Stanley", "Russell Wilson ", "Ryan Ramczyk",
"Ryan Tannehill", "Sam Darnold", "Saquon Barkley", "Stefon Diggs",
"Stephon Gilmore", "T.J. Watt", "Taylor Decker", "Taylor Lewan",
"Teddy Bridgewater", "Terron Armstead", "Terry McLaurin",
"Tom Brady", "Travis Kelce", "Tre White", "Tua Tagovailoa",
"Tyrann Mathieu", "Tyreek Hill", "Von Miller", "Zack Martin"
), class = "factor"), Position = structure(c(10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 5L, 10L, 9L, 10L, 10L, 16L, 10L,
10L, 10L, 10L, 11L, 10L, 10L, 10L, 15L, 10L, 14L, 10L, 16L,
3L, 16L, 7L, 10L, 7L, 10L, 8L, 3L, 16L, 4L, 10L, 16L, 10L,
16L, 4L, 3L, 16L, 14L, 7L, 16L, 3L, 4L, 16L, 10L, 4L, 7L,
3L, 15L, 11L, 12L, 6L, 3L, 3L, 13L, 7L, 3L, 3L, 16L, 16L,
3L, 4L, 16L, 16L, 2L, 16L, 10L, 3L, 10L, 14L, 13L, 13L, 3L,
6L, 9L, 16L, 16L, 16L, 3L, 5L, 16L, 6L, 5L, 3L, 6L, 14L,
10L, 11L, 16L, 3L, 16L, 7L, 4L, 4L, 16L, 4L, 9L, 11L, 9L,
4L, 9L, 15L, 9L, 6L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "C",
"CB", "DE", "DT", "FS", "LB", "LG", "LT", "QB", "RB", "RG",
"RT", "SS", "TE", "WR"), class = "factor")), row.names = c(NA,
384L), class = "data.frame")
You're dealing with a factor column. "Calvin Ridley" isn't yet a level of the factor. After adding it as a level, you can assign it to the cell.
class(Redraft$Player)
# [1] "factor"
levels(Redraft$Player) <- c(levels(Redraft$Player), "Calvin Ridley")
Redraft[112, "Player"] <- "Calvin Ridley"
Redraft[112, "Player"]
# [1] Calvin Ridley
# 112 Levels: A.J. Brown Aaron Donald Aaron Rodgers Adoree' Jackson Allen Robinson ... Calvin Ridley
jay.sf's answer is correct, of course, but I'd add my 2¢ since I think it's missing the point.
The reason you have factors instead of plain strings here in the first place, is kind of a historical accident with R being a statistical language. In practice, you rarely want to be dealing with factors in a dataframe of this kind. You probably want your player names to be plain-old strings.
Typically when you read a dataframe from a file, e.g. via read.csv, you have the option to pass the argument stringsAsFactors = FALSE, to ensure that strings are kept as strings rather than converted to factors. Some people (e.g. this guy) feel so strongly against this bizarre default behaviour that they include a line in their .Rprofile to make importing data with stringsAsFactors = FALSE their default (but this is dangerous for writing code that works the same across users with different .Rprofile initializations!). Note that since R 4.0.0, stringsAsFactors = FALSE is the default.
If you already have the dataset, you can convert your factors to strings instead:
df[ , 'Player'] <- as.character( df[ , 'Player' ] )
You can now continue with your analysis without worrying about factors and their annoyances.
E.g. setting a new name is as simple as you'd expect:
df[112,'Player'] <- 'Calvin Ridley'

plot area truncated when using geom_dotplot

consider the following example data:
ex = structure(list(group = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 6L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 6L,
1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L,
5L, 6L, 1L, 2L, 1L, 2L, 3L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L,
4L), .Label = c("A", "B", "C", "D", "E", "F"), class = "factor"),
ID = structure(c(35L, 35L, 35L, 35L, 35L, 35L, 1L, 1L, 1L,
1L, 1L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 9L, 9L,
9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 21L, 21L, 22L, 22L,
22L, 22L, 2L, 3L, 4L, 5L, 8L, 15L, 16L, 17L, 18L, 19L, 19L,
20L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 30L, 31L, 32L,
33L, 34L), .Label = c("10", "107", "108", "109", "124", "17",
"18", "187", "19", "21", "24", "26", "27", "28", "335", "336",
"339", "340", "341", "342", "38", "39", "576", "577", "578",
"579", "580", "581", "582", "583", "584", "585", "586", "592",
"6"), class = "factor"), value = c(1L, 7L, 4L, 4L, 3L, 9L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 5L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 33L, 27L, 28L, 21L, 28L, 1L, 3L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 3L, 2L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L)), class = "data.frame",
row.names = c(NA, -88L), .Names = c("group", "ID", "value")
)
Note that in group A, value = 1 for every ID. I use ggplot2 to create dot plot based on counts of the value variable using geom_dotplot and faceting by group:
ggplot(ex) + aes(x = value) +
geom_dotplot(binwidth = 1, method = "histodot") +
facet_wrap(~ group)
The dot stack in the first facet is cut off, even when exported using ggsave. Changing the y-axis limits has no effect, but changing the aspect ratio so that H >= W seems to fix the issue (usually by adding way more space to the top than necessary). Is this a bug, or am I specifying my dot plot incorrectly?
EDIT
One workaround is to flip my dotplot and bin by the y variable:
ggplot(ex) + aes(x = group, y = value) +
geom_dotplot(binwidth = 1, method = "histodot",
binaxis = "y", stackdir = "centerwhole") +
facet_wrap(~ group, scales = "free_x")
Two other parameters that can help you are stackratio and dotsize. For example:
ggplot(ex) + aes(x = value) +
geom_dotplot(binwidth = 1, method = "histodot", stackratio = 0.9, dotsize = .75) +
facet_wrap(~ group) +
scale_y_continuous(NULL, breaks = NULL)
You would need to tweak the numbers until you got the layout you wanted.
I found an interesting workaround using geom_bar that achieves the same structure as a dot plot but with rectangles:
ggplot(ex) + aes(x = value, group = ID) +
geom_bar(color = "black", fill = "white", width = 1) +
facet_wrap(~ group)
Although it results in rectangles (rather than dots) and you can't control the stack spacing. The rectangles get resized according to the plot window, which would be equivalent to tweaking the dot size in geom_dotplot. Also, it begs the question "why not just use a regular bar plot?"

Error in multiple selectInput in Shiny

This app is meant to create a subset of a larger dataset based on the 2 inputs selection in Shiny app. I have used a dropdownbutton function I found on here.
# func --------------------------------------------------------------------
# Build a Bootstrap-style dropdown: a toggle button plus a menu whose entries
# are the UI elements passed through `...`.
#
# Arguments:
#   label   Content placed inside the toggle button, before the caret.
#   status  One of the listed choices (validated by match.arg); it is pasted
#           into the button's "btn-<status>" class.
#   ...     Elements to place in the menu; each one is wrapped in its own
#           tags$li with a 10px left/right margin.
#   width   Optional menu width, passed through validateCssUnit(). When NULL,
#           no style value is supplied for the menu.
#
# Returns the tags$div(class = "dropdown") holding the button, the menu and a
# small script tag.
dropdownButton <- function(label = "",
                           status = c("default", "primary", "success",
                                      "info", "warning", "danger"),
                           ...,
                           width = NULL) {
  status <- match.arg(status)

  # dropdown menu content: optional width style, then one <li> per element
  menu_style <- NULL
  if (!is.null(width)) {
    menu_style <- paste0("width: ", validateCssUnit(width), ";")
  }
  menu_items <- lapply(
    list(...),
    tags$li,
    style = "margin-left: 10px; margin-right: 10px;"
  )

  # dropdown button appearance: the label followed by a caret
  toggle <- tags$button(
    class = paste0("btn btn-", status, " dropdown-toggle"),
    type = "button",
    `data-toggle` = "dropdown",
    label,
    tags$span(class = "caret")
  )

  menu <- tags$ul(class = "dropdown-menu", style = menu_style, menu_items)

  # The script stops click events on '.dropdown-menu' elements from
  # propagating (presumably so the menu stays open while its inputs are
  # being used -- confirm against the Bootstrap version in use).
  # The string literal is kept exactly as-is, including its line breaks.
  keep_open <- tags$script(
"$('.dropdown-menu').click(function(e) {
e.stopPropagation();
});")

  # final result
  tags$div(class = "dropdown", toggle, menu, keep_open)
}
My app is meant to create a subset of a larger dataset based on the selections made in two inputs in a Shiny app. For both dropdown menus, I want a multiple-select option similar to multiple=TRUE in selectInput. Although the dropdownButton menu allows me to select multiple options, it randomly omits data from the output that should be included. I get the correct subset when I use selectInput. Any solutions?
Second, my selectAll button doesn't work.
The problem is that when I select
ou1 <- levels(df$OperatingUnit)
ou <- setNames(as.list(ou1),ou1)
indi1 <- levels(df$indicator)
indi <- setNames(as.list(indi1),indi1)
ui->...
inputPanel(
dropdownButton(
label = "Select OU", status = "default", width = 120,
actionButton(inputId = "all", label = "Select all"),
checkboxGroupInput(inputId = "check1", label = "Choose", choices = paste(ou))
),
dropdownButton(
label = "Select Indicators", status = "default", width = 150,
checkboxGroupInput(inputId = "check2", label = "Choose", choices = paste(indi))
),
tableOutput("tab1")
))
Server -->
# Server logic: filters `df` by the two checkbox groups, shows the first rows
# of the result, offers the result as a CSV download, and toggles the "check1"
# selection from the "all" button.
shinyServer(function(input, output, session) {
dataset - changes based on menu selections
# Rows of `df` matching the ticked OperatingUnit and indicator boxes.
# NOTE(review): input$check1 / input$check2 can hold several values; `==`
# then compares element-wise with recycling instead of testing membership,
# so matching rows can be dropped. `%in%` tests membership.
df1 <- reactive({
df[df$OperatingUnit==input$check1 & df$indicator==input$check2,]
})
# Show the first 10 rows of the filtered data.
output$tab1<- renderTable({
head(df1(), n = 10)
})
# Download of the filtered data as "PSNU_IM.csv".
# NOTE(review): no matching download control is visible in the UI snippet.
output$downloadData <- downloadHandler(
filename = function() {
paste("PSNU_IM", '.csv', sep='')
},
content = function(file) {
write.csv(df1(), file)
}
)
# Select all / Unselect all
# When nothing is ticked in "check1", tick every OU; otherwise clear the
# selection.
observeEvent(input$all, {
if (is.null(input$check1)) {
updateCheckboxGroupInput(
session = session, inputId = "check1", selected = paste(ou)
)
} else {
updateCheckboxGroupInput(
session = session, inputId = "check1", selected = ""
)
}
})
})
)`
Subset of my data:
structure(list(Region = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "Africa", class = "factor"), OperatingUnit = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L), .Label = c("Angola", "Botswana",
"Cameroon"), class = "factor"), SNU1Uid = structure(c(5L, 5L,
9L, 9L, 4L, 5L, 1L, 4L, 5L, 4L, 4L, 5L, 4L, 9L, 3L, 9L, 4L, 9L,
4L, 4L, 4L, 3L, 4L, 4L, 5L, 5L, 9L, 5L, 4L, 4L, 3L, 3L, 9L, 4L,
4L, 9L, 4L, 7L, 8L, 6L, 2L), .Label = c("", "BTRiZA58YEx", "HxXMyMSODnm",
"IaFLxtEwIwk", "Jm3YTCERxvX", "MERiZA58YEx", "MTRiZA58YEx", "MTRiZA68YEx",
"MTRiZG58YEx"), class = "factor"), PSNUuid = structure(c(29L,
11L, 23L, 23L, 10L, 29L, 1L, 13L, 18L, 30L, 8L, 2L, 9L, 7L, 15L,
19L, 33L, 16L, 27L, 31L, 21L, 3L, 20L, 25L, 14L, 32L, 7L, 28L,
22L, 22L, 24L, 12L, 16L, 8L, 9L, 5L, 10L, 4L, 6L, 17L, 26L), .Label = c("",
"a2nQs7VmYiD", "AbJXFBhkc4U", "AFX0cjkDX6A", "AFX0djkDX6A", "AFX0djkFX6A",
"AW764lDxjdr", "clasYX5teTV", "fHkrk3yL1uU", "gOaZeiwAoCD", "GP5qeoiXMtA",
"hvNtuMClAXW", "hz2Tdvrxqbp", "JIcgSOsSpSV", "js5vRAkkqxB", "k7lIVnxWbm7",
"KFX0djkDX6A", "MIvAFWhI9Yc", "Ns6ZJi0iwJj", "oAgxCCStCQe", "PJKaNADvNfi",
"r5xWCJ4ZqYQ", "rjDWLPMhaY0", "VaHOXJU4rir", "vewKgey8sOW", "VFX0djkDX6A",
"Vq1CnJNw46x", "vqaBeYFtUn0", "VZPPWeDuJqU", "YuCzvkHV2X5", "YXiMSh7CqES",
"zU7eKPwFr69", "ZxJNWnk4hYW"), class = "factor"), indicator = structure(c(5L,
5L, 1L, 5L, 1L, 1L, 4L, 1L, 1L, 2L, 5L, 1L, 1L, 5L, 1L, 5L, 5L,
5L, 5L, 3L, 1L, 5L, 1L, 1L, 5L, 5L, 1L, 5L, 1L, 3L, 1L, 1L, 5L,
5L, 1L, 5L, 5L, 6L, 6L, 5L, 5L), .Label = c("CARE_CURR", "GEND_GBV",
"GEND_NORM", "HRH_PRE", "TX_CURR", "TX_NEW"), class = "factor"),
numeratorDenom = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "N", class = "factor"), indicatorType = structure(c(1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("DSD",
"TA"), class = "factor"), disaggregate = structure(c(4L,
1L, 1L, 6L, 6L, 1L, 5L, 1L, 2L, 1L, 6L, 1L, 1L, 3L, 1L, 6L,
6L, 1L, 1L, 6L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Age/Sex",
"Age/Sex Aggregated", "Age/Sex, Aggregated", "Aggregated Age/Sex",
"Grad Cadre", "Total Numerator"), class = "factor"), categoryOptionComboName = structure(c(8L,
7L, 16L, 13L, 13L, 23L, 19L, 14L, 9L, 3L, 13L, 16L, 21L,
2L, 17L, 13L, 13L, 2L, 11L, 13L, 18L, 1L, 14L, 4L, 1L, 10L,
23L, 12L, 20L, 6L, 22L, 15L, 7L, 12L, 16L, 5L, 2L, 12L, 16L,
5L, 2L), .Label = c("<1, Female", "<1, Male", "<10, Female",
"<15, Male", "1-4, Male", "10-14, Male", "15-19, Female",
"15+, Female", "15+, Male", "20+, Female", "20+, Male", "5-14, Male",
"default", "Female, 15-19", "Female, 20-24", "Female, 25-49",
"Female, 5-9", "Female, 50+", "Lab Professionals", "Male, <1",
"Male, 1-4", "Male, 20-24", "Male, 50+"), class = "factor"),
Age = structure(c(10L, 9L, 13L, 1L, 1L, 14L, 1L, 9L, 10L,
3L, 1L, 13L, 5L, 2L, 6L, 1L, 1L, 2L, 12L, 1L, 14L, 2L, 9L,
4L, 2L, 12L, 14L, 7L, 2L, 8L, 11L, 11L, 9L, 7L, 13L, 5L,
2L, 7L, 13L, 5L, 2L), .Label = c(" ", " <01", " <10",
" <15", " 01-04", " 05-09", " 05-14", " 10-14", " 15-19",
" 15+", " 20-24", " 20+", " 25-49", " 50+"), class = "factor"),
Sex = structure(c(2L, 2L, 2L, 1L, 1L, 3L, 1L, 2L, 3L, 2L,
1L, 2L, 3L, 3L, 2L, 1L, 1L, 3L, 3L, 1L, 2L, 2L, 2L, 3L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 3L,
3L), .Label = c("", "Female", "Male"), class = "factor"),
FY2016Q4 = c(60L, 42L, 144L, 606L, 977L, 509L, 475L, 827L,
455L, 966L, 432L, 372L, 333L, 0L, 583L, 339L, 423L, 107L,
341L, 45L, 818L, 299L, 679L, 261L, 964L, 338L, 470L, 879L,
421L, 351L, 490L, 464L, 416L, 844L, 752L, 708L, 506L, 889L,
230L, 586L, 576L), FY2016APR = c(4L, 471L, 106L, 876L, 873L,
490L, 65L, 360L, 232L, 673L, 646L, 548L, 495L, 846L, 215L,
11L, 405L, 953L, 411L, 610L, 907L, 755L, 580L, 967L, 594L,
213L, 227L, 54L, 25L, 930L, 266L, 512L, 277L, 150L, 454L,
478L, 720L, 666L, 249L, 886L, 781L), FY2017_TARGETS = c(464L,
853L, 907L, 598L, 685L, 791L, 232L, 981L, 217L, 705L, 920L,
890L, 144L, 545L, 159L, 615L, 72L, 570L, 325L, 138L, 919L,
743L, 316L, 673L, 867L, 488L, 652L, 683L, 805L, 616L, 701L,
911L, 985L, 595L, 576L, 132L, 396L, 856L, 721L, 353L, 105L
)), .Names = c("Region", "OperatingUnit", "SNU1Uid", "PSNUuid",
"indicator", "numeratorDenom", "indicatorType", "disaggregate",
"categoryOptionComboName", "Age", "Sex", "FY2016Q4", "FY2016APR",
"FY2017_TARGETS"), class = "data.frame", row.names = c(NA, -41L
))
Hi, when you filter your data.frame, use %in% instead of ==, because the two vectors you compare don't have the same length, e.g.:
# Keep the rows whose OperatingUnit and indicator are both among the ticked
# checkbox values.
df1 <- reactive({
  ou_selected <- df$OperatingUnit %in% input$check1
  indicator_selected <- df$indicator %in% input$check2
  df[ou_selected & indicator_selected, ]
})
For the second "select all" button, you have to put an observeEvent in your server if you want something to happen when you click on it:
# Toggle for the indicator checkboxes: tick every indicator when none is
# selected, otherwise clear the selection.
observeEvent(input$all1, {
  new_selection <- if (is.null(input$check2)) paste(indi) else ""
  updateCheckboxGroupInput(
    session = session,
    inputId = "check2",
    selected = new_selection
  )
})

Calculating seasonal index from tbats components

I have aggregated retail weekly data with seasonal periods of 52.2 (a 53rd week every five years). I want to use this aggregated data to calculate a seasonal index that can be applied to each item within the category to derive its de-seasonalised demand.
Using stl, I would calculate the seasonal index as "seasonal" / "trend" + 1 (normalised to 52). I switched to tbats because my seasonality was not an integer and I have multiple seasonal periods (52.2 and 261)
I am using tbats with seasonal.periods = 52.2 and extract the components using tbats.components. The components are "observed", "level" and "season". Google has not revealed much in terms of what these components are and how to consume them. I also extracted the residuals
I noticed that "observed" is the log of my data. I also notice that season is changing over time (which is exactly what I want)
My questions are:
1. Is "season" a natural log too?
2. How can I extract the future "season" values? I can run a forecast on the data, so I am assuming that there must be a projected "season".
3. What would be the best approach to calculating an "index", considering that it will be divided into the granular data? I am currently using: exp("season") / centered moving average(exp("season"))
My Data:
weeklyu <-structure(list(V1 = c(8L, 5L, 7L, 3L, 1L, 2L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 8L, 2L,
4L, 8L, 6L, 7L, 8L, 9L, 15L, 15L, 13L, 9L, 16L, 19L, 16L, 16L,
10L, 31L, 45L, 90L, 185L, 34L, 8L, 19L, 11L, 19L, 21L, 8L, 5L,
7L, 6L, 3L, 10L, 2L, 2L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 16L, 22L, 18L, 23L, 11L, 5L, 8L, 21L, 18L, 11L, 26L,
28L, 9L, 3L, 6L, 3L, 6L, 1L, 5L, 3L, 3L, 2L, 1L, 4L, 1L, 1L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L,
19L, 11L, 17L, 23L, 50L, 52L, 23L, 18L, 22L, 44L, 37L, 22L, 30L,
32L, 47L, 34L, 30L, 26L, 25L, 44L, 87L, 65L, 30L, 17L, 12L, 2L,
16L, 14L, 17L, 6L, 7L, 3L, 6L, 7L, 8L, 11L, 12L, 4L, 1L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L)), .Names = "V1", class = "data.frame", row.names = c(NA,
-188L))
My Code:
# Fit TBATS to the weekly data, wrapped as an msts object with a 52.2-week
# seasonal period.
wklytbat <- tbats(
  msts(weeklyu, seasonal.periods = 52.2, ts.frequency = 52.2),
  use.parallel = FALSE
)
extract season:
# Take the "season" column of the fitted components, exponentiate it, and
# store the result as a one-column data.table.
seasu <- data.table(
  exp(
    as.numeric(tbats.components(wklytbat)[, "season"])
  )
)

format color and legend in ggplot geom_tile of p-values

I am currently trying to make a 'heat map' using ggplot2 to display a series of p-values, but can't figure out how to tailor the actual color assignments and legend.
sampledata.m <- melt(sampledata)
# Turn var2 and var1 into ordered factors whose levels follow the order in
# which the values first appear.
sampledata.m[c("var2", "var1")] <- lapply(
  sampledata.m[c("var2", "var1")],
  function(column) {
    labels <- as.character(column)
    factor(labels, levels = unique(labels), ordered = TRUE)
  }
)
This was done so that I could maintain the order of my variables.
# Heat map of the p-values: one tile per (var2, var1) pair, filled by `value`.
p <- ggplot(sampledata.m, aes(var2, var1)) +
geom_tile(aes(fill = value), colour = "transparent") +
# NOTE(review): `colours` lists three colours while `values` supplies five
# positions; `values` is meant to give one position per colour, which is
# presumably why the gradient does not follow the intended rescale.
scale_fill_gradientn(colours=c("light green","dark green", "black"),
# NOTE(review): the next line closes one more parenthesis than is open.
values=rescale(c(0,0.0003,0.05,0.5,1)),limits=c(0,1)))
# Theme tweaks: no axis titles, no ticks, legend at the bottom, rotated x
# labels. `base_size` is not defined anywhere in this snippet.
p + theme_bw(base_size = base_size) + labs(x = "", y = "") +
scale_x_discrete(expand = c(0,0)) +
theme(legend.position = "bottom", axis.ticks = element_blank(),
axis.text.x = element_text(size = base_size * 0.8, angle = 310,
hjust = 0, colour = "black"))
This creates a nice looking plot, however my legend and my color gradient don't represent the rescale that I assigned. Forgive my ignorance if this is a simple fix, but I've only been coding R for about 2 weeks now. Ideally, I would love my plot and legend to mimic the color scheme and legend labeling similar to this paper: http://www.ncbi.nlm.nih.gov/pubmed/22496159
structure(list(var1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L
), .Label = c("A", "B", "C",
"D", "E"), class = "factor"), var2 = structure(c(1L,
5L, 23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L,
3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 23L, 18L, 9L,
8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L, 3L, 19L, 16L,
4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 23L, 18L, 9L, 8L, 14L, 12L,
20L, 6L, 21L, 11L, 2L, 22L, 10L, 3L, 19L, 16L, 4L, 7L, 15L, 17L,
13L, 24L, 1L, 5L, 23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L,
2L, 22L, 10L, 3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L,
23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L,
3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L), .Label = c("1", "2",
"3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21",
"22", "23", "24"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "pvalue", class = "factor"),
value = c(0.810172671, 0.596026338, 0.076550169, 0.908670635,
0.300418653, 0.051553286, 0.124196482, 0.601568833, 0.058431468,
0.341726981, 0.876674726, 0.002698295, 0.812059425, 0.068199656,
0.758383287, 0.60362134, 0.89265723, 0.246111936, 0.156348035,
0.909574522, 0.020202377, 0.388843992, 0.769441835, 0.102272916,
0.38895717, 0.882296525, 0.792438683, 0.000491393, 0.004233434,
0.202424095, 0.426941568, 0.08520186, 0.763036306, 0.602828564,
0.037278697, 0.121642743, 0.669123606, 0.974328438, 0.834329923,
0.050413697, 0.078476666, 0.387647156, 0.000540422, 0.379576632,
0.361428444, 0.502439758, 0.001326035, 0.027652693, 0.188885638,
0.579244445, 0.471985778, 0.677458228, 0.119307242, 0.364857868,
0.238260538, 0.53472206, 0.204344281, 0.291888993, 0.295809688,
0.00029, 0.005476157, 0.960975822, 0.00029, 0.055915429,
0.618284682, 0.040605253, 0.521649682, 0.421086546, 0.164333061,
0.755528982, 0.306854182, 0.012832628, 0.270393143, 0.946675764,
0.59227376, 0.112658388, 0.429091426, 0.01662083, 0.017342483,
0.065817234, 0.012140224, 0.359828816, 0.031969725, 0.00029,
0.14555102, 0.18865081, 0.00029, 0.064107531, 0.505257768,
0.070224536, 0.017082975, 0.375864198, 0.00029, 0.104103689,
0.898979883, 0.004879605, 0.003597954, 0.036722932, 0.849058218,
0.00029, 0.003739938, 0.00029, 0.00029, 0.00029, 0.008179017,
0.193870353, 0.460181712, 0.389475522, 0.00029, 0.8785017,
0.070414642, 0.584977921, 0.990764677, 0.767253318, 0.002234906,
0.051331823, 0.00446149, 0.234477639, 0.275139791)), .Names = c("var1", "var2", "variable", "value"), row.names = c(NA, -119L), class = "data.frame")
I'm not going to get into all of the theme settings you've got - as I understand it the key of your problem is the scale of the fill gradient. You can set this in scale_fill_gradient() with a log transformation:
# Same tiles as before, with the fill mapped through a log transformation and
# a two-colour gradient from light green to black.
p <- ggplot(sampledata.m, aes(x = var2, y = var1)) +
  geom_tile(mapping = aes(fill = value), colour = "transparent") +
  scale_fill_gradient(
    low = "light green",
    high = "black",
    trans = "log",
    breaks = c(0, 0.001, 0.05, 0.5)
  )
# Small example: two numeric columns spanning several orders of magnitude.
dt <- data.frame(
  N = letters[5:11],
  a = c(0.01, 0.05, 0.1, 0.5, 1, 5, 10),
  b = c(10, 20, 50, 100, 200, 1000, 2000)
)
# Long format: the column name goes to "Cls" and the number to "Val".
dt.mlt <- melt(dt, variable.name = "Cls", value.name = "Val")
# Tiles filled on a log scale with a green-black-red diverging gradient, each
# tile labelled with its value.
ggplot(dt.mlt, aes(x = N, y = Cls, fill = Val)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "green",
    mid = "black",
    high = "red",
    trans = "log",
    breaks = c(0, 0.01, 0.1, 1, 10, 100, 1000)
  ) +
  geom_text(data = dt.mlt, aes(x = N, y = Cls, label = Val))
But if I add the midpoint=10 to the scale_fill_gradient2, the picture will become:

Resources