xAxis order of R highcharter column plot - r

With the following data frame:
dta <- structure(list(sociodemographic_var = structure(c(3L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 26L, 18L, 20L, 21L, 26L, 13L, 16L, 21L, 22L, 26L, 26L,
9L, 13L, 17L, 18L, 20L, 21L, 23L, 26L, 20L, 26L), levels = c("1st grade",
"2nd grade", "3rd grade", "4th grade", "5th grade", "6th grade",
"7th grade", "8th grade", "9th grade", "10th grade", "11th grade",
"12th grade, no diploma", "High school graduate", "GED or equivalent",
"Some college, no degree", "Less than 1 year of college credit/post-secondary education (or less than 10 classes)",
"One year or more of college credit, no degree", "Associate degree: Occupational, Technical, or Vocational",
"Associate degree: Academic Program", "Bachelor's degree (ex. BA, AB, BS, BBS)",
"Master's degree (ex. MA, MS, MEng, MEd, MBA)", "Professional School degree (ex. MD, DDS, DVN, JD)",
"Doctoral degree (ex. PhD, EdD)", "Refused to answer", "Don't Know",
"unknown"), class = "factor"), event = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 7L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 11L, 11L), levels = c("Baseline", "0.5 Year", "1 Year",
"1.5 Year", "2 Year", "2.5 Year", "3 Year", "3.5 Year", "4 Year",
"4.5 Year", "5 Year", "5.5 Year", "6 Year", "Screener"), class = "factor"),
visit_type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), levels = c("on-site", "hybrid", "remote", "unknown"), class = "factor"),
n = c(2L, 13L, 5L, 9L, 15L, 18L, 26L, 25L, 192L, 27L, 485L,
224L, 183L, 1011L, 666L, 55L, 78L, 3L, 9L, 1L, 1L, 2L, 208L,
1L, 1L, 1L, 1L, 126L, 28L, 1L, 1L, 2L, 2L, 3L, 4L, 1L, 543L,
1L, 300L)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-39L))
I would assume that, generating a highcharter bar plot with:
library(highcharter) # v0.9.4
# Stacked percentage column chart: `event` on the x axis, `n` as the value,
# one series per `sociodemographic_var`.
dta |>
hchart(type = "column", hcaes(x = "event", y = "n", group = "sociodemographic_var")) |>
# The y axis is capped at 115 and does not extend to the next tick;
# presumably this leaves room for the stack labels above the 100% columns.
hc_yAxis(title = list(text = "%"), max = 115, endOnTick = FALSE, stackLabels = list(enabled = TRUE)) |>
# No `categories` are supplied for the x axis here.
hc_xAxis(title = "") |>
# stacking = "percent" makes every column a 100% stack.
hc_plotOptions(series = list(stacking = "percent"))
the xAxis would be ordered by levels(dta$event):
levels(dta$event)
[1] "Baseline" "0.5 Year" "1 Year" "1.5 Year" "2 Year" "2.5 Year" "3 Year" "3.5 Year" "4 Year" "4.5 Year" "5 Year" "5.5 Year"
[13] "6 Year" "Screener"
But the ordering is different and neither alphabetical nor based on the total number of values:
I am interested to understand why it's the case and how to set the order right.

You can pass categories to hc_xAxis() to set the order of the x axis explicitly, like this:
library(highcharter)
# Stacked percentage column chart: `event` on the x axis, `n` as the value,
# one series per `sociodemographic_var`.
dta |>
  hchart(
    type = "column",
    hcaes(x = "event", y = "n", group = "sociodemographic_var")
  ) |>
  hc_yAxis(
    title = list(text = "%"),
    max = 115,
    endOnTick = FALSE,
    stackLabels = list(enabled = TRUE)
  ) |>
  # The axis title is an option list (as on the y axis), so blank its text
  # instead of passing a bare string. Supplying `categories` pins the x-axis
  # order to the factor levels of `event`.
  hc_xAxis(title = list(text = ""), categories = levels(dta$event)) |>
  hc_plotOptions(series = list(stacking = "percent"))
Output:

Related

Using a regression model to predict values

I am currently working with 2 separate CSV datasets. I have already used the first data set named PRICEtable4.1 to visualize a relationship between the x values (GBA) and the y values (PRICE). I have attached a picture of the graph right below.
What I need to do now is use that cubic regression model from the first CSV dataset to predict the y values (PREDICTED_PRICE) in the second CSV dataset based on the x values (GBA) given. Is there a function that lets me make that connection? The code I used to create the regression model is below
# Fit a cubic polynomial regression of PRICE on GBA, report its error metrics
# and plot the fitted curve over the data.
# NOTE(review): the "test" vectors are taken from the same table as the
# training vectors, so the MSE and R squared below are in-sample figures.
train_X <- PRICEtable4.1$GBA
train_y <- PRICEtable4.1$PRICE
test_X <- PRICEtable4.1$GBA
test_y <- PRICEtable4.1$PRICE

X <- train_X
View(X)
y <- train_y
View(y)

poly_order <- 3
# The model is fitted on the free vectors `X` and `y`, so any `newdata` given
# to predict() must provide a column named `X`.
model <- lm(y ~ poly(X, poly_order))
print(model)

# MSE
test_yhat <- predict(model, data.frame(X = test_X))
MSE <- mean((test_y - test_yhat)^2)
print(MSE)

# R squared, computed as the squared correlation between the predictions and
# the observed values.
test_ymean <- mean(test_y)
test_yhatmean <- mean(test_yhat)
R_squared <- (sum((test_yhat - test_yhatmean) * (test_y - test_ymean)))^2 /
  (sum((test_yhat - test_yhatmean)^2) * sum((test_y - test_ymean)^2))
print(R_squared)

error2 <- data.frame(MSE = c(MSE), R_squared = c(R_squared))
View(error2)

# Visualization of the model
X_new <- X
View(X_new)
y_new <- predict(model, data.frame(X = X_new))
View(y_new)

PRICEmodel <- ggplot(PRICEtable4.1, aes(x = GBA, y = PRICE)) +
  geom_point(size = 2)
PRICEmodel +
  geom_line(aes(x = X_new, y = y_new), color = "Red") +
  labs(
    x = "Gross building area (ft^2)",
    y = "Price",
    title = "Price Regression Model"
  )
Here's a dput of the first dataset named PRICEtable4.1 (first 20 rows)
structure(list(ID = c(1L, 2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L), GBA = c(1324L,
2120L, 1216L, 1804L, 1836L, 1228L, 1312L, 1262L, 1461L, 1120L,
1037L, 832L, 1500L, 920L, 1565L, 1134L, 1184L, 1420L, 2082L,
1422L), PRICE = c(1375000L, 1467000L, 549410L, 1180000L, 828000L,
742000L, 829000L, 710000L, 775000L, 380000L, 600000L, 189000L,
200000L, 265000L, 560000L, 300000L, 200000L, 940000L, 1050000L,
979000L)), row.names = c(NA, 20L), class = "data.frame")
Here's the dput of the second CSV dataset named Test (first 20 rows)
structure(list(ID = c(1L, 2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L), GBA = c(1324L,
2120L, 1216L, 1804L, 1836L, 1228L, 1312L, 1262L, 1461L, 1120L,
1037L, 832L, 1500L, 920L, 1565L, 1134L, 1184L, 1420L, 2082L,
1422L), PRICE = c(1375000L, 1467000L, 549410L, 1180000L, 828000L,
742000L, 829000L, 710000L, 775000L, 380000L, 600000L, 189000L,
200000L, 265000L, 560000L, 300000L, 200000L, 940000L, 1050000L,
979000L)), row.names = c(NA, 20L), class = "data.frame")
> dput(Test[1:20, ])
structure(list(ID = 1:20, BATHRM = c(2L, 2L, 1L, 3L, 4L, 2L,
1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 1L, 2L, 3L, 2L), HF_BATHRM = c(1L,
1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 1L, 0L), HEAT = c("Forced Air", "Forced Air", "Warm Cool",
"Forced Air", "Forced Air", "Warm Cool", "Hot Water Rad", "Forced Air",
"Warm Cool", "Forced Air", "Forced Air", "Hot Water Rad", "Forced Air",
"Forced Air", "Warm Cool", "Forced Air", "Forced Air", "Warm Cool",
"Ht Pump", "Forced Air"), AC = c("Y", "Y", "N", "Y", "Y", "Y",
"N", "Y", "Y", "N", "N", "N", "N", "Y", "Y", "Y", "Y", "Y", "Y",
"Y"), NUM_UNITS = c(2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L), ROOMS = c(9L, 7L, 6L, 7L,
13L, 5L, 7L, 7L, 6L, 7L, 7L, 8L, 5L, 8L, 5L, 8L, 6L, 8L, 10L,
7L), BEDRM = c(3L, 3L, 3L, 4L, 6L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
2L, 4L, 2L, 4L, 3L, 2L, 3L, 3L), AYB = c(1870L, 1890L, 1911L,
1920L, 1993L, 1947L, 1895L, 1910L, 1910L, 1950L, 1951L, 1928L,
1941L, 2018L, 1939L, 2018L, 1980L, 1951L, 1910L, 1908L), YR_RMDL = c(1980L,
1963L, NA, 2001L, 2018L, NA, 1987L, 2017L, NA, NA, NA, NA, NA,
NA, 1992L, NA, 2013L, 2005L, 2004L, 1984L), EYB = c(1967L, 1982L,
1957L, 1972L, 2003L, 1958L, 1957L, 1964L, 1954L, 1960L, 1951L,
1954L, 1961L, 2018L, 1957L, 2018L, 1991L, 1961L, 1975L, 1960L
), STORIES = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1.75, 2, 1, 2,
2, 2, 2, 2, 2), GBA = c(1324L, 2120L, 1216L, 1804L, 5036L, 1836L,
1228L, 1312L, 1262L, 1461L, 1120L, 1037L, 832L, 1500L, 920L,
1565L, 1134L, 1184L, 1420L, 2082L), BLDG_NUM = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), STYLE = c("2 Story", "2 Story", "2 Story", "2 Story", "2 Story",
"2 Story", "2 Story", "2 Story", "2 Story", "2 Story", "2 Story",
"2 Story", "2 Story", "1 Story", "2 Story", "2 Story", "2 Story",
"2 Story", "2 Story", "2 Story"), STRUCT = c("Row Inside", "Row Inside",
"Row Inside", "Single", "Single", "Single", "Row Inside", "Row Inside",
"Row Inside", "Single", "Semi-Detached", "Single", "Row Inside",
"Semi-Detached", "Semi-Detached", "Single", "Row Inside", "Multi",
"Row Inside", "Row Inside"), LANDAREA = c(1575L, 1800L, 1280L,
5000L, 10252L, 3000L, 1500L, 1641L, 1358L, 6300L, 1818L, 3500L,
1280L, 5098L, 1899L, 5009L, 1152L, 2910L, 1762L, 1400L), ASSESSMENT_NBHD = c("Old City 2",
"Capitol Hill", "Old City 1", "Palisades", "Chevy Chase", "Chevy Chase",
"Eckington", "Ledroit Park", "Eckington", "Riggs Park", "Riggs Park",
"Woodridge", "Lily Ponds", "Fort Dupont Park", "Hillcrest", "Hillcrest",
"Congress Heights", "Congress Heights", "Old City 1", "Capitol Hill"
), PREDICTED_PRICE = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, 20L
), class = "data.frame")
So I need to predict and fill out the values in the "PREDICTED_PRICE" column in the second CSV dataset using the regression model I created from the first dataset

1) How do I sort multiple tables in a list by descending order? 2) How do I create dataframes from one list of multiple tables?

I have multiple tables in a list.
1) How do I sort all tables in the list by descending order? (Ideally, I'd keep my object as a list).
EDIT: Sort items in each table by descending order.
Example of what I have now:
$animals
Cat 10
Dog 20
Panda 50
Snake 40
$colors
blue 20
green 5
red 30
yellow 2
Example of what I want:
$animals
Panda 50
Snake 40
Dog 20
Cat 10
$colors
red 30
blue 20
green 5
yellow 2
2) How do I create multiple dataframes from the multiple tables in the list? For example, the first table in the list is called 'brand', and the second table in the list is called 'style'. I want to create new dataframes called df_brand and df_style.
3) I am sorry my dput() is long. I could not figure out how to print the head() of my list of multiple tables. If you know how to do that, I would appreciate a solution for that too.
x <- list(brand = structure(c(`1 To 3 Noodles` = 1L, `7 Select` = 2L,
`7 Select/Nissin` = 1L, `A-One` = 4L, `A-Sha Dry Noodle` = 26L,
A1 = 3L, ABC = 12L, Acecook = 15L, Adabi = 4L, `Ah Lai` = 2L,
Ajinatori = 2L, Amianda = 10L, Amino = 3L, `Annie Chun's` = 12L,
Aroi = 2L, `Asia Gold` = 4L, `Asian Thai Foods` = 14L, `Authentically Asian` = 1L,
Azami = 5L, Baijia = 11L, `Baixiang Noodles` = 5L, Baltix = 2L,
Bamee = 5L, Batchelors = 16L, `Binh Tay` = 3L, `Bon Go Jang` = 2L,
Bonasia = 4L, Boss = 1L, `Campbell's` = 3L, `Cap Atoom Bulan` = 1L,
CarJEN = 7L, `Chaudhary's Wai Wai` = 1L, Chencun = 5L, `Chering Chang` = 5L,
Chewy = 8L, Chikara = 1L, `China Best` = 1L, `Ching's Secret` = 4L,
`Chorip Dong` = 1L, ChoripDong = 1L, Choumama = 1L, `Chuan Wei Wang` = 2L,
Cintan = 5L, `CJ CheilJedang` = 2L, Conimex = 5L, `Crystal Noodle` = 1L,
`Curry Prince` = 1L, Daddy = 1L, Daifuku = 1L, Daikoku = 6L,
Daraz = 1L, Deshome = 13L, Doll = 16L, Dongwon = 1L, `Dr. McDougall's` = 1L,
Dragonfly = 13L, `Dream Kitchen` = 4L, `E-mi` = 2L, `E-Zee` = 3L,
`Eat & Go` = 5L, Econsave = 1L, Emart = 7L, Fantastic = 6L, `Farmer's Heart` = 1L,
`Fashion Food` = 3L, `Fashion Foods` = 5L, FMF = 2L, Foodmon = 2L,
`Forest Noodles` = 4L, Fortune = 4L, `Four Seas` = 8L, `Fu Chang Chinese Noodle Company` = 1L,
`Fuji Mengyo` = 1L, Fujiwara = 7L, Fuku = 10L, GaGa = 7L, `Gau Do` = 2L,
Gefen = 4L, GGE = 1L, `Global Inspiration` = 1L, `Goku-Uma` = 4L,
`Goku Uma` = 3L, `Golden Mie` = 3L, `Golden Wheat` = 12L, `Golden Wonder` = 1L,
Gomex = 2L, `Good Tto Leu Foods` = 1L, `Great Value` = 7L, GreeNoodle = 4L,
GS25 = 2L, `Guava Story` = 1L, Haioreum = 1L, `Han's South Korea` = 3L,
Hankow = 2L, `Hao Way` = 8L, `Happy Cook` = 3L, `Happy Family` = 2L,
Healtimie = 2L, `Hi-Myon` = 2L, Higashi = 1L, Higashimaru = 1L,
HoMyeonDang = 5L, Hosoonyi = 1L, `Hsin Tung Yang` = 1L, `Hua Feng` = 1L,
`Hua Feng Noodle Expert` = 2L, Ibumie = 10L, IbuRamen = 3L, ICA = 2L,
`Ikeda Shoku` = 2L, iMee = 4L, Indomie = 53L, iNoodle = 2L, Ishimaru = 1L,
Itomen = 5L, Itsuki = 4L, J.J. = 2L, `Jackpot Teriyaki` = 1L,
JFC = 2L, Jingqi = 8L, JML = 23L, `Just Way` = 2L, `Kabuto Noodles` = 5L,
Kailo = 3L, Kamfen = 15L, `Kang Shi Fu` = 5L, Katoz = 1L, `Kiki Noodle` = 2L,
`Kim's Bowl` = 1L, `Kim Ve Wong` = 1L, Kimura = 1L, `Kin-Dee` = 2L,
Knorr = 8L, `Ko-Lee` = 10L, `Koh Thai` = 4L, Koka = 18L, KOKA = 25L,
`Komforte Chockolates` = 1L, Koyo = 7L, Kumamoto = 1L, Kuriki = 3L,
`La Fonte` = 2L, `La Moderna` = 1L, `Lee Fah Mee` = 1L, Lele = 1L,
`Liang Cheng Mai` = 1L, Lipton = 1L, Lishan = 1L, `Lishan Food Manufacturing` = 1L,
`Little Cook` = 14L, `Liu Quan` = 1L, `Long Jun Hang` = 2L, `Long Kow` = 5L,
`Lotus Foods` = 3L, `Love Cook` = 5L, `Lucky Me!` = 34L, Maggi = 30L,
Maitri = 1L, Mama = 71L, MAMA = 27L, `Mama Pat's` = 4L, Mamee = 29L,
Maruchan = 76L, Marutai = 7L, `Master Kong` = 28L, `Mee Jang` = 7L,
`Men-Sunaoshi` = 2L, Menraku = 8L, `Mexi-Ramen` = 1L, `Mi E-Zee` = 5L,
`Mi Sedaap` = 12L, `Mie Sedaap` = 1L, Migawon = 1L, Miliket = 1L,
`Miracle Noodle` = 1L, Mitoku = 1L, `Mom's Dry Noodle` = 6L,
Morre = 1L, `Mr. Lee's Noodles` = 6L, `Mr. Noodles` = 15L, `Mr. Udon` = 4L,
`Mug Shot` = 2L, `Mum Ngon` = 1L, MyKuali = 24L, Myojo = 63L,
MyOri = 5L, `Nagao Noodle` = 1L, Nagatanien = 1L, `Nakaya Shouten` = 1L,
`Nan Hsing` = 1L, `Nan Jie Cun` = 1L, `Nanyang Chef` = 2L, `New Touch` = 9L,
`New Way` = 1L, Nissin = 381L, `No Name` = 2L, `Noah Foods` = 2L,
Nongshim = 98L, `Noodle Time` = 2L, `Nyor Nyar` = 2L, `O Sung` = 1L,
Ogasawara = 2L, Ohsung = 3L, Omachi = 1L, `One Dish Asia` = 1L,
`Oni Hot Pot` = 4L, `ORee Garden` = 1L, `Osaka Ramen` = 1L, Ottogi = 46L,
Oyatsu = 4L, Paldo = 66L, `Paldo Vina` = 3L, Pama = 4L, Pamana = 1L,
Papa = 1L, Patanjali = 1L, Payless = 6L, Peyang = 1L, Pirkka = 3L,
`Plats Du Chef` = 1L, `Pop Bihun` = 3L, `Pot Noodle` = 11L, Pran = 2L,
Premiere = 2L, President = 1L, `President Rice` = 1L, Prima = 4L,
`Prima Taste` = 7L, Pringles = 1L, Pulmuone = 8L, Q = 1L, `Qin Zong` = 1L,
Quickchow = 5L, `Rhee Bros Assi` = 6L, `Right Foods` = 1L, `Ripe'n'Dry` = 3L,
`Rocket Brand` = 1L, Roland = 2L, `Royal Umbrella` = 2L, Ruski = 6L,
`S&S` = 1L, Sahmyook = 1L, `Saigon Ve Wong` = 13L, `Sainsbury's` = 5L,
Saji = 2L, `Sakura Noodle` = 5L, Sakurai = 1L, `Sakurai Foods` = 10L,
`Salam Mie` = 2L, `Samurai Ramen` = 1L, Samyang = 19L, `Samyang Foods` = 52L,
Sanpo = 1L, Sanrio = 1L, `Sanyo Foods` = 1L, `Sao Tao` = 4L,
`Sapporo Ichiban` = 25L, Sarimi = 7L, `Sau Tao` = 14L, Sawadee = 4L,
Sempio = 3L, `Seven-Eleven` = 1L, `Seven & I` = 1L, Shan = 5L,
Shirakiku = 11L, `Sichuan Baijia` = 10L, `Sichuan Guangyou` = 4L,
`Singa-Me` = 3L, `Six Fortune` = 6L, Smack = 1L, Snapdragon = 5L,
Sokensha = 1L, `Song Hak` = 1L, Souper = 2L, Springlife = 1L,
`Star Anise Foods` = 1L, `Sugakiya Foods` = 2L, Suimin = 8L,
`Sun Noodle` = 7L, Sunlee = 8L, Sunlight = 1L, `Sunny Maid` = 1L,
Super = 5L, `Super Bihun` = 4L, SuperMi = 8L, Sura = 1L, Sutah = 1L,
Tablemark = 3L, Takamori = 1L, `Takamori Kosan` = 14L, `Tao Kae Noi` = 1L,
`Tasty Bite` = 6L, Tayho = 1L, `Ten-In` = 2L, `Teriyaki Time` = 1L,
Tesco = 4L, `Thai Chef` = 4L, `Thai Choice` = 3L, `Thai Kitchen` = 10L,
`Thai Pavilion` = 3L, `Thai Smile` = 3L, `The Bridge` = 1L, `The Kitchen Food` = 2L,
`The Ramen Rater Select` = 1L, `Thien Houng Foods` = 1L, Tiger = 1L,
`Tiger Tiger` = 2L, `Tokachimen Koubou` = 1L, `Tokushima Seifun` = 4L,
`Tokyo Noodle` = 4L, Torishi = 1L, Tradition = 5L, TRDP = 1L,
Trident = 4L, `Tropicana Slim` = 2L, `Tseng Noodles` = 7L, TTL = 3L,
`Tung-I` = 1L, `Uncle Sun` = 2L, `Uni-President` = 12L, Unif = 13L,
`Unif-100` = 2L, `Unif / Tung-I` = 11L, `Unif Tung-I` = 1L, United = 3L,
Unox = 6L, Unzen = 1L, `Urban Noodle` = 5L, `US Canning` = 1L,
`Ve Wong` = 24L, Vedan = 6L, Vifon = 33L, `Vina Acecook` = 34L,
`Vit's` = 13L, `Wai Wai` = 25L, Wang = 6L, `Weh Lih` = 1L, `Wei Chuan` = 2L,
`Wei Lih` = 15L, `Wei Wei` = 3L, Westbrae = 1L, `Western Family` = 6L,
`World O' Noodle` = 2L, `Wu-Mu` = 12L, `Wu Mu` = 7L, Wugudaochang = 10L,
`Xiao Ban Mian` = 3L, Xiuhe = 1L, Yamachan = 11L, Yamadai = 1L,
Yamamori = 2L, Yamamoto = 4L, `Yum-Mie` = 1L, `Yum Yum` = 12L,
`Zow Zow` = 1L), .Dim = 355L, .Dimnames = structure(list(c("1 To 3 Noodles",
"7 Select", "7 Select/Nissin", "A-One", "A-Sha Dry Noodle", "A1",
"ABC", "Acecook", "Adabi", "Ah Lai", "Ajinatori", "Amianda",
"Amino", "Annie Chun's", "Aroi", "Asia Gold", "Asian Thai Foods",
"Authentically Asian", "Azami", "Baijia", "Baixiang Noodles",
"Baltix", "Bamee", "Batchelors", "Binh Tay", "Bon Go Jang", "Bonasia",
"Boss", "Campbell's", "Cap Atoom Bulan", "CarJEN", "Chaudhary's Wai Wai",
"Chencun", "Chering Chang", "Chewy", "Chikara", "China Best",
"Ching's Secret", "Chorip Dong", "ChoripDong", "Choumama", "Chuan Wei Wang",
"Cintan", "CJ CheilJedang", "Conimex", "Crystal Noodle", "Curry Prince",
"Daddy", "Daifuku", "Daikoku", "Daraz", "Deshome", "Doll", "Dongwon",
"Dr. McDougall's", "Dragonfly", "Dream Kitchen", "E-mi", "E-Zee",
"Eat & Go", "Econsave", "Emart", "Fantastic", "Farmer's Heart",
"Fashion Food", "Fashion Foods", "FMF", "Foodmon", "Forest Noodles",
"Fortune", "Four Seas", "Fu Chang Chinese Noodle Company", "Fuji Mengyo",
"Fujiwara", "Fuku", "GaGa", "Gau Do", "Gefen", "GGE", "Global Inspiration",
"Goku-Uma", "Goku Uma", "Golden Mie", "Golden Wheat", "Golden Wonder",
"Gomex", "Good Tto Leu Foods", "Great Value", "GreeNoodle", "GS25",
"Guava Story", "Haioreum", "Han's South Korea", "Hankow", "Hao Way",
"Happy Cook", "Happy Family", "Healtimie", "Hi-Myon", "Higashi",
"Higashimaru", "HoMyeonDang", "Hosoonyi", "Hsin Tung Yang", "Hua Feng",
"Hua Feng Noodle Expert", "Ibumie", "IbuRamen", "ICA", "Ikeda Shoku",
"iMee", "Indomie", "iNoodle", "Ishimaru", "Itomen", "Itsuki",
"J.J.", "Jackpot Teriyaki", "JFC", "Jingqi", "JML", "Just Way",
"Kabuto Noodles", "Kailo", "Kamfen", "Kang Shi Fu", "Katoz",
"Kiki Noodle", "Kim's Bowl", "Kim Ve Wong", "Kimura", "Kin-Dee",
"Knorr", "Ko-Lee", "Koh Thai", "Koka", "KOKA", "Komforte Chockolates",
"Koyo", "Kumamoto", "Kuriki", "La Fonte", "La Moderna", "Lee Fah Mee",
"Lele", "Liang Cheng Mai", "Lipton", "Lishan", "Lishan Food Manufacturing",
"Little Cook", "Liu Quan", "Long Jun Hang", "Long Kow", "Lotus Foods",
"Love Cook", "Lucky Me!", "Maggi", "Maitri", "Mama", "MAMA",
"Mama Pat's", "Mamee", "Maruchan", "Marutai", "Master Kong",
"Mee Jang", "Men-Sunaoshi", "Menraku", "Mexi-Ramen", "Mi E-Zee",
"Mi Sedaap", "Mie Sedaap", "Migawon", "Miliket", "Miracle Noodle",
"Mitoku", "Mom's Dry Noodle", "Morre", "Mr. Lee's Noodles", "Mr. Noodles",
"Mr. Udon", "Mug Shot", "Mum Ngon", "MyKuali", "Myojo", "MyOri",
"Nagao Noodle", "Nagatanien", "Nakaya Shouten", "Nan Hsing",
"Nan Jie Cun", "Nanyang Chef", "New Touch", "New Way", "Nissin",
"No Name", "Noah Foods", "Nongshim", "Noodle Time", "Nyor Nyar",
"O Sung", "Ogasawara", "Ohsung", "Omachi", "One Dish Asia", "Oni Hot Pot",
"ORee Garden", "Osaka Ramen", "Ottogi", "Oyatsu", "Paldo", "Paldo Vina",
"Pama", "Pamana", "Papa", "Patanjali", "Payless", "Peyang", "Pirkka",
"Plats Du Chef", "Pop Bihun", "Pot Noodle", "Pran", "Premiere",
"President", "President Rice", "Prima", "Prima Taste", "Pringles",
"Pulmuone", "Q", "Qin Zong", "Quickchow", "Rhee Bros Assi", "Right Foods",
"Ripe'n'Dry", "Rocket Brand", "Roland", "Royal Umbrella", "Ruski",
"S&S", "Sahmyook", "Saigon Ve Wong", "Sainsbury's", "Saji", "Sakura Noodle",
"Sakurai", "Sakurai Foods", "Salam Mie", "Samurai Ramen", "Samyang",
"Samyang Foods", "Sanpo", "Sanrio", "Sanyo Foods", "Sao Tao",
"Sapporo Ichiban", "Sarimi", "Sau Tao", "Sawadee", "Sempio",
"Seven-Eleven", "Seven & I", "Shan", "Shirakiku", "Sichuan Baijia",
"Sichuan Guangyou", "Singa-Me", "Six Fortune", "Smack", "Snapdragon",
"Sokensha", "Song Hak", "Souper", "Springlife", "Star Anise Foods",
"Sugakiya Foods", "Suimin", "Sun Noodle", "Sunlee", "Sunlight",
"Sunny Maid", "Super", "Super Bihun", "SuperMi", "Sura", "Sutah",
"Tablemark", "Takamori", "Takamori Kosan", "Tao Kae Noi", "Tasty Bite",
"Tayho", "Ten-In", "Teriyaki Time", "Tesco", "Thai Chef", "Thai Choice",
"Thai Kitchen", "Thai Pavilion", "Thai Smile", "The Bridge",
"The Kitchen Food", "The Ramen Rater Select", "Thien Houng Foods",
"Tiger", "Tiger Tiger", "Tokachimen Koubou", "Tokushima Seifun",
"Tokyo Noodle", "Torishi", "Tradition", "TRDP", "Trident", "Tropicana Slim",
"Tseng Noodles", "TTL", "Tung-I", "Uncle Sun", "Uni-President",
"Unif", "Unif-100", "Unif / Tung-I", "Unif Tung-I", "United",
"Unox", "Unzen", "Urban Noodle", "US Canning", "Ve Wong", "Vedan",
"Vifon", "Vina Acecook", "Vit's", "Wai Wai", "Wang", "Weh Lih",
"Wei Chuan", "Wei Lih", "Wei Wei", "Westbrae", "Western Family",
"World O' Noodle", "Wu-Mu", "Wu Mu", "Wugudaochang", "Xiao Ban Mian",
"Xiuhe", "Yamachan", "Yamadai", "Yamamori", "Yamamoto", "Yum-Mie",
"Yum Yum", "Zow Zow")), .Names = ""), class = "table"), style = structure(c(2L,
Bar = 1L, Bowl = 481L, Box = 6L, Can = 1L, Cup = 450L, Pack = 1531L,
Tray = 108L), .Dim = 8L, .Dimnames = structure(list(c("", "Bar",
"Bowl", "Box", "Can", "Cup", "Pack", "Tray")), .Names = ""), class = "table"),
country = structure(c(Australia = 22L, Bangladesh = 7L, Brazil = 5L,
Cambodia = 5L, Canada = 41L, China = 169L, Colombia = 6L,
Dubai = 3L, Estonia = 2L, Fiji = 4L, Finland = 3L, Germany = 27L,
Ghana = 2L, Holland = 4L, `Hong Kong` = 137L, Hungary = 9L,
India = 31L, Indonesia = 126L, Japan = 352L, Malaysia = 156L,
Mexico = 25L, Myanmar = 14L, Nepal = 14L, Netherlands = 15L,
Nigeria = 1L, Pakistan = 9L, Philippines = 47L, Poland = 4L,
Sarawak = 3L, Singapore = 109L, `South Korea` = 309L, Sweden = 3L,
Taiwan = 224L, Thailand = 191L, UK = 69L, `United States` = 1L,
USA = 323L, Vietnam = 108L), .Dim = 38L, .Dimnames = structure(list(
c("Australia", "Bangladesh", "Brazil", "Cambodia", "Canada",
"China", "Colombia", "Dubai", "Estonia", "Fiji", "Finland",
"Germany", "Ghana", "Holland", "Hong Kong", "Hungary",
"India", "Indonesia", "Japan", "Malaysia", "Mexico",
"Myanmar", "Nepal", "Netherlands", "Nigeria", "Pakistan",
"Philippines", "Poland", "Sarawak", "Singapore", "South Korea",
"Sweden", "Taiwan", "Thailand", "UK", "United States",
"USA", "Vietnam")), .Names = ""), class = "table"), whole_stars = structure(c(`0` = 54L,
`1` = 103L, `2` = 250L, `3` = 1043L, `4` = 741L, `5` = 386L,
U = 3L), .Dim = 7L, .Dimnames = structure(list(c("0", "1",
"2", "3", "4", "5", "U")), .Names = ""), class = "table"),
top_rank = structure(c(2539L, `
` = 4L, `1` = 5L, `10` = 5L,
`2` = 2L, `3` = 2L, `4` = 4L, `5` = 3L, `6` = 4L, `7` = 4L,
`8` = 3L, `9` = 5L), .Dim = 12L, .Dimnames = structure(list(
c("", "\n", "1", "10", "2", "3", "4", "5", "6", "7",
"8", "9")), .Names = ""), class = "table"), top_year = structure(c(2539L,
`
` = 4L, `2012` = 9L, `2013` = 7L, `2014` = 8L, `2015` = 7L,
`2016` = 6L), .Dim = 7L, .Dimnames = structure(list(c("",
"\n", "2012", "2013", "2014", "2015", "2016")), .Names = ""), class = "table"))
To sort each component, use lapply:
# Sort every table in the list from its largest to its smallest count.
sorted <- lapply(x, function(tbl) sort(tbl, decreasing = TRUE))
To convert the tables to data frames, use as.data.frame. The code below first builds a list of data frames and then renames its elements:
# Turn each sorted table into a data frame and prefix the element names
# with "df_".
df <- setNames(
  lapply(sorted, as.data.frame),
  paste0("df_", names(sorted))
)
If you also want these as separate variables (which is probably not a good idea), you could use
# Copy each element of `df` into a variable of the same name in the
# environment where this loop runs.
for (n in names(df)) assign(n, df[[n]])
To get the head of each element of the list, use lapply again:
# Show the first rows of every data frame in the list.
lapply(df, function(d) head(d))
This gives output starting out as
$df_brand
Var1 Freq
1 Nissin 381
2 Nongshim 98
3 Maruchan 76
4 Mama 71
5 Paldo 66
6 Myojo 63
$df_style
Var1 Freq
1 Pack 1531
2 Bowl 481
3 Cup 450
4 Tray 108
5 Box 6
6 2

ggplot plotting vertical lines only?

When entering the following code, I get a weird ggplot where it plots vertical lines.
# geom_line() is the layer that draws the vertical lines described in the
# text; geom_point() produces the other graph.
ggplot(data = otherdata, aes(x = subject, y = pct_.below)) +
  geom_line(aes(colour = subgroup))
When doing geom_point rather than geom_line, I get the other graph. I have no idea why this happens. There are more points than there are subgroups but that's not the solution to the issue. What do I do to fix this ggplot?
# Reproducible toy data: nine categories, each with values at x = 1..5.
set.seed(45)
df <- data.frame(
  x = rep(seq_len(5), times = 9),
  val = sample(1:100, size = 45),
  variable = rep(paste0("category", seq_len(9)), each = 5)
)

# One coloured line per category.
ggplot(df, aes(x, val)) +
  geom_line(aes(colour = variable))
That code that I just posted works but I have no idea what the difference is between the two codes.
First 20 rows of the data:
structure(list(subject = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Algebra II",
"Biology I", "Chemistry", "English I", "English II", "English III",
"Geometry", "Int Math I", "Int Math II", "Int Math III", "US History"
), class = "factor"), pct_.below = c(0, 12.5, 12.4, 12.5, 0,
0, 12.5, 8.4, 11.1, 12.8, 11.9, 0, 11.5, 9, 100, 66.7, 100, 100,
100, 50), subgroup = structure(c(2L, 3L, 4L, 5L, 7L, 10L, 11L,
12L, 13L, 15L, 16L, 17L, 18L, 19L, 3L, 4L, 5L, 8L, 10L, 11L), .Label = c("All Students",
"Asian", "Black or African Amer", "Black/Hispanic/Native Amer",
"ED", "English Learner T 1-2", "English Learner T 1-4", "English Learners",
"English Learners with T 1-2", "English Learners with T 1-4",
"Hispanic", "Non-Black/Hispanic/Native Amer", "Non-ED", "Non-English Learners/T 1-2",
"Non-English Learners/T 1-4", "Non-Students with Disabilities",
"Students with Disabilities", "Super Subgroup", "White"), class = "factor")), row.names = c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 32L,
33L, 34L, 35L, 36L, 37L), class = "data.frame")

Read CSV file up to line with unique marker

I have many data sets that have extra information beyond a certain line. The files are all csv. I would be able to loop through them and read.csv with "skip" argument to clean the top of the data, but the length of the data frames are all different. The only commonality is the "--------------- ---------------- ------ -----" line in the Total column that separates the meaningful data from summaries and extraneous info below it.
Here's how I'm reading in the data without skip = 14 (which is standard across everything).
# Read the raw export without a header row, naming the eleven columns by hand.
before <- read.csv(
  file = "Example.csv",
  header = FALSE,
  col.names = c(
    "CountryID", "Name", "Type", "Symbol", "Code", "Unit",
    "Total", "Measurement", "Value", "Percent", "CO2"
  )
)
However, the ----- marker may be on a different row in each file, but it is always the first such line you hit when reading down. Here's the data before:
structure(list(CountryID = structure(c(26L, 19L, 21L, 23L, 21L,
7L, 1L, 1L, 1L, 22L, 3L, 1L, 19L, 2L, 8L, 14L, 15L, 13L, 9L,
12L, 18L, 17L, 8L, 13L, 15L, 10L, 8L, 8L, 11L, 16L, 1L, 1L, 1L,
20L, 4L, 6L, 1L, 25L, 5L, 1L, 1L, 1L, 24L, 1L), .Label = c("",
"------------", "-------------", "---------------", "------------------",
" ", "08.15.1997", "10000", "15000", "200", "2000", "2500", "3000",
"45000", "5000", "7000", "8000", "8300", "Country", "Output",
"Production", "Quantity", "Serial Output", "TOTAL SUM", "Unaccounted",
"United Nations Data"), class = "factor"), Name = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 20L, 2L, 1L, 1L, 1L, 21L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 19L, 1L, 1L, 1L, 1L), .Label = c("",
"--------------------", " ", "Bahrain", "Bangladesh", "Barbados",
"Belarus", "Belgium", "Belize", "Benin", "Bhutan", "Bolivia",
"Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria",
"Burkina Faso", "Chad", "Name", "The Bahamas"), class = "factor"),
Type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 4L,
2L, 1L, 1L, 1L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("", "----", " ", "Code", "Type",
"Unit"), class = "factor"), Symbol = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 20L, 22L, 2L, 1L, 1L, 1L, 4L, 5L,
6L, 7L, 9L, 8L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 21L, 1L, 1L, 1L,
1L), .Label = c("", "------------", " ", "BAHM", "BAHR",
"BANG", "BARB", "BELGM", "BELS", "BELZ", "BEN", "BHUT", "BOL",
"BOSHER", "BOTS", "BRAZ", "BRUN", "BULG", "BURKF", "Country",
"private", "Symbol"), class = "factor"), Code = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 19L, 2L, 1L, 1L, 1L, 12L,
15L, 11L, 17L, 4L, 13L, 14L, 9L, 18L, 10L, 5L, 16L, 3L, 7L,
8L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "------------", "1504944270", "2287368539",
"2388991307", "2453202442", "2561470743", "3205402223", "3221488867",
"3230369605", "3247578406", "3712013344", "4307638090", "462793263",
"4835205752", "4854959101", "5842098895", "5932776587", "Code"
), class = "factor"), Unit = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 17L, 16L, 2L, 1L, 1L, 1L, 7L, 9L, 10L, 14L,
12L, 15L, 15L, 11L, 13L, 3L, 8L, 13L, 15L, 6L, 5L, 9L, 1L,
1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"-------------", "100", "1109", "27", "35", "40", "45", "58",
"70", "74", "77", "79", "82", "95", "Output", "Per Unit"), class = "factor"),
Total = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 25L,
24L, 2L, 1L, 1L, 1L, 18L, 5L, 17L, 8L, 23L, 20L, 6L, 9L,
7L, 11L, 12L, 13L, 19L, 15L, 14L, 10L, 3L, 16L, 1L, 1L, 1L,
16L, 1L, 1L, 1L, 21L, 1L, 3L, 22L, 4L), .Label = c("", "---------------",
"--------------- ---------------- ------ -----",
"=============== ================ ====== =====",
"126912", "147431", "170553", "175973", "203728", "230761",
"293789", "304471", "376281", "386526", "399160", "4417002",
"476025", "478030", "502999", "51012", "5610654", "56406056",
"93351", "Output", "Total"), class = "factor"), Measurement = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 2L, 1L, 1L, 1L, 3L,
9L, 3L, 4L, 10L, 9L, 6L, 4L, 5L, 10L, 7L, 9L, 4L, 8L, 10L,
9L, 1L, 1L, 1L, 1L, 1L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "--------", "20", "23", "24", "26", "27",
"28", "29", "30", "420", "Measurement"), class = "factor"),
Value = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 22L,
23L, 2L, 1L, 1L, 1L, 5L, 19L, 11L, 8L, 3L, 18L, 13L, 6L,
4L, 9L, 14L, 17L, 7L, 10L, 12L, 15L, 1L, 16L, 1L, 1L, 1L,
16L, 1L, 1L, 1L, 20L, 1L, 1L, 21L, 1L), .Label = c("", "----------------",
"15150240", "15891735", "16083459", "16959919", "20350968",
"20909501", "21770264", "25121096", "27726279", "30024743",
"34069742", "34841369", "38498281", "468004111", "49524999",
"50512814", "50568702", "540650", "64506", "Country", "Value"
), class = "factor"), Percent = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 23L, 2L, 1L, 1L, 1L, 11L, 12L, 8L, 3L,
17L, 16L, 5L, 10L, 20L, 9L, 6L, 7L, 4L, 15L, 14L, 22L, 1L,
13L, 1L, 1L, 1L, 21L, 1L, 1L, 1L, 19L, 1L, 1L, 18L, 1L), .Label = c("",
"------", "102", "104", "106", "112", "126", "129", "142",
"15", "160", "177", "1775", "180", "191", "24", "25", "5640645",
"650163", "87", "887.5", "95", "Production Percent"), class = "factor"),
CO2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 15L, 14L,
2L, 1L, 1L, 1L, 9L, 4L, 9L, 7L, 4L, 5L, 4L, 7L, 4L, 9L, 4L,
11L, 4L, 12L, 10L, 4L, 1L, 6L, 1L, 1L, 1L, 8L, 1L, 1L, 1L,
3L, 1L, 1L, 13L, 1L), .Label = c("", "-----", "?", "0", "0.2",
"0.6", "1", "19.4", "2", "2.2", "4", "5", "564065", "CO2",
"Cur."), class = "factor")), class = "data.frame", row.names = c(NA,
-44L))
And here's how I'm hoping it could look:
structure(list(CountryID = c(10000L, 45000L, 5000L, 3000L, 15000L,
2500L, 8300L, 8000L, 10000L, 3000L, 5000L, 200L, 10000L, 10000L,
2000L, 7000L), Name = structure(c(16L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L), .Label = c("Bahrain",
"Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin",
"Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil",
"Brunei", "Bulgaria", "Burkina Faso", "The Bahamas"), class = "factor"),
Type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "Unit", class = "factor"),
Symbol = structure(c(1L, 2L, 3L, 4L, 6L, 5L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L), .Label = c("BAHM", "BAHR",
"BANG", "BARB", "BELGM", "BELS", "BELZ", "BEN", "BHUT", "BOL",
"BOSHER", "BOTS", "BRAZ", "BRUN", "BULG", "BURKF"), class = "factor"),
Code = c(3712013344, 4835205752, 3247578406, 5842098895,
2287368539, 4307638090, 462793263, 3221488867, 5932776587,
3230369605, 2388991307, 4854959101, 1504944270, 2561470743,
3205402223, 2453202442), Unit = c(40L, 58L, 70L, 82L, 77L,
95L, 95L, 74L, 79L, 100L, 45L, 79L, 95L, 35L, 27L, 58L),
Total = c(478030L, 126912L, 476025L, 175973L, 93351L, 51012L,
147431L, 203728L, 170553L, 293789L, 304471L, 376281L, 502999L,
399160L, 386526L, 230761L), Measurement = c(20L, 29L, 20L,
23L, 30L, 29L, 26L, 23L, 24L, 30L, 27L, 29L, 23L, 28L, 30L,
29L), Value = c(16083459L, 50568702L, 27726279L, 20909501L,
15150240L, 50512814L, 34069742L, 16959919L, 15891735L, 21770264L,
34841369L, 49524999L, 20350968L, 25121096L, 30024743L, 38498281L
), Percent = c(160L, 177L, 129L, 102L, 25L, 24L, 106L, 15L,
87L, 142L, 112L, 126L, 104L, 191L, 180L, 95L), CO2 = c(2,
0, 2, 1, 0, 0.2, 0, 1, 0, 2, 0, 4, 0, 5, 2.2, 0)), class = "data.frame", row.names = c(NA,
-16L))
Can this be integrated into the read.csv argument, or is it easier to clean the bottom of it some other way.
Three thoughts:
Use readLines (as #user2554330 suggested), find/remove the specific row, filter it, then parse the text vector with read.csv, the least of the three.
before[seq_len(min(head(which(!grepl("^[^- ]+$", before$Total)),1)-1L,nrow(before))),]; a bit complicated, granted, but it does what you need (assuming that you've already filtered the first 14 rows with skip=.
Use an external script such as sed -e '1,14d;/^[ -]\+$/{g;q;} in a pipe(...)-type thing.
Read it twice. The first time, use readLines("Example.csv"), and look through the lines for the marker of the end of data. Say it's on line n. Then in the second read, use
# Second pass over the file: parse only the rows that come before the
# end-of-data marker located on line `n`.
read.csv(
  file = "Example.csv",
  header = FALSE,
  col.names = c(
    "CountryID", "Name", "Type", "Symbol", "Code", "Unit",
    "Total", "Measurement", "Value", "Percent", "CO2"
  ),
  nrows = n - 1
)
(or maybe nrows will need to be a different value, if you're skipping some).

ggplot2: geom_area with factorial x-axis

I am trying to make a stacked area plot. My x-axis is age categories, but I want to connect them as a continuous scale.
I have number of diagnoses on the y (filled by diagnosis) and age on the x.
subset of data:
structure(list(diag = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Hjernerystelse",
"Lungesygdomme"), class = "factor"), age = structure(c(1L, 2L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L), .Label = c("0 år", "1-4 år",
"5-9 år", "10-14 år", "15-19 år", "20-24 år", "25-29 år", "30-34 år",
"35-39 år", "40-44 år", "45-49 år", "50-54 år", "55-59 år", "60-64 år",
"65-69 år", "70-74 år", "75-79 år", "80-84 år", "85- år"), class = "factor"),
n = c(15L, 89L, 87L, 71L, 46L, 32L, 26L, 24L, 32L, 40L, 74L,
55L, 39L, 19L, 38L, 27L, 24L, 14L, 23L, 291L, 2170L, 267L,
269L, 234L, 244L, 256L, 336L, 432L, 638L, 458L, 792L, 1010L,
1401L, 2088L, 2087L, 1815L, 1767L, 1995L)), row.names = c(NA,
-38L), .Names = c("diag", "age", "n"), class = c("tbl_df", "tbl",
"data.frame"))
The following gives a useful plot, but doesn't conserve the categories on the x-axis:
# Area chart of diagnosis counts by age group. The age factor is converted to
# its integer codes so that geom_area() can connect the groups.
plot1 <- ggplot(data = foo, mapping = aes(x = age, y = n)) +
  geom_area(mapping = aes(x = as.numeric(factor(age)), fill = diag))
Is there a simple way to use the geom_area on factorial data, or just a way to show the levels on the x-axis.
An other problem is that by data have 29 different diagnoses giving this result:
Try this approach:
library(ggplot2)
library(directlabels)

# Area chart with the age groups placed on a continuous axis.
# The x aesthetic is numeric (the integer codes of the `age` factor), so the
# axis has to be a continuous scale: one break per factor level, labelled with
# the level name, puts the category labels back on the axis.
# as.numeric(age) is used rather than as.numeric(factor(age)) so that the codes
# always line up with levels(foo$age), even if a level has no rows.
ggplot(foo, aes(x = as.numeric(age), y = n, fill = diag)) +
  geom_area() +
  scale_x_continuous(
    breaks = seq_along(levels(foo$age)),
    labels = levels(foo$age)
  ) +
  # Label the areas directly; `method` is passed by name because it is not the
  # second positional argument in every directlabels version.
  geom_dl(aes(label = diag), method = list("top.points", cex = .6)) +
  # "none" replaces the deprecated `FALSE` for switching a legend off.
  guides(fill = "none")
Edit:
With regards to your comment, try this
library(ggplot2)
library(directlabels)
library(dplyr)

# Fetch the example data into a temporary file and load it. The pipeline below
# expects the loaded file to provide a data frame named `shaped`.
fn <- file.path(tempdir(), "admissions.Rdata")
download.file(
  "https://dl.dropboxusercontent.com/u/12226044/admissions.Rdata",
  destfile = fn,
  mode = "wb"
)
load(fn)

shaped %>%
  # Average n within each diagnosis / age-group combination.
  group_by(diag, age) %>%
  summarise(n = mean(n)) %>%
  # NOTE(review): the integer codes are assumed to line up with
  # levels(shaped$age), i.e. every level occurs in the data - confirm.
  ggplot(aes(x = as.numeric(factor(age)), y = n, fill = diag)) +
  geom_area(position = "stack") +
  # x is numeric, so use a continuous scale with one labelled break per level.
  scale_x_continuous(
    breaks = seq_along(levels(shaped$age)),
    labels = levels(shaped$age),
    expand = c(.1, .1)
  ) +
  # `method` is passed by name because it is not the second positional
  # argument in every directlabels version.
  geom_dl(
    aes(label = diag, color = diag),
    method = list("last.bumpup", rot = -30, cex = .5),
    position = "stack"
  ) +
  # "none" replaces the deprecated `FALSE` for switching legends off.
  guides(fill = "none", colour = "none")

Resources