Using a regression model to predict values - r

I am currently working with 2 separate CSV datasets. I have already used the first data set named PRICEtable4.1 to visualize a relationship between the x values (GBA) and the y values (PRICE). I have attached a picture of the graph right below.
What I need to do now is use that cubic regression model from the first CSV dataset to predict the y values (PREDICTED_PRICE) in the second CSV dataset based on the x values (GBA) given. Is there a function that lets me make that connection? The code I used to create the regression model is below
# Training data: x is the gross building area (GBA), y is the price (PRICE).
train_X <- PRICEtable4.1$GBA
train_y <- PRICEtable4.1$PRICE
# NOTE: the evaluation vectors are read from the same table as the training
# vectors, so the MSE and R squared computed below are in-sample figures.
test_X <- PRICEtable4.1$GBA
test_y <- PRICEtable4.1$PRICE
X <- train_X
# R is case-sensitive: the data viewer is View(), as used everywhere else here.
View(X)
y <- train_y
View(y)

# Cubic polynomial regression of y on X.
poly_order <- 3
model <- lm(y ~ poly(X, poly_order))
print(model)

#MSE
# The model formula names its predictor X, so the data frame handed to
# predict() has to carry the new x values in a column called X.
test_yhat <- predict(model, data.frame(X = test_X))
MSE <- mean((test_y - test_yhat)^2)
print(MSE)

#R squared
# Squared correlation between fitted and observed values.
test_ymean <- mean(test_y)
test_yhatmean <- mean(test_yhat)
R_squared <- (sum((test_yhat - test_yhatmean) * (test_y - test_ymean)))^2 /
  (sum((test_yhat - test_yhatmean)^2) * sum((test_y - test_ymean)^2))
print(R_squared)
error2 <- data.frame(MSE = c(MSE), R_squared = c(R_squared))
View(error2)

#Visualization of the model
X_new <- X
View(X_new)
y_new <- predict(model, data.frame(X = X_new))
View(y_new)
# Scatter plot of the observations with the fitted curve drawn on top.
PRICEmodel <- ggplot(PRICEtable4.1, aes(x = GBA, y = PRICE)) +
  geom_point(size = 2)
PRICEmodel +
  geom_line(aes(x = X_new, y = y_new), color = "Red") +
  labs(
    x = "Gross building area (ft^2)",
    y = "Price",
    title = "Price Regression Model"
  )
Here's a dput of the first dataset named PRICEtable4.1 (first 20 rows)
structure(list(ID = c(1L, 2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L), GBA = c(1324L,
2120L, 1216L, 1804L, 1836L, 1228L, 1312L, 1262L, 1461L, 1120L,
1037L, 832L, 1500L, 920L, 1565L, 1134L, 1184L, 1420L, 2082L,
1422L), PRICE = c(1375000L, 1467000L, 549410L, 1180000L, 828000L,
742000L, 829000L, 710000L, 775000L, 380000L, 600000L, 189000L,
200000L, 265000L, 560000L, 300000L, 200000L, 940000L, 1050000L,
979000L)), row.names = c(NA, 20L), class = "data.frame")
Here's the dput of the second CSV dataset named Test (first 20 rows)
structure(list(ID = c(1L, 2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L), GBA = c(1324L,
2120L, 1216L, 1804L, 1836L, 1228L, 1312L, 1262L, 1461L, 1120L,
1037L, 832L, 1500L, 920L, 1565L, 1134L, 1184L, 1420L, 2082L,
1422L), PRICE = c(1375000L, 1467000L, 549410L, 1180000L, 828000L,
742000L, 829000L, 710000L, 775000L, 380000L, 600000L, 189000L,
200000L, 265000L, 560000L, 300000L, 200000L, 940000L, 1050000L,
979000L)), row.names = c(NA, 20L), class = "data.frame")
> dput(Test[1:20, ])
structure(list(ID = 1:20, BATHRM = c(2L, 2L, 1L, 3L, 4L, 2L,
1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 1L, 2L, 3L, 2L), HF_BATHRM = c(1L,
1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 1L, 0L), HEAT = c("Forced Air", "Forced Air", "Warm Cool",
"Forced Air", "Forced Air", "Warm Cool", "Hot Water Rad", "Forced Air",
"Warm Cool", "Forced Air", "Forced Air", "Hot Water Rad", "Forced Air",
"Forced Air", "Warm Cool", "Forced Air", "Forced Air", "Warm Cool",
"Ht Pump", "Forced Air"), AC = c("Y", "Y", "N", "Y", "Y", "Y",
"N", "Y", "Y", "N", "N", "N", "N", "Y", "Y", "Y", "Y", "Y", "Y",
"Y"), NUM_UNITS = c(2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L), ROOMS = c(9L, 7L, 6L, 7L,
13L, 5L, 7L, 7L, 6L, 7L, 7L, 8L, 5L, 8L, 5L, 8L, 6L, 8L, 10L,
7L), BEDRM = c(3L, 3L, 3L, 4L, 6L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
2L, 4L, 2L, 4L, 3L, 2L, 3L, 3L), AYB = c(1870L, 1890L, 1911L,
1920L, 1993L, 1947L, 1895L, 1910L, 1910L, 1950L, 1951L, 1928L,
1941L, 2018L, 1939L, 2018L, 1980L, 1951L, 1910L, 1908L), YR_RMDL = c(1980L,
1963L, NA, 2001L, 2018L, NA, 1987L, 2017L, NA, NA, NA, NA, NA,
NA, 1992L, NA, 2013L, 2005L, 2004L, 1984L), EYB = c(1967L, 1982L,
1957L, 1972L, 2003L, 1958L, 1957L, 1964L, 1954L, 1960L, 1951L,
1954L, 1961L, 2018L, 1957L, 2018L, 1991L, 1961L, 1975L, 1960L
), STORIES = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1.75, 2, 1, 2,
2, 2, 2, 2, 2), GBA = c(1324L, 2120L, 1216L, 1804L, 5036L, 1836L,
1228L, 1312L, 1262L, 1461L, 1120L, 1037L, 832L, 1500L, 920L,
1565L, 1134L, 1184L, 1420L, 2082L), BLDG_NUM = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), STYLE = c("2 Story", "2 Story", "2 Story", "2 Story", "2 Story",
"2 Story", "2 Story", "2 Story", "2 Story", "2 Story", "2 Story",
"2 Story", "2 Story", "1 Story", "2 Story", "2 Story", "2 Story",
"2 Story", "2 Story", "2 Story"), STRUCT = c("Row Inside", "Row Inside",
"Row Inside", "Single", "Single", "Single", "Row Inside", "Row Inside",
"Row Inside", "Single", "Semi-Detached", "Single", "Row Inside",
"Semi-Detached", "Semi-Detached", "Single", "Row Inside", "Multi",
"Row Inside", "Row Inside"), LANDAREA = c(1575L, 1800L, 1280L,
5000L, 10252L, 3000L, 1500L, 1641L, 1358L, 6300L, 1818L, 3500L,
1280L, 5098L, 1899L, 5009L, 1152L, 2910L, 1762L, 1400L), ASSESSMENT_NBHD = c("Old City 2",
"Capitol Hill", "Old City 1", "Palisades", "Chevy Chase", "Chevy Chase",
"Eckington", "Ledroit Park", "Eckington", "Riggs Park", "Riggs Park",
"Woodridge", "Lily Ponds", "Fort Dupont Park", "Hillcrest", "Hillcrest",
"Congress Heights", "Congress Heights", "Old City 1", "Capitol Hill"
), PREDICTED_PRICE = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, 20L
), class = "data.frame")
So I need to predict and fill out the values in the "PREDICTED_PRICE" column in the second CSV dataset using the regression model I created from the first dataset

Related

xAxis order of R highcharter column plot

With the following data frame:
dta <- structure(list(sociodemographic_var = structure(c(3L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 26L, 18L, 20L, 21L, 26L, 13L, 16L, 21L, 22L, 26L, 26L,
9L, 13L, 17L, 18L, 20L, 21L, 23L, 26L, 20L, 26L), levels = c("1st grade",
"2nd grade", "3rd grade", "4th grade", "5th grade", "6th grade",
"7th grade", "8th grade", "9th grade", "10th grade", "11th grade",
"12th grade, no diploma", "High school graduate", "GED or equivalent",
"Some college, no degree", "Less than 1 year of college credit/post-secondary education (or less than 10 classes)",
"One year or more of college credit, no degree", "Associate degree: Occupational, Technical, or Vocational",
"Associate degree: Academic Program", "Bachelor's degree (ex. BA, AB, BS, BBS)",
"Master's degree (ex. MA, MS, MEng, MEd, MBA)", "Professional School degree (ex. MD, DDS, DVN, JD)",
"Doctoral degree (ex. PhD, EdD)", "Refused to answer", "Don't Know",
"unknown"), class = "factor"), event = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 7L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 11L, 11L), levels = c("Baseline", "0.5 Year", "1 Year",
"1.5 Year", "2 Year", "2.5 Year", "3 Year", "3.5 Year", "4 Year",
"4.5 Year", "5 Year", "5.5 Year", "6 Year", "Screener"), class = "factor"),
visit_type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), levels = c("on-site", "hybrid", "remote", "unknown"), class = "factor"),
n = c(2L, 13L, 5L, 9L, 15L, 18L, 26L, 25L, 192L, 27L, 485L,
224L, 183L, 1011L, 666L, 55L, 78L, 3L, 9L, 1L, 1L, 2L, 208L,
1L, 1L, 1L, 1L, 126L, 28L, 1L, 1L, 2L, 2L, 3L, 4L, 1L, 543L,
1L, 300L)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-39L))
I would assume that, generating a highcharter bar plot with:
library(highcharter) # v0.9.4
# Percent-stacked column chart: event on the x axis, n on the y axis and one
# series per sociodemographic_var value. No x-axis categories are supplied.
dta |>
  hchart(
    type = "column",
    hcaes(x = "event", y = "n", group = "sociodemographic_var")
  ) |>
  hc_yAxis(
    title = list(text = "%"),
    max = 115,
    endOnTick = FALSE,
    stackLabels = list(enabled = TRUE)
  ) |>
  hc_xAxis(title = "") |>
  hc_plotOptions(series = list(stacking = "percent"))
the xAxis would be ordered by levels(dta$event):
levels(dta$event)
[1] "Baseline" "0.5 Year" "1 Year" "1.5 Year" "2 Year" "2.5 Year" "3 Year" "3.5 Year" "4 Year" "4.5 Year" "5 Year" "5.5 Year"
[13] "6 Year" "Screener"
But the ordering is different and neither alphabetical nor based on the total number of values:
I am interested to understand why it's the case and how to set the order right.
You can add categories to your hc_xAxis to make an order like this:
library(highcharter)
# Same percent-stacked column chart, but the x-axis categories are given
# explicitly as the factor levels of dta$event, in level order.
dta |>
  hchart(
    type = "column",
    hcaes(x = "event", y = "n", group = "sociodemographic_var")
  ) |>
  hc_yAxis(
    title = list(text = "%"),
    max = 115,
    endOnTick = FALSE,
    stackLabels = list(enabled = TRUE)
  ) |>
  hc_xAxis(
    title = "",
    categories = levels(dta$event)
  ) |>
  hc_plotOptions(series = list(stacking = "percent"))
Output:

caret::confusionMatrix internal matrix error: chr "length of 'dimnames' not equal to array extent"

Problem
caret::confusionMatrix. Code in R has new functions and includes Try/Catch, but on execution of R functions, no errors except internal to matrix.
To calculate the confusionMatrix sensitivity, specificity and accuracy over a range of thresholds, I can do this successfully outside a loop and get all measures. But when I use variables to build the confusionMatrix inside the function (compute_seq_accuracy.func), which includes a Try/Catch block, running it does not show any errors, yet the function also DOES NOT create the confusionMatrix correctly. So I had to create a custom function to fix the levels error from the confusionMatrix. This function is called compute_confusion_table.func(); it takes the y response variable and the predict variable matrix, fixes missing row names and column names, and returns a valid contingency table (e.g., return(ccm.func.confusion_table)).
The main driver function (compute_seq_accuracy.func) takes the sequence values (0.1...0.9) and calls the function compute_confusion_table.func() to return a valid contingency table. BUT this function does not work correctly to calculate the confusionMatrix. There is no Try/Catch error, but inside the matrix within the function I can see that there is an internal error: "chr length of dimnames that are not equal to array extent". This error is visible through str() on the variable embedded inside the function for debugging. I have a debugging str() statement inside the compute_matrix.func() function which clearly shows the internal error "chr length of dimnames that are not equal to array extent".
Code Attempt
# Predicted values on the response scale for every row of the training data.
# na.action = na.pass is forwarded to predict(); the sample output contains NA
# entries, so everything downstream has to tolerate missing predictions.
loans_predict <- predict(full, newdata = loans_train_data, type = "response",
                         na.action = na.pass)

# Accuracy of the predictions when `loans_predict` is cut at `value`.
#
# value: a single cut-off applied to loans_predict.
# Returns a length-one numeric named "Accuracy"; if any step fails a warning
# is raised and the value is NA, so the threshold sweep can still be compared
# numerically instead of silently carrying condition objects around.
compute_seq_accuracy.func <- function(value) {
  tryCatch(
    {
      reference <- loans_train_data$statusRank
      # The predicted classes must use the same labels as the reference factor
      # ("0"/"1" in the data sample); the former "Good"/"Bad" labels could
      # never match them.
      # NOTE(review): assumes loans_predict is the probability of class "1",
      # so values at or above the cut-off are classed "1" -- confirm against
      # the model `full`.
      predicted <- factor(
        ifelse(loans_predict >= value, "1", "0"),
        levels = levels(reference)
      )
      confusion_table <- compute_confusion_table.func(reference, predicted)
      # compute_matrix.func() already returns overall["Accuracy"]; extracting
      # it a second time from that numeric vector would fail.
      compute_matrix.func(predicted, confusion_table)
    },
    error = function(e) {
      warning(
        "Accuracy at threshold ", value, " could not be computed: ",
        conditionMessage(e),
        call. = FALSE
      )
      c(Accuracy = NA_real_)
    }
  )
}

# Overall accuracy reported by caret::confusionMatrix().
#
# p: factor of predicted classes.
# t: either a contingency table with the reference classes in rows and the
#    predicted classes in columns (the layout produced by table(y, p)), or a
#    factor of reference classes with the same levels as `p`.
# Returns the "Accuracy" element of the confusion matrix's `overall` vector.
# Errors are not caught here, so the caller decides how to report them.
compute_matrix.func <- function(p, t) {
  if (is.table(t)) {
    # The table method expects predictions in rows and the reference in
    # columns, hence the transpose. base::t is spelled out because the
    # argument is itself called `t`.
    cm.func.confusion_matrix <- caret::confusionMatrix(
      as.table(base::t(t)),
      positive = "1"
    )
  } else {
    cm.func.confusion_matrix <- caret::confusionMatrix(
      data = p,
      reference = t,
      positive = "1"
    )
  }
  cm.func.confusion_matrix$overall["Accuracy"]
}
##
# Square contingency table of reference values against predicted values.
#
# y: reference (observed) classes; coerced to a factor.
# p: predicted classes, same length as y; coerced to a factor.
# Returns a table with y in rows and p in columns. Both dimensions use the
# same levels in the same order, so a class that never occurs on one side
# still gets an all-zero row or column. The table is therefore always square,
# and no row or column names have to be patched afterwards (patching failed
# with a dimnames/array-extent error whenever a column was the missing part).
# Errors, such as inputs of different length, are raised rather than returned
# as condition objects.
compute_confusion_table.func <- function(y, p) {
  stopifnot(length(y) == length(p))
  y <- as.factor(y)
  p <- as.factor(p)
  # Reference levels first, followed by any label that only p uses.
  shared_levels <- union(levels(y), levels(p))
  table(
    factor(y, levels = shared_levels),
    factor(p, levels = shared_levels),
    dnn = c("y", "p")
  )
}
# Sweep the cut-offs 0.1, 0.2, ..., 0.9 and keep one result per cut-off.
compute_for_values <- seq(0.1, 0.9, by = 0.1)
csa_computed_accuracies <- lapply(
  compute_for_values,
  compute_seq_accuracy.func
)
names(csa_computed_accuracies) <- compute_for_values
# Reduce every result to a plain number; anything that is not a single numeric
# value (for example a returned condition object or NULL) counts as missing,
# so which.max() can skip it.
csa_accuracy_values <- vapply(
  csa_computed_accuracies,
  function(accuracy) {
    if (is.numeric(accuracy) && length(accuracy) == 1L) {
      as.numeric(accuracy)
    } else {
      NA_real_
    }
  },
  numeric(1)
)
# Entry with the highest accuracy, named by its cut-off.
csa_computed_accuracies[which.max(csa_accuracy_values)]
Additional Explanation
I am running the R confusionMatrix for the accuracy range (0.1...0.9), but ran into trouble with the internal error "chr length of dimnames that are not equal to array extent". Since I only ran the glm2() function and the predict() function before the confusionMatrix, I never changed the column names nor issued any rbind() that would have changed the column names / order, so I am at a loss as to why this confusionMatrix function throws an error saying the predicted and actual parameters do not have the same number of levels. I now understand that the functions in the caret package go to great lengths to ensure predictions always have the same levels as the original classes. That is the purpose of the compute_confusion_table.func function. So, in addition, my other question is: where, or when, did this data (predicted and actual) ultimately get its levels out of sync?
Data Sample
loans_predict
> dput(loans_predict[1:20])
c(`11413` = 0.803325118108046, `2561` = NA, `25337` = 0.853849488971217,
`1643` = 0.793893769102712, `14264` = 0.714721872072079, `24191` = 0.778606178072608,
`33989` = 0.890633845537385, `28193` = 0.798171905376348, `21129` = 0.898502735539081,
`7895` = 0.881058550796637, `29007` = 0.760753392722403, `26622` = 0.662375927088179,
`3065` = 0.925597132852884, `11423` = 0.763460597845282, `3953` = 0.921791256610175,
`5789` = 0.745157387706153, `30150` = 0.587949357489077, `6070` = 0.915098837460939,
`1486` = 0.680767185297498, `13195` = 0.76299062675687)
>
loans_train_data
> dput(loans_train_data[1:20,])
structure(list(loan_id = c(551879L, 442449L, 187882L, 619315L,
453195L, 513713L, 647765L, 296L, 571832L, 128305L, 358197L, 26517L,
12653L, 374086L, 305620L, 197445L, 307835L, 331697L, 550455L,
301842L), amount = c(8400L, 15000L, 15600L, 12000L, 9975L, 21000L,
20000L, 7500L, 21000L, 16000L, 4800L, 25000L, 7500L, 15600L,
3000L, 3900L, 20125L, 25000L, 11975L, 11825L), term = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L), .Label = c("36", "60"), class = "factor"), rate = c(0.12,
0.1, 0.12, 0.15, 0.16, 0.12, 0.09, 0.12, 0.11, 0.09, 0.13, 0.2,
0.07, 0.17, 0.07, 0.19, 0.19, 0.08, 0.15, 0.1), payment = c(280.17,
483.94, 518.07, 415.93, 348.78, 700.42, 640.57, 250.51, 687.42,
510.07, 161.02, 928.97, 233.27, 556.11, 92.49, 143.89, 521.95,
785.49, 282.69, 251.19), grade = structure(c(5L, 6L, 6L, 5L,
4L, 5L, 6L, 5L, 6L, 6L, 5L, 3L, 7L, 4L, 7L, 4L, 3L, 6L, 5L, 6L
), .Label = c("-1", "10", "20", "40", "60", "80", "100"), class = "factor"),
employment = c("store manager", "", "IT Director", "IT Specialist",
"Patient Serbice Rep", "Manager", "Service Technician", "E-6",
"Pharmacist/Pharmacy Supervisor", "Direct Sales Supervisor",
"Documement Associate", "Project Manager - Training", "Senior Accountant",
"Executive Assistant", "Chef", "Retail Sales, Production Assistant",
"Legislative Auditor", "System Specialist", "Lab Tech", "Store Manager"
), length = c(10, NA, 3, 6, 6, 4, 10, 8, 6, 3, 6, 10, 3,
1, 10, 5, 10, 10, 10, 4), home = structure(c(2L, 2L, 3L,
1L, 3L, 3L, 1L, 3L, 1L, 2L, 3L, 2L, 3L, 1L, 3L, 2L, 1L, 1L,
1L, 3L), .Label = c("MORTGAGE", "OWN", "RENT"), class = "factor"),
income = c(49600, 62255, 131000, 80000, 60000, 116000, 80000,
75132, 115000, 68000, 28000, 70000, 70000, 79000, 50000,
36000, 58000, 78900, 30000, 82000), verified = structure(c(2L,
1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 2L, 2L,
2L, 1L, 2L, 2L), .Label = c("Not Verified", "Source Verified",
"Verified"), class = "factor"), status = structure(c(3L,
3L, 3L, 1L, 1L, 3L, 3L, 3L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), .Label = c("Charged Off", "Default", "Fully Paid"
), class = "factor"), reason = structure(c(4L, 2L, 3L, 3L,
3L, 2L, 2L, 8L, 3L, 3L, 3L, 3L, 3L, 9L, 9L, 4L, 2L, 4L, 2L,
3L), .Label = c("car", "credit_card", "debt_consolidation",
"home_improvement", "house", "major_purchase", "medical",
"moving", "other", "renewable_energy", "small_business",
"vacation", "wedding"), class = "factor"), state = structure(c(46L,
21L, 30L, 2L, 5L, 11L, 14L, 5L, 37L, 42L, 42L, 44L, 33L,
8L, 19L, 11L, 41L, 14L, 3L, 33L), .Label = c("AK", "AL",
"AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "HI",
"IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", "MI", "MN",
"MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", "NV",
"NY", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX",
"UT", "VA", "VT", "WA", "WI", "WV", "WY"), class = "factor"),
debt_inc_rat = c(20.42, 14.9, 12.83, 20.07, 26.18, 15.26,
25.59, 6.01, 21.07, 10.94, 31.2, 13.12, 9.33, 1.2, 11.74,
27.7, 26.65, 19.24, 23.4, 21.84), delinq2yr = c(1L, 0L, 1L,
1L, 0L, 0L, 0L, 1L, 0L, 2L, 0L, 1L, 0L, 2L, 0L, 0L, 0L, 0L,
0L, 0L), inq6mth = c(1L, 3L, 1L, 3L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L), open_acc = c(10L,
13L, 9L, 14L, 11L, 4L, 11L, 6L, 18L, 11L, 8L, 9L, 13L, 5L,
16L, 16L, 7L, 26L, 4L, 19L), pub_rec = c(1L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L), revol_ratio = c(0.22, 0.53, 0.62, 0.61, 0.58, 0.78,
0.66, 0.04, 0.85, 0.46, 0.46, 0.92, 0.59, 0.32, 0.21, 0.72,
0.85, 0.57, 0.64, 0.23), total_acc = c(26L, 17L, 29L, 35L,
20L, 8L, 18L, 8L, 40L, 16L, 14L, 28L, 26L, 11L, 26L, 36L,
24L, 41L, 15L, 36L), total_paid = c(10004.43161, 17053.23761,
17788.62, 8909.41, 2424.16, 22618.02, 23069.03794, 8820.741039,
20586.97, 18349.10828, 1213.64, 18590.71, 8394.285943, 16982.69,
3293.165743, 5093.61, 21429.4, 25922.82, 12875.89999, 12959.12
), total_bal = c(175097L, 25184L, 39845L, 276866L, 44292L,
66042L, 233534L, 7059L, 219927L, 114621L, 29741L, 335392L,
46661L, 114486L, 22527L, 40819L, 176379L, 238772L, 57456L,
71201L), total_rev_lim = c(19000L, 47100L, 25700L, 13040L,
17900L, 65500L, 72100L, 39800L, 64900L, 20100L, 28200L, 20700L,
7100L, 9600L, 55700L, 7200L, 27000L, 8300L, 7200L, 34300L
), acc_open24 = c(4L, 7L, 6L, 8L, 3L, 2L, 3L, 2L, 9L, 4L,
2L, 4L, 2L, 2L, 1L, 13L, 4L, 3L, 3L, 6L), avg_bal = c(17510L,
1937L, 4427L, 21297L, 4027L, 16511L, 23353L, 1177L, 12937L,
10420L, 4249L, 47913L, 3589L, 22897L, 1408L, 2721L, 25197L,
9184L, 14364L, 4188L), bc_open = c(6690L, 19007L, 7940L,
2267L, 5888L, 7758L, 16881L, 35401L, 782L, 5818L, 12515L,
0L, 2942L, 0L, 35319L, 1834L, 360L, 2316L, 2600L, 23337L),
bc_ratio = c(14.2, 56.6, 66.5, 77, 33.8, 80.8, 57.9, 3.8,
98.4, 49.4, 49.3, 102.2, 58.6, 103.7, 20.5, 67.8, 98.5, 66.9,
63.9, 25), total_lim = c(207427L, 47352L, 57626L, 292092L,
69796L, 88066L, 298525L, 57181L, 259850L, 131377L, 54172L,
361379L, 53648L, 124005L, 69108L, 69822L, 201136L, 253377L,
73564L, 114318L), total_rev_bal = c(31782L, 25184L, 39845L,
44598L, 44292L, 66042L, 84958L, 7059L, 134381L, 23389L, 29741L,
25022L, 46661L, 3088L, 22527L, 23481L, 81434L, 94468L, 33134L,
71201L), total_bc_lim = c(7800L, 43800L, 23700L, 6800L, 8900L,
40500L, 40100L, 36800L, 48200L, 11500L, 24700L, 16800L, 7100L,
2000L, 44400L, 5700L, 23400L, 7000L, 7200L, 31100L), total_il_lim = c(38991L,
0L, 31926L, 44052L, 51896L, 22566L, 71125L, 17381L, 104950L,
14140L, 25972L, 10000L, 46548L, 0L, 13408L, 28244L, 59542L,
99477L, 32892L, 80018L), statusRank = structure(c(2L, 2L,
2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("0", "1"), class = "factor")), row.names = c(11413L,
2561L, 25337L, 1643L, 14264L, 24191L, 33989L, 28193L, 21129L,
7895L, 29007L, 26622L, 3065L, 11423L, 3953L, 5789L, 30150L, 6070L,
1486L, 13195L), class = "data.frame")
>
Data Trace from Debug str()
When the R code is run, the following str(csa.cmt) is issued from prior statement:
csa.cmt <- compute_matrix.func(csa.func.p, csa.func.confusion_table)
However, the handling of the difference of 'same levels' was coded to be handled and corrected inside the called / invoked the function 'compute_confusion_table.func'
> csa_computed_accuracies <- sapply(compute_for_values, compute_seq_accuracy.func, simplify = FALSE)
List of 2
$ message: chr "`data` and `reference` should be factors with the same levels."
$ call : NULL
- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
List of 2
$ message: chr "`data` and `reference` should be factors with the same levels."
$ call : NULL
- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
List of 2
$ message: chr "`data` and `reference` should be factors with the same levels."
$ call : NULL
- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
List of 2
$ message: chr "`data` and `reference` should be factors with the same levels."
$ call : NULL
- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
List of 2
$ message: chr "`data` and `reference` should be factors with the same levels."
$ call : NULL
- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
List of 2
$ message: chr "`data` and `reference` should be factors with the same levels."
$ call : NULL
- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
>

1) How do I sort multiple tables in a list by descending order? 2) How do I create dataframes from one list of multiple tables?

I have multiple tables in a list.
1) How do I sort all tables in the list by descending order? (Ideally, I'd keep my object as a list).
EDIT: Sort items in each table by descending order.
Example of what I have now:
$animals
Cat 10
Dog 20
Panda 50
Snake 40
$colors
blue 20
green 5
red 30
yellow 2
Example of what I want:
$animals
Panda 50
Snake 40
Dog 20
Cat 10
$colors
red 30
blue 20
green 5
yellow 2
2) How do I create multiple dataframes from the multiple tables in the list? For example, the first table in the list is called 'brand', and the second table in the list is called 'style'. I want to create new dataframes called df_brand and df_style.
3) I am sorry my dput() is long. I could not figure out how to print the head() of my list of multiple tables. If you know how to do that, I would appreciate a solution for that too.
x <- list(brand = structure(c(`1 To 3 Noodles` = 1L, `7 Select` = 2L,
`7 Select/Nissin` = 1L, `A-One` = 4L, `A-Sha Dry Noodle` = 26L,
A1 = 3L, ABC = 12L, Acecook = 15L, Adabi = 4L, `Ah Lai` = 2L,
Ajinatori = 2L, Amianda = 10L, Amino = 3L, `Annie Chun's` = 12L,
Aroi = 2L, `Asia Gold` = 4L, `Asian Thai Foods` = 14L, `Authentically Asian` = 1L,
Azami = 5L, Baijia = 11L, `Baixiang Noodles` = 5L, Baltix = 2L,
Bamee = 5L, Batchelors = 16L, `Binh Tay` = 3L, `Bon Go Jang` = 2L,
Bonasia = 4L, Boss = 1L, `Campbell's` = 3L, `Cap Atoom Bulan` = 1L,
CarJEN = 7L, `Chaudhary's Wai Wai` = 1L, Chencun = 5L, `Chering Chang` = 5L,
Chewy = 8L, Chikara = 1L, `China Best` = 1L, `Ching's Secret` = 4L,
`Chorip Dong` = 1L, ChoripDong = 1L, Choumama = 1L, `Chuan Wei Wang` = 2L,
Cintan = 5L, `CJ CheilJedang` = 2L, Conimex = 5L, `Crystal Noodle` = 1L,
`Curry Prince` = 1L, Daddy = 1L, Daifuku = 1L, Daikoku = 6L,
Daraz = 1L, Deshome = 13L, Doll = 16L, Dongwon = 1L, `Dr. McDougall's` = 1L,
Dragonfly = 13L, `Dream Kitchen` = 4L, `E-mi` = 2L, `E-Zee` = 3L,
`Eat & Go` = 5L, Econsave = 1L, Emart = 7L, Fantastic = 6L, `Farmer's Heart` = 1L,
`Fashion Food` = 3L, `Fashion Foods` = 5L, FMF = 2L, Foodmon = 2L,
`Forest Noodles` = 4L, Fortune = 4L, `Four Seas` = 8L, `Fu Chang Chinese Noodle Company` = 1L,
`Fuji Mengyo` = 1L, Fujiwara = 7L, Fuku = 10L, GaGa = 7L, `Gau Do` = 2L,
Gefen = 4L, GGE = 1L, `Global Inspiration` = 1L, `Goku-Uma` = 4L,
`Goku Uma` = 3L, `Golden Mie` = 3L, `Golden Wheat` = 12L, `Golden Wonder` = 1L,
Gomex = 2L, `Good Tto Leu Foods` = 1L, `Great Value` = 7L, GreeNoodle = 4L,
GS25 = 2L, `Guava Story` = 1L, Haioreum = 1L, `Han's South Korea` = 3L,
Hankow = 2L, `Hao Way` = 8L, `Happy Cook` = 3L, `Happy Family` = 2L,
Healtimie = 2L, `Hi-Myon` = 2L, Higashi = 1L, Higashimaru = 1L,
HoMyeonDang = 5L, Hosoonyi = 1L, `Hsin Tung Yang` = 1L, `Hua Feng` = 1L,
`Hua Feng Noodle Expert` = 2L, Ibumie = 10L, IbuRamen = 3L, ICA = 2L,
`Ikeda Shoku` = 2L, iMee = 4L, Indomie = 53L, iNoodle = 2L, Ishimaru = 1L,
Itomen = 5L, Itsuki = 4L, J.J. = 2L, `Jackpot Teriyaki` = 1L,
JFC = 2L, Jingqi = 8L, JML = 23L, `Just Way` = 2L, `Kabuto Noodles` = 5L,
Kailo = 3L, Kamfen = 15L, `Kang Shi Fu` = 5L, Katoz = 1L, `Kiki Noodle` = 2L,
`Kim's Bowl` = 1L, `Kim Ve Wong` = 1L, Kimura = 1L, `Kin-Dee` = 2L,
Knorr = 8L, `Ko-Lee` = 10L, `Koh Thai` = 4L, Koka = 18L, KOKA = 25L,
`Komforte Chockolates` = 1L, Koyo = 7L, Kumamoto = 1L, Kuriki = 3L,
`La Fonte` = 2L, `La Moderna` = 1L, `Lee Fah Mee` = 1L, Lele = 1L,
`Liang Cheng Mai` = 1L, Lipton = 1L, Lishan = 1L, `Lishan Food Manufacturing` = 1L,
`Little Cook` = 14L, `Liu Quan` = 1L, `Long Jun Hang` = 2L, `Long Kow` = 5L,
`Lotus Foods` = 3L, `Love Cook` = 5L, `Lucky Me!` = 34L, Maggi = 30L,
Maitri = 1L, Mama = 71L, MAMA = 27L, `Mama Pat's` = 4L, Mamee = 29L,
Maruchan = 76L, Marutai = 7L, `Master Kong` = 28L, `Mee Jang` = 7L,
`Men-Sunaoshi` = 2L, Menraku = 8L, `Mexi-Ramen` = 1L, `Mi E-Zee` = 5L,
`Mi Sedaap` = 12L, `Mie Sedaap` = 1L, Migawon = 1L, Miliket = 1L,
`Miracle Noodle` = 1L, Mitoku = 1L, `Mom's Dry Noodle` = 6L,
Morre = 1L, `Mr. Lee's Noodles` = 6L, `Mr. Noodles` = 15L, `Mr. Udon` = 4L,
`Mug Shot` = 2L, `Mum Ngon` = 1L, MyKuali = 24L, Myojo = 63L,
MyOri = 5L, `Nagao Noodle` = 1L, Nagatanien = 1L, `Nakaya Shouten` = 1L,
`Nan Hsing` = 1L, `Nan Jie Cun` = 1L, `Nanyang Chef` = 2L, `New Touch` = 9L,
`New Way` = 1L, Nissin = 381L, `No Name` = 2L, `Noah Foods` = 2L,
Nongshim = 98L, `Noodle Time` = 2L, `Nyor Nyar` = 2L, `O Sung` = 1L,
Ogasawara = 2L, Ohsung = 3L, Omachi = 1L, `One Dish Asia` = 1L,
`Oni Hot Pot` = 4L, `ORee Garden` = 1L, `Osaka Ramen` = 1L, Ottogi = 46L,
Oyatsu = 4L, Paldo = 66L, `Paldo Vina` = 3L, Pama = 4L, Pamana = 1L,
Papa = 1L, Patanjali = 1L, Payless = 6L, Peyang = 1L, Pirkka = 3L,
`Plats Du Chef` = 1L, `Pop Bihun` = 3L, `Pot Noodle` = 11L, Pran = 2L,
Premiere = 2L, President = 1L, `President Rice` = 1L, Prima = 4L,
`Prima Taste` = 7L, Pringles = 1L, Pulmuone = 8L, Q = 1L, `Qin Zong` = 1L,
Quickchow = 5L, `Rhee Bros Assi` = 6L, `Right Foods` = 1L, `Ripe'n'Dry` = 3L,
`Rocket Brand` = 1L, Roland = 2L, `Royal Umbrella` = 2L, Ruski = 6L,
`S&S` = 1L, Sahmyook = 1L, `Saigon Ve Wong` = 13L, `Sainsbury's` = 5L,
Saji = 2L, `Sakura Noodle` = 5L, Sakurai = 1L, `Sakurai Foods` = 10L,
`Salam Mie` = 2L, `Samurai Ramen` = 1L, Samyang = 19L, `Samyang Foods` = 52L,
Sanpo = 1L, Sanrio = 1L, `Sanyo Foods` = 1L, `Sao Tao` = 4L,
`Sapporo Ichiban` = 25L, Sarimi = 7L, `Sau Tao` = 14L, Sawadee = 4L,
Sempio = 3L, `Seven-Eleven` = 1L, `Seven & I` = 1L, Shan = 5L,
Shirakiku = 11L, `Sichuan Baijia` = 10L, `Sichuan Guangyou` = 4L,
`Singa-Me` = 3L, `Six Fortune` = 6L, Smack = 1L, Snapdragon = 5L,
Sokensha = 1L, `Song Hak` = 1L, Souper = 2L, Springlife = 1L,
`Star Anise Foods` = 1L, `Sugakiya Foods` = 2L, Suimin = 8L,
`Sun Noodle` = 7L, Sunlee = 8L, Sunlight = 1L, `Sunny Maid` = 1L,
Super = 5L, `Super Bihun` = 4L, SuperMi = 8L, Sura = 1L, Sutah = 1L,
Tablemark = 3L, Takamori = 1L, `Takamori Kosan` = 14L, `Tao Kae Noi` = 1L,
`Tasty Bite` = 6L, Tayho = 1L, `Ten-In` = 2L, `Teriyaki Time` = 1L,
Tesco = 4L, `Thai Chef` = 4L, `Thai Choice` = 3L, `Thai Kitchen` = 10L,
`Thai Pavilion` = 3L, `Thai Smile` = 3L, `The Bridge` = 1L, `The Kitchen Food` = 2L,
`The Ramen Rater Select` = 1L, `Thien Houng Foods` = 1L, Tiger = 1L,
`Tiger Tiger` = 2L, `Tokachimen Koubou` = 1L, `Tokushima Seifun` = 4L,
`Tokyo Noodle` = 4L, Torishi = 1L, Tradition = 5L, TRDP = 1L,
Trident = 4L, `Tropicana Slim` = 2L, `Tseng Noodles` = 7L, TTL = 3L,
`Tung-I` = 1L, `Uncle Sun` = 2L, `Uni-President` = 12L, Unif = 13L,
`Unif-100` = 2L, `Unif / Tung-I` = 11L, `Unif Tung-I` = 1L, United = 3L,
Unox = 6L, Unzen = 1L, `Urban Noodle` = 5L, `US Canning` = 1L,
`Ve Wong` = 24L, Vedan = 6L, Vifon = 33L, `Vina Acecook` = 34L,
`Vit's` = 13L, `Wai Wai` = 25L, Wang = 6L, `Weh Lih` = 1L, `Wei Chuan` = 2L,
`Wei Lih` = 15L, `Wei Wei` = 3L, Westbrae = 1L, `Western Family` = 6L,
`World O' Noodle` = 2L, `Wu-Mu` = 12L, `Wu Mu` = 7L, Wugudaochang = 10L,
`Xiao Ban Mian` = 3L, Xiuhe = 1L, Yamachan = 11L, Yamadai = 1L,
Yamamori = 2L, Yamamoto = 4L, `Yum-Mie` = 1L, `Yum Yum` = 12L,
`Zow Zow` = 1L), .Dim = 355L, .Dimnames = structure(list(c("1 To 3 Noodles",
"7 Select", "7 Select/Nissin", "A-One", "A-Sha Dry Noodle", "A1",
"ABC", "Acecook", "Adabi", "Ah Lai", "Ajinatori", "Amianda",
"Amino", "Annie Chun's", "Aroi", "Asia Gold", "Asian Thai Foods",
"Authentically Asian", "Azami", "Baijia", "Baixiang Noodles",
"Baltix", "Bamee", "Batchelors", "Binh Tay", "Bon Go Jang", "Bonasia",
"Boss", "Campbell's", "Cap Atoom Bulan", "CarJEN", "Chaudhary's Wai Wai",
"Chencun", "Chering Chang", "Chewy", "Chikara", "China Best",
"Ching's Secret", "Chorip Dong", "ChoripDong", "Choumama", "Chuan Wei Wang",
"Cintan", "CJ CheilJedang", "Conimex", "Crystal Noodle", "Curry Prince",
"Daddy", "Daifuku", "Daikoku", "Daraz", "Deshome", "Doll", "Dongwon",
"Dr. McDougall's", "Dragonfly", "Dream Kitchen", "E-mi", "E-Zee",
"Eat & Go", "Econsave", "Emart", "Fantastic", "Farmer's Heart",
"Fashion Food", "Fashion Foods", "FMF", "Foodmon", "Forest Noodles",
"Fortune", "Four Seas", "Fu Chang Chinese Noodle Company", "Fuji Mengyo",
"Fujiwara", "Fuku", "GaGa", "Gau Do", "Gefen", "GGE", "Global Inspiration",
"Goku-Uma", "Goku Uma", "Golden Mie", "Golden Wheat", "Golden Wonder",
"Gomex", "Good Tto Leu Foods", "Great Value", "GreeNoodle", "GS25",
"Guava Story", "Haioreum", "Han's South Korea", "Hankow", "Hao Way",
"Happy Cook", "Happy Family", "Healtimie", "Hi-Myon", "Higashi",
"Higashimaru", "HoMyeonDang", "Hosoonyi", "Hsin Tung Yang", "Hua Feng",
"Hua Feng Noodle Expert", "Ibumie", "IbuRamen", "ICA", "Ikeda Shoku",
"iMee", "Indomie", "iNoodle", "Ishimaru", "Itomen", "Itsuki",
"J.J.", "Jackpot Teriyaki", "JFC", "Jingqi", "JML", "Just Way",
"Kabuto Noodles", "Kailo", "Kamfen", "Kang Shi Fu", "Katoz",
"Kiki Noodle", "Kim's Bowl", "Kim Ve Wong", "Kimura", "Kin-Dee",
"Knorr", "Ko-Lee", "Koh Thai", "Koka", "KOKA", "Komforte Chockolates",
"Koyo", "Kumamoto", "Kuriki", "La Fonte", "La Moderna", "Lee Fah Mee",
"Lele", "Liang Cheng Mai", "Lipton", "Lishan", "Lishan Food Manufacturing",
"Little Cook", "Liu Quan", "Long Jun Hang", "Long Kow", "Lotus Foods",
"Love Cook", "Lucky Me!", "Maggi", "Maitri", "Mama", "MAMA",
"Mama Pat's", "Mamee", "Maruchan", "Marutai", "Master Kong",
"Mee Jang", "Men-Sunaoshi", "Menraku", "Mexi-Ramen", "Mi E-Zee",
"Mi Sedaap", "Mie Sedaap", "Migawon", "Miliket", "Miracle Noodle",
"Mitoku", "Mom's Dry Noodle", "Morre", "Mr. Lee's Noodles", "Mr. Noodles",
"Mr. Udon", "Mug Shot", "Mum Ngon", "MyKuali", "Myojo", "MyOri",
"Nagao Noodle", "Nagatanien", "Nakaya Shouten", "Nan Hsing",
"Nan Jie Cun", "Nanyang Chef", "New Touch", "New Way", "Nissin",
"No Name", "Noah Foods", "Nongshim", "Noodle Time", "Nyor Nyar",
"O Sung", "Ogasawara", "Ohsung", "Omachi", "One Dish Asia", "Oni Hot Pot",
"ORee Garden", "Osaka Ramen", "Ottogi", "Oyatsu", "Paldo", "Paldo Vina",
"Pama", "Pamana", "Papa", "Patanjali", "Payless", "Peyang", "Pirkka",
"Plats Du Chef", "Pop Bihun", "Pot Noodle", "Pran", "Premiere",
"President", "President Rice", "Prima", "Prima Taste", "Pringles",
"Pulmuone", "Q", "Qin Zong", "Quickchow", "Rhee Bros Assi", "Right Foods",
"Ripe'n'Dry", "Rocket Brand", "Roland", "Royal Umbrella", "Ruski",
"S&S", "Sahmyook", "Saigon Ve Wong", "Sainsbury's", "Saji", "Sakura Noodle",
"Sakurai", "Sakurai Foods", "Salam Mie", "Samurai Ramen", "Samyang",
"Samyang Foods", "Sanpo", "Sanrio", "Sanyo Foods", "Sao Tao",
"Sapporo Ichiban", "Sarimi", "Sau Tao", "Sawadee", "Sempio",
"Seven-Eleven", "Seven & I", "Shan", "Shirakiku", "Sichuan Baijia",
"Sichuan Guangyou", "Singa-Me", "Six Fortune", "Smack", "Snapdragon",
"Sokensha", "Song Hak", "Souper", "Springlife", "Star Anise Foods",
"Sugakiya Foods", "Suimin", "Sun Noodle", "Sunlee", "Sunlight",
"Sunny Maid", "Super", "Super Bihun", "SuperMi", "Sura", "Sutah",
"Tablemark", "Takamori", "Takamori Kosan", "Tao Kae Noi", "Tasty Bite",
"Tayho", "Ten-In", "Teriyaki Time", "Tesco", "Thai Chef", "Thai Choice",
"Thai Kitchen", "Thai Pavilion", "Thai Smile", "The Bridge",
"The Kitchen Food", "The Ramen Rater Select", "Thien Houng Foods",
"Tiger", "Tiger Tiger", "Tokachimen Koubou", "Tokushima Seifun",
"Tokyo Noodle", "Torishi", "Tradition", "TRDP", "Trident", "Tropicana Slim",
"Tseng Noodles", "TTL", "Tung-I", "Uncle Sun", "Uni-President",
"Unif", "Unif-100", "Unif / Tung-I", "Unif Tung-I", "United",
"Unox", "Unzen", "Urban Noodle", "US Canning", "Ve Wong", "Vedan",
"Vifon", "Vina Acecook", "Vit's", "Wai Wai", "Wang", "Weh Lih",
"Wei Chuan", "Wei Lih", "Wei Wei", "Westbrae", "Western Family",
"World O' Noodle", "Wu-Mu", "Wu Mu", "Wugudaochang", "Xiao Ban Mian",
"Xiuhe", "Yamachan", "Yamadai", "Yamamori", "Yamamoto", "Yum-Mie",
"Yum Yum", "Zow Zow")), .Names = ""), class = "table"), style = structure(c(2L,
Bar = 1L, Bowl = 481L, Box = 6L, Can = 1L, Cup = 450L, Pack = 1531L,
Tray = 108L), .Dim = 8L, .Dimnames = structure(list(c("", "Bar",
"Bowl", "Box", "Can", "Cup", "Pack", "Tray")), .Names = ""), class = "table"),
country = structure(c(Australia = 22L, Bangladesh = 7L, Brazil = 5L,
Cambodia = 5L, Canada = 41L, China = 169L, Colombia = 6L,
Dubai = 3L, Estonia = 2L, Fiji = 4L, Finland = 3L, Germany = 27L,
Ghana = 2L, Holland = 4L, `Hong Kong` = 137L, Hungary = 9L,
India = 31L, Indonesia = 126L, Japan = 352L, Malaysia = 156L,
Mexico = 25L, Myanmar = 14L, Nepal = 14L, Netherlands = 15L,
Nigeria = 1L, Pakistan = 9L, Philippines = 47L, Poland = 4L,
Sarawak = 3L, Singapore = 109L, `South Korea` = 309L, Sweden = 3L,
Taiwan = 224L, Thailand = 191L, UK = 69L, `United States` = 1L,
USA = 323L, Vietnam = 108L), .Dim = 38L, .Dimnames = structure(list(
c("Australia", "Bangladesh", "Brazil", "Cambodia", "Canada",
"China", "Colombia", "Dubai", "Estonia", "Fiji", "Finland",
"Germany", "Ghana", "Holland", "Hong Kong", "Hungary",
"India", "Indonesia", "Japan", "Malaysia", "Mexico",
"Myanmar", "Nepal", "Netherlands", "Nigeria", "Pakistan",
"Philippines", "Poland", "Sarawak", "Singapore", "South Korea",
"Sweden", "Taiwan", "Thailand", "UK", "United States",
"USA", "Vietnam")), .Names = ""), class = "table"), whole_stars = structure(c(`0` = 54L,
`1` = 103L, `2` = 250L, `3` = 1043L, `4` = 741L, `5` = 386L,
U = 3L), .Dim = 7L, .Dimnames = structure(list(c("0", "1",
"2", "3", "4", "5", "U")), .Names = ""), class = "table"),
top_rank = structure(c(2539L, `
` = 4L, `1` = 5L, `10` = 5L,
`2` = 2L, `3` = 2L, `4` = 4L, `5` = 3L, `6` = 4L, `7` = 4L,
`8` = 3L, `9` = 5L), .Dim = 12L, .Dimnames = structure(list(
c("", "\n", "1", "10", "2", "3", "4", "5", "6", "7",
"8", "9")), .Names = ""), class = "table"), top_year = structure(c(2539L,
`
` = 4L, `2012` = 9L, `2013` = 7L, `2014` = 8L, `2015` = 7L,
`2016` = 6L), .Dim = 7L, .Dimnames = structure(list(c("",
"\n", "2012", "2013", "2014", "2015", "2016")), .Names = ""), class = "table"))
To sort each component, use lapply:
# Sort every table in the list from most to least frequent
sorted <- lapply(x, function(tbl) sort(tbl, decreasing = TRUE))
To convert the tables to dataframes, use as.data.frame. This gives you a list of dataframes, then changes the names:
# Turn each sorted table into a data frame and prefix the list names with "df_"
df <- setNames(
  lapply(sorted, as.data.frame),
  paste0("df_", names(sorted))
)
If you also want these as separate variables (which is probably not a good idea), you could use
# Create one variable per list element, named after that element
for (nm in names(df)) {
  assign(nm, df[[nm]])
}
To get the head of each element of the list, use lapply again:
# Show the first rows (head's default of six) of every data frame in the list
lapply(df, function(d) head(d))
This gives output starting out as
$df_brand
Var1 Freq
1 Nissin 381
2 Nongshim 98
3 Maruchan 76
4 Mama 71
5 Paldo 66
6 Myojo 63
$df_style
Var1 Freq
1 Pack 1531
2 Bowl 481
3 Cup 450
4 Tray 108
5 Box 6
6 2

Plot values in ggplot geom_lines

I have a dataframe like this one:
> dput(df)
structure(list(OBBLIGATORIO = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("no",
"yes"), class = "factor"), COUNTRY = structure(c(16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L), .Label = c("Austria", "Belgium", "Bulgaria",
"Croatia", "Cyprus", "Czech Republic", "Denmark", "Estonia",
"Finland", "France", "Germany", "Greece", "Hungary", "Iceland",
"Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta",
"Norway", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia",
"Spain", "Sweden", "United Kingdom of Great Britain and Northern Ireland"
), class = "factor"), YEAR = c(2003L, 2006L, 2007L, 2008L, 2009L,
2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L,
2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L, 1996L, 1997L,
1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L, 2007L, 2008L,
2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L,
2002L, 2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L, 1996L,
1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L, 2007L,
2008L, 2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L,
2001L, 2002L, 2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L,
1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L,
2007L, 2008L, 2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L,
2000L, 2001L, 2002L), AGE = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Total", class = "factor"),
`CAUSE OF DEATH` = c("Acute poliomyelitis", "Acute poliomyelitis",
"Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis",
"Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis",
"Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis",
"Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis",
"Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria",
"Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria",
"Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Measles",
"Measles", "Measles", "Measles", "Measles", "Measles", "Measles",
"Measles", "Measles", "Measles", "Measles", "Measles", "Measles",
"Measles", "Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus",
"Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus",
"Tetanus", "Tetanus", "Tetanus", "Tuberculosis", "Tuberculosis",
"Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis",
"Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis",
"Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis",
"Viral hepatitis", "Viral hepatitis", "Viral hepatitis",
"Viral hepatitis", "Viral hepatitis", "Viral hepatitis",
"Viral hepatitis", "Viral hepatitis", "Viral hepatitis",
"Viral hepatitis", "Viral hepatitis", "Viral hepatitis",
"Viral hepatitis", "Viral hepatitis", "Whooping cough", "Whooping cough",
"Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough",
"Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough",
"Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough"
), VALUE = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 1L, 1L, 6L, 7L, 7L, 1L, 2L,
3L, 2L, 5L, 12L, 9L, 13L, 9L, 13L, 8L, 17L, 14L, 16L, 18L,
15L, 19L, 11L, 10L, 25L, 24L, 21L, 22L, 23L, 20L, 34L, 32L,
31L, 30L, 29L, 28L, 27L, 26L, 41L, 42L, 43L, 45L, 46L, 47L,
33L, 35L, 36L, 37L, 38L, 39L, 40L, 44L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 3L, 1L, 1L, 1L, 1L), .Label = c("0", "1",
"2", "3", "6", "7", "9", "17", "18", "19", "21", "22", "27",
"28", "30", "31", "37", "41", "42", "301", "329", "333",
"344", "350", "396", "413", "415", "460", "517", "558", "597",
"609", "622", "647", "681", "1087", "1349", "1413", "1448",
"1499", "1576", "1654", "1725", "1948", "2531", "2665", "2757"
), class = "factor"), ID = 1:98), .Names = c("OBBLIGATORIO",
"COUNTRY", "YEAR", "AGE", "CAUSE OF DEATH", "VALUE", "ID"), row.names = c(NA,
-98L), class = "data.frame")
I want to obtain a chart that:
on the x axis there are the values from the YEAR column
on the y axis there are the values from the VALUE column
the data are divided (one line each) by the CAUSE OF DEATH column
So something like:
I try:
# Asker's attempt: open an X11 graphics device, then draw a density plot.
x11()
# The aesthetics are taken from df with `$`: YEAR on x, VALUE on y (VALUE is
# stored as a factor in the dput above), and CAUSE OF DEATH for both the fill
# and the outline colour.
ggplot(df, aes(x = df$`YEAR`, y = df$`VALUE`, fill = df$`CAUSE OF DEATH`, colour = df$`CAUSE OF DEATH`)) +
# density layer drawn with alpha = 0.1 (mostly transparent)
geom_density(alpha = 0.1) +
# limit the x axis to the years 1995 through 2010
xlim(1995, 2010)
But the result is completely different from the one I want.
Thanks
I'm not sure what your actual question is, but one problem with your dataframe is that the VALUE column is currently defined as a factor, not as a numeric. I think that remedying this will go a long way to solving your problem. I do this after the fact below (i.e. after the dataframe is already created), but if you are getting the data into R via a read.table() or similar command, you can specify the class of your columns at data frame creation time (e.g. with the colClasses argument), which is probably a better approach.
In my code below I use the dplyr package for manipulating dataframes. It's quite powerful, but for this particular example it isn't doing anything that base R couldn't do.
# Attach the packages with library(): it stops with an error when a package
# is missing, whereas require() only returns FALSE and lets the script fail
# later with a less helpful message.
library(ggplot2)
library(dplyr)
library(magrittr)
df <- ### YOUR dput output goes here ###
# fix the problem with the `VALUE` column: it is a factor, so go through
# as.character first -- as.numeric on the factor itself would return the
# internal level codes instead of the numbers shown in the labels
df %<>% mutate(VALUE = VALUE %>% as.character %>% as.numeric)
# equivalent in base R:
# df$VALUE <- as.numeric(as.character(df$VALUE))
# make a graph (is it the one you want?): total VALUE per year and cause of
# death, one coloured line (with points) per cause
death_plot <- df %>%
  group_by(YEAR, `CAUSE OF DEATH`) %>%
  summarize(value = sum(VALUE)) %>%
  # drop the grouping left over from summarize() before plotting
  ungroup() %>%
  ggplot(aes(x = YEAR, y = value, color = `CAUSE OF DEATH`)) +
  geom_line() +
  theme_bw() +
  geom_point()
print(death_plot)
# save graph for uploading to SO; pass the plot explicitly rather than
# relying on ggsave()'s default of the last plot displayed
ggsave("SO37230266.png", plot = death_plot)
The result is this graph:

How to use multiple symbols in plots based on different variables in R?

I have created a PCA for measurements collected on individuals from four locations placed on four substrates with three replicates. I have the sex (male or female) and "karyotype" (a factor with three possible categories), and I have calculated the first two PC scores for each individual.
I would like to make a plot where males and females have different symbols and the colour of the symbols depends on the karyotype. I have created a plot with the code below that gives me one symbol, colour-coded for the three karyotypes, and puts 95% confidence ellipses around the males and females.
How can I change the symbol for each sex while keeping the colouring dependent on the karyotype? I would also like to have this reflected in the legend.
One last question. Is it possible to add an arrow for each PC (not each individual) from the origin similar to those found in ordination plots?
Sample Data:
test <- structure(list(Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Kampinge", "Kaseberga", "Molle", "Steninge"
), class = "factor"), Substrate = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L), .Label = c("Kampinge", "Kaseberga", "Molle",
"Steninge"), class = "factor"), Replicate = structure(c(1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
Sex = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L
), .Label = c("Female", "Male"), class = "factor"), Karyotype = structure(c(3L,
4L, 3L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 2L, 4L,
3L, 3L, 4L, 4L, 3L, 4L, 3L, 4L, 3L), .Label = c("", "BB",
"BD", "DD"), class = "factor"), Wing_Length = c(1439L, 1224L,
1558L, 1508L, 1286L, 1560L, 1377L, 1486L, 1638L, 1475L, 1703L,
1726L, 1668L, 1405L, 1737L, 1419L, 1530L, 1508L, 1525L, 1326L,
1609L, 1357L, 1830L, 1476L, 1661L), Leg_Length = c(465L,
357L, 610L, 415L, 343L, 560L, 435L, 390L, 425L, 514L, 693L,
695L, 657L, 454L, 661L, 382L, 431L, 531L, 435L, 387L, 407L,
414L, 752L, 524L, 650L), Development_Time = c(15, 15, 12,
12, 12, 12, 12, 12, 12, 15, 15, 15, 15, 15, 15, 15, 11, 12,
14, 12, 14, 14, 14, 11, 11), PC1 = c(-281.031806232855, -515.247908786317,
-96.7283446465637, -260.171340782501, -476.664849753781,
-127.267190895631, -347.839240839062, -293.08530374415, -154.026702195308,
-221.98257463847, 67.7504074590983, 86.6778734586525, 17.8073498265326,
-314.171132928964, 73.3068216627556, -349.616320093329, -233.030545551831,
-185.761623361004, -234.30046275676, -417.754317941649, -187.820500930148,
-376.653043663908, 203.025275308178, -214.80078992031, 7.94703091626344
), PC2 = c(-78.3082792875783, -133.370219905995, -113.211488986839,
4.31036861466361, -82.8593541869054, -73.5708675263244, -95.0643731443612,
9.37702847686542, 80.0290301136235, -92.8061497557789, -83.8731164047719,
-70.6537733486393, -78.706783632851, -91.6793310834752, -37.5144466525303,
-27.4637667171696, 6.14809390611532, -84.6794844768708, -0.127837123829732,
-90.9556028004192, 75.2353710655562, -91.7834027435658, -47.669385541585,
-99.8362257341741, -77.8269478596591)), .Names = c("Location",
"Substrate", "Replicate", "Sex", "Karyotype", "Wing_Length",
"Leg_Length", "Development_Time", "PC1", "PC2"), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 30L, 31L), class = "data.frame")
## Plot
# Single panel with a square plotting region
par(mfrow = c(1, 1), mar = c(4, 4, 2, 1), pty = "s")
# PC2 against PC1; the Karyotype factor codes are used as colour indices
plot(
  test$PC1, test$PC2,
  xlab = "PC1", ylab = "PC2",
  pch = 16, col = as.numeric(test$Karyotype),
  xlim = c(-1000, 1000), ylim = c(-250, 250),
  las = 1, cex.lab = 1.5, cex.axis = 1.25, main = NULL
)
# Labelled ellipses (conf = 0.95) over the PC1/PC2 scores, one per sex
ordiellipse(
  test[, c("PC1", "PC2")], test$Sex,
  conf = 0.95, col = "black", cex = 1.75, label = TRUE
)
# Legend entries follow the order in which the karyotypes first appear
legend(
  "bottomright",
  pch = 16,
  col = unique(as.numeric(test$Karyotype)),
  legend = unique(test$Karyotype),
  cex = 1.75
)
Replace the pch argument in your plot() call with something like:
pch=ifelse(test$Sex=='Male',15,19)
Try with ggplot:
library(ggplot2)
# Colour encodes karyotype, point shape encodes sex; grouping by Sex gives
# one ellipse per sex
ggplot(
  test,
  aes(x = PC1, y = PC2, color = Karyotype, shape = Sex, group = Sex)
) +
  geom_point(size = 5) +
  stat_ellipse()

Resources