Add column based on header cell; then remove header cell - r

I have interesting data that is not uniform. A group of items are listed under the category name, but it is all in the same column. I need to add a column with the row corresponding to the item's category that it belongs to (then remove the category heading). The only way to distinguish a new category is determining whether the value under the year is empty.... My dputs should explain my issue more clearly.
Before:
structure(list(X = structure(c(13L, 1L, 19L, 16L, 5L, 17L, 11L,
8L, 2L, 10L, 4L, 6L, 18L, 15L, 21L, 12L, 14L, 9L, 3L, 20L, 7L
), .Label = c("-Burgers", "-Cameras", "-Shirts", "+Laptops",
"+Salads", "+TVs", "Caps", "Cell", "Clothes:", "Desktops", "Electronics",
"Flowers", "Food", "Garden Nomes", "Grills", "Hotdogs", "Nachoes",
"Outdoors:", "Pizza", "Shorts", "Swimming Gear"), class = "factor"),
X2000 = c(NA, 104L, 159L, 184L, 189L, 182L, NA, 49L, 28L,
46L, 34L, 43L, NA, 129L, 190L, 189L, 119L, NA, 45L, 80L,
80L), X2001 = c(NA, 147L, 192L, 164L, 174L, 196L, NA, 40L,
34L, 43L, 35L, 22L, NA, 114L, 130L, 120L, 145L, NA, 56L,
35L, 54L), X2002 = c(NA, 163L, 172L, 138L, 146L, 190L, NA,
38L, 40L, 21L, 22L, 33L, NA, 186L, 172L, 139L, 119L, NA,
88L, 78L, 91L), X2003 = c(NA, 125L, 152L, 182L, 148L, 125L,
NA, 36L, 44L, 34L, 27L, 50L, NA, 119L, 115L, 188L, 166L,
NA, 91L, 77L, 77L), X2004 = c(NA, 116L, 111L, 120L, 153L,
199L, NA, 49L, 48L, 43L, 37L, 32L, NA, 159L, 116L, 143L,
153L, NA, 18L, 53L, 51L)), .Names = c("X", "X2000", "X2001",
"X2002", "X2003", "X2004"), class = "data.frame", row.names = c(NA,
-21L))
After:
structure(list(X = structure(c(1L, 15L, 13L, 5L, 14L, 8L, 2L,
9L, 4L, 6L, 12L, 17L, 10L, 11L, 3L, 16L, 7L), .Label = c("-Burgers",
"-Cameras", "-Shirts", "+Laptops", "+Salads", "+TVs", "Caps",
"Cell", "Desktops", "Flowers", "Garden Nomes", "Grills", "Hotdogs",
"Nachoes", "Pizza", "Shorts", "Swimming Gear"), class = "factor"),
X.1 = structure(c(3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 1L, 1L, 1L), .Label = c("Clothes:", "Electronics",
"Food", "Outdoors:"), class = "factor"), X2000 = c(104L,
159L, 184L, 189L, 182L, 49L, 28L, 46L, 34L, 43L, 129L, 190L,
189L, 119L, 45L, 80L, 80L), X2001 = c(147L, 192L, 164L, 174L,
196L, 40L, 34L, 43L, 35L, 22L, 114L, 130L, 120L, 145L, 56L,
35L, 54L), X2002 = c(163L, 172L, 138L, 146L, 190L, 38L, 40L,
21L, 22L, 33L, 186L, 172L, 139L, 119L, 88L, 78L, 91L), X2003 = c(125L,
152L, 182L, 148L, 125L, 36L, 44L, 34L, 27L, 50L, 119L, 115L,
188L, 166L, 91L, 77L, 77L), X2004 = c(116L, 111L, 120L, 153L,
199L, 49L, 48L, 43L, 37L, 32L, 159L, 116L, 143L, 153L, 18L,
53L, 51L)), .Names = c("X", "X.1", "X2000", "X2001", "X2002",
"X2003", "X2004"), class = "data.frame", row.names = c(NA, -17L
))
The items arbitrarily have + or - signs... I need those to remain the same. Also, some category headers end with a colon (:) while others do not.

We create an index based on the 'NA' values in columns other than the 1st ('indx'). We split the dataset using the 'indx', remove the first row i.e. NA values from columns 2nd to the last, cbind with the 1st row, 1st column value, rearrange the columns and rbind.
# A row whose value columns (everything but the 1st) are all NA is a category
# header. The running count of header rows gives every row the id of the
# category block it belongs to.
indx <- cumsum(rowSums(!is.na(df1[-1])) == 0)
# For each block: append the header's 1st-column value as X.1, drop the header
# row, and move X.1 to the 2nd position. The column order is derived from
# ncol() so the code works for any number of value columns.
res <- do.call(rbind, lapply(split(df1, indx), function(x) {
  n <- ncol(x)
  cbind(x, X.1 = x[1, 1])[-1, c(1, n + 1, seq_len(n)[-1])]
}))
# rbind() builds row names from the split ids; reset them to 1..n.
row.names(res) <- NULL
all.equal(res, out, check.attributes=FALSE)
#[1] TRUE
where 'out' is the dput output of the expected result
Update
If the columns have '' instead of NA,
indx <- cumsum(!rowSums(df1[-1]!=''))
and do the rest as above. Having said that, when we have '' in a numeric column, the class will be either factor or character, depending on whether you specify stringsAsFactors=TRUE or =FALSE in read.table/read.csv. So, keeping the '' as such means the output columns will also be of factor/character class. I would convert the columns to their correct class first, which will also coerce the '' to NA, i.e.
df1[-1] <- lapply(df1[-1], function(x) as.numeric(as.character(x)))
The as.character is only needed if the columns are of factor class.
Once we have done the conversion, the first approach should work fine as well.

Related

R datatable rotate header not aligning

I am trying to rotate the header for a datatable. I was able to rotate the header but the rotated header is not aligning with the columns. This is what I have now:
I referenced this for my code : header direction in shiny data table
Here is my code:
# Question's attempt at a table with rotated column headers; it is kept
# verbatim because it reproduces the misalignment described above.
# Options: paging and searching are off; autoWidth and scrollX are on.
# NOTE(review): T and F are ordinary, reassignable variables; TRUE/FALSE are
# the safe spellings.
datatable(df, rownames = F,class = c("compact"),options = list(paging = F, autoWidth = T,searching= FALSE,
scrollX=T,
# After initialisation, set the header font size to 12px.
initComplete = JS("function(settings, json) {$(this.api().table().header()).css({'font-size' : '12px'});}"),
# For every header cell: wrap its text in an outer div (whose width is taken
# from the cell's height) holding an inner div with 'writing-mode' tb-rl and a
# 220deg rotation, then replace the cell's contents with that wrapper.
headerCallback = JS(
"function(thead, data, start, end, display){
var $ths = $(thead).find('th');
$ths.css({'vertical-align': 'bottom', 'white-space': 'nowrap'});
var betterCells = [];
$ths.each(function(){
var cell = $(this);
var newDiv = $('<div>', {height: 'auto', width: cell.height()});
var newInnerDiv = $('<div>', {text: cell.text()});
newDiv.css({margin: 'auto'});
newInnerDiv.css({
transform: 'rotate(220deg)',
'writing-mode': 'tb-rl',
'white-space': 'nowrap'
});
newDiv.append(newInnerDiv);
betterCells.push(newDiv);
});
$ths.each(function(i){
$(this).html(betterCells[i]);
});
}"
))) %>%
# Apply a 12px font size to columns 1 through 19.
formatStyle(columns = c(1:19), `font-size` = '12px')
DATA:
structure(list(diag_category_name = structure(1:18, .Label = c("AIDS",
"Any malignancy", "Cerebrovascular disease", "Chronic pulmonary disease",
"Congestive heart failure", "Dementia", "Diabetes (mild to moderate)",
"Diabetes with chronic complications", "Hemoplegia or paralegia",
"Metastatic solid tumor", "Mild liver disease", "Moderate to severe liver disease",
"Myocardial infarction", "No Charlson Comorbidity", "Peptic ulcer disease",
"Peripheral vascular disease", "Renal disease", "Rheumatologic disease"
), class = "factor"), AIDS = c(20L, 6L, 3L, 5L, 1L, NA, NA, NA,
1L, 14L, 2L, NA, 1L, NA, NA, 3L, 5L, NA), `Any malignancy` = c(6L,
1051L, 108L, 209L, 106L, 34L, 130L, 158L, 14L, NA, 92L, 10L,
86L, NA, 37L, 190L, 161L, 32L), `Cerebrovascular disease` = c(3L,
108L, 421L, 122L, 105L, 44L, 47L, 94L, 35L, 303L, 35L, 7L, 67L,
NA, 23L, 165L, 111L, 15L), `Chronic pulmonary disease` = c(5L,
209L, 122L, 726L, 146L, 34L, 80L, 152L, 23L, 505L, 84L, 15L,
90L, NA, 43L, 234L, 173L, 31L), `Congestive heart failure` = c(1L,
106L, 105L, 146L, 381L, 27L, 37L, 124L, 15L, 269L, 43L, 3L, 121L,
NA, 18L, 166L, 160L, 16L), Dementia = c(NA, 34L, 44L, 34L, 27L,
109L, 11L, 32L, 5L, 74L, 5L, 1L, 26L, NA, 3L, 44L, 44L, 3L),
`Diabetes (mild to moderate)` = c(NA, 130L, 47L, 80L, 37L,
11L, 475L, NA, 8L, 335L, 58L, 9L, 36L, NA, 17L, 79L, 45L,
12L), `Diabetes with chronic complications` = c(NA, 158L,
94L, 152L, 124L, 32L, NA, 573L, 18L, 400L, 75L, 9L, 83L,
NA, 27L, 173L, 229L, 16L), `Hemoplegia or paralegia` = c(1L,
14L, 35L, 23L, 15L, 5L, 8L, 18L, 63L, 45L, 4L, NA, 13L, NA,
4L, 21L, 14L, 3L), `Metastatic solid tumor` = c(14L, NA,
303L, 505L, 269L, 74L, 335L, 400L, 45L, 2778L, 264L, 27L,
186L, NA, 79L, 492L, 455L, 68L), `Mild liver disease` = c(2L,
92L, 35L, 84L, 43L, 5L, 58L, 75L, 4L, 264L, 362L, NA, 29L,
NA, 22L, 73L, 70L, 8L), `Moderate to severe liver disease` = c(NA,
10L, 7L, 15L, 3L, 1L, 9L, 9L, NA, 27L, NA, 38L, 3L, NA, 6L,
11L, 10L, 1L), `Myocardial infarction` = c(1L, 86L, 67L,
90L, 121L, 26L, 36L, 83L, 13L, 186L, 29L, 3L, 277L, NA, 20L,
99L, 86L, 11L), `No Charlson Comorbidity` = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 49L, NA, NA, NA,
NA), `Peptic ulcer disease` = c(NA, 37L, 23L, 43L, 18L, 3L,
17L, 27L, 4L, 79L, 22L, 6L, 20L, NA, 118L, 33L, 31L, 4L),
`Peripheral vascular disease` = c(3L, 190L, 165L, 234L, 166L,
44L, 79L, 173L, 21L, 492L, 73L, 11L, 99L, NA, 33L, 693L,
197L, 22L), `Renal disease` = c(5L, 161L, 111L, 173L, 160L,
44L, 45L, 229L, 14L, 455L, 70L, 10L, 86L, NA, 31L, 197L,
633L, 24L), `Rheumatologic disease` = c(NA, 32L, 15L, 31L,
16L, 3L, 12L, 16L, 3L, 68L, 8L, 1L, 11L, NA, 4L, 22L, 24L,
106L)), row.names = c(NA, -18L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x5638d466ed90>, sorted = "diag_category_name")
I have a fix. However, you may need to change some things depending on how you use this. For example, when you add x-axis scrolling, overflow is hidden. Tilted table headings overflow their cells, so with scrolling enabled some of the header labels would get hidden.
# Table with tilted column headers that stay aligned with their columns.
# Logical options are spelled TRUE/FALSE because T and F are ordinary
# variables that can be reassigned.
# The lines inside the JS() strings are deliberately left unindented so the
# JavaScript text is unchanged.
datatable(
  df1,
  rownames = FALSE,
  class = "compact",
  options = list(
    paging = FALSE,
    autoWidth = FALSE,
    searching = FALSE,
    # Horizontal scrolling hides overflow, which would clip the tilted labels.
    scrollX = FALSE,
    # After initialisation, set the header font size to 12px.
    initComplete = JS("function(settings, json) {
$(this.api().table().header()).css({
'font-size' : '12px'});}"),
    # Header cells are rotated 180deg, top-aligned, and given no border.
    # Each cell's text is re-wrapped in a 13px-wide, left-floated div whose
    # inner div uses vertical-rl writing mode, is rotated 45deg about its
    # top-left corner, and lets its overflow show.
    headerCallback = JS("function(thead, data, start, end, display){
var $ths = $(thead).find('th');
$ths.css({'vertical-align': 'top', 'padding': '4px 0px',
'transform': 'rotate(180deg)', 'border': 'none'});
var betterCells = [];
$ths.each(function(){
var cell = $(this);
var newDiv = $('<div>', {width: '13px', float: 'left'});
var newInnerDiv = $('<div>', {text: cell.text()});
newDiv.css({margin: 'auto'});
newInnerDiv.css({
'writing-mode': 'vertical-rl',
'white-space': 'nowrap',
'text-align': 'left',
'transform-origin': 'top left',
'transform': 'rotate(45deg)',
'overflow': 'visible'
});
newDiv.append(newInnerDiv);
betterCells.push(newDiv);
});
$ths.each(function(i){
$(this).html(betterCells[i]);
});}")
  )
) %>%
  # Apply a 12px font size to columns 1 through 19.
  formatStyle(columns = 1:19, `font-size` = "12px")
There's a lot going on here, and most of it was your code. However, if you have any questions about what's happening here, just let me know.

Loop with multiple subset of data frame

I have a data.frame fish.test0 for which I want to grep specific variables (in varlist) matching the group column to create a sub-data.frame that will undergo a statistical test. The results of the test are saved in tests.res.t. I want to loop over varlist so that I get one result for each entry in varlist.
Script:
# Question's loop, kept verbatim. For each name in varlist it subsets
# fish.test0 by the group column, runs fisher.test() once per unique Merge
# value, and gathers the estimate, confidence interval and p-value into
# tests.res.t.
varlist <- c("Abiotrophia","Alphatorquevirus")
for (i in varlist) {
# NOTE(review): the pattern is the quoted literal "i", not the loop variable
# i, so this matches every group label that contains the letter i.
fish.test <- fish.test0[grep("i",fish.test0$group),]
# NOTE(review): the sample fish.test0 below has a column named "ABCD" but none
# named "ACDC" - confirm the intended column name.
column <- c("ACDC")
# tests is re-created on every iteration and not used again inside the loop.
tests <- list()
# Builds one 2x2 table per Merge value and tests it. Column a is taken from
# the 2nd row of the pair (Present - colx, colx), column b from the 1st row
# (NotPresent - colx, colx).
# Assumes each Merge value has exactly two rows ordered NotPresent then
# Present, as in the sample data - TODO confirm.
dat_test <- sapply( column, function(colx)
lapply( unique(fish.test$Merge), function(x)
fisher.test( data.frame(
a=c(( fish.test[ which(fish.test$Merge %in% x)[2],"Present"] -
fish.test[ which(fish.test$Merge %in% x)[2], colx] ),fish.test[ which(fish.test$Merge %in% x)[2], colx]
),
b=c(( fish.test[ which(fish.test$Merge %in% x)[1],"NotPresent"] -
fish.test[ which(fish.test$Merge %in% x)[1], colx] ), fish.test[ which(fish.test$Merge %in% x)[1], colx]))) #,alternative = "greater"
) )
# Label the result matrix: one row per Merge value, one column per tested
# column name.
rownames(dat_test) <- unique(fish.test$Merge )
colnames(dat_test) <- column
# Pull the summary numbers out of each test result in the first column.
# NOTE(review): fisher.test() on a 2x2 table presumably reports a single
# odds-ratio estimate, so x$estimate[2] would be NA - confirm.
tests.res <- sapply(dat_test[1:dim(dat_test)[1],1], function(x) {
c(x$estimate[1],
x$estimate[2],
ci.lower = x$conf.int[1],
ci.upper = x$conf.int[2],
p.value = x$p.value)
})
# Transpose so each Merge value is a row. tests.res.t is overwritten on every
# iteration, so only the last variable's results remain after the loop.
tests.res.t <- as.data.frame(t(tests.res))
}
test-data:
fish.test0 <- structure(list(Present = c(4L, 4L, 9L, 9L, 57L, 57L, 146L, 146L,
91L, 91L, 26L, 26L, 6L, 6L, 12L, 12L, 33L, 33L, 10L, 10L, 66L,
66L, 4L, 4L, 4L, 4L, 9L, 9L, 18L, 18L, 19L, 19L, 51L, 51L, 50L,
50L, 12L, 12L, 7L, 7L, 14L, 14L, 27L, 27L, 9L, 9L, 5L, 5L, 6L,
6L, 22L, 22L, 3L, 3L, 14L, 14L, 4L, 4L, 15L, 15L, 6L, 6L, 8L,
8L, 4L, 4L), NotPresent = c(11L, 11L, 44L, 44L, 126L, 126L, 532L,
532L, 382L, 382L, 97L, 97L, 14L, 14L, 43L, 43L, 85L, 85L, 41L,
41L, 336L, 336L, 19L, 19L, 27L, 27L, 67L, 67L, 108L, 108L, 81L,
81L, 240L, 240L, 258L, 258L, 47L, 47L, 31L, 31L, 82L, 82L, 110L,
110L, 63L, 63L, 178L, 178L, 672L, 672L, 451L, 451L, 120L, 120L,
104L, 104L, 47L, 47L, 387L, 387L, 94L, 94L, 300L, 300L, 133L,
133L), group = c("G__Abiotrophia_NotPresent_Anus", "G__Abiotrophia_Present_Anus",
"G__Abiotrophia_NotPresent_Bile duct", "G__Abiotrophia_Present_Bile duct",
"G__Abiotrophia_NotPresent_Bone/Soft tissue", "G__Abiotrophia_Present_Bone/Soft tissue",
"G__Abiotrophia_NotPresent_Breast", "G__Abiotrophia_Present_Breast",
"G__Abiotrophia_NotPresent_Colorectum", "G__Abiotrophia_Present_Colorectum",
"G__Abiotrophia_NotPresent_Esophagus", "G__Abiotrophia_Present_Esophagus",
"G__Abiotrophia_NotPresent_Gallbladder", "G__Abiotrophia_Present_Gallbladder",
"G__Abiotrophia_NotPresent_Head and neck", "G__Abiotrophia_Present_Head and neck",
"G__Abiotrophia_NotPresent_Kidney", "G__Abiotrophia_Present_Kidney",
"G__Abiotrophia_NotPresent_Liver", "G__Abiotrophia_Present_Liver",
"G__Abiotrophia_NotPresent_Lung", "G__Abiotrophia_Present_Lung",
"G__Abiotrophia_NotPresent_Lymphoid tissue", "G__Abiotrophia_Present_Lymphoid tissue",
"G__Abiotrophia_NotPresent_Mesothelium", "G__Abiotrophia_Present_Mesothelium",
"G__Abiotrophia_NotPresent_Nervous system", "G__Abiotrophia_Present_Nervous system",
"G__Abiotrophia_NotPresent_Ovary", "G__Abiotrophia_Present_Ovary",
"G__Abiotrophia_NotPresent_Pancreas", "G__Abiotrophia_Present_Pancreas",
"G__Abiotrophia_NotPresent_Prostate", "G__Abiotrophia_Present_Prostate",
"G__Abiotrophia_NotPresent_Skin", "G__Abiotrophia_Present_Skin",
"G__Abiotrophia_NotPresent_Small intestine", "G__Abiotrophia_Present_Small intestine",
"G__Abiotrophia_NotPresent_Stomach", "G__Abiotrophia_Present_Stomach",
"G__Abiotrophia_NotPresent_Unknown", "G__Abiotrophia_Present_Unknown",
"G__Abiotrophia_NotPresent_Urothelial tract", "G__Abiotrophia_Present_Urothelial tract",
"G__Abiotrophia_NotPresent_Uterus", "G__Abiotrophia_Present_Uterus",
"G__Alphatorquevirus_NotPresent_Bone/Soft tissue", "G__Alphatorquevirus_Present_Bone/Soft tissue",
"G__Alphatorquevirus_NotPresent_Breast", "G__Alphatorquevirus_Present_Breast",
"G__Alphatorquevirus_NotPresent_Colorectum", "G__Alphatorquevirus_Present_Colorectum",
"G__Alphatorquevirus_NotPresent_Esophagus", "G__Alphatorquevirus_Present_Esophagus",
"G__Alphatorquevirus_NotPresent_Kidney", "G__Alphatorquevirus_Present_Kidney",
"G__Alphatorquevirus_NotPresent_Liver", "G__Alphatorquevirus_Present_Liver",
"G__Alphatorquevirus_NotPresent_Lung", "G__Alphatorquevirus_Present_Lung",
"G__Alphatorquevirus_NotPresent_Pancreas", "G__Alphatorquevirus_Present_Pancreas",
"G__Alphatorquevirus_NotPresent_Skin", "G__Alphatorquevirus_Present_Skin",
"G__Alphatorquevirus_NotPresent_Urothelial tract", "G__Alphatorquevirus_Present_Urothelial tract"
), ABCD = c(3L, 2L, 17L, 6L, 34L, 18L, 240L, 53L, 321L, 73L,
87L, 25L, 6L, 3L, 20L, 8L, 15L, 7L, 19L, 4L, 265L, 42L, 6L, 1L,
4L, 2L, 22L, 4L, 70L, 13L, 54L, 12L, 116L, 33L, 58L, 11L, 6L,
2L, 26L, 6L, 42L, 8L, 74L, 18L, 19L, 3L, 52L, 0L, 288L, 5L, 377L,
17L, 110L, 2L, 19L, 3L, 21L, 2L, 298L, 9L, 60L, 6L, 68L, 1L,
89L, 3L), Total = c(15L, 15L, 53L, 53L, 183L, 183L, 678L, 678L,
473L, 473L, 123L, 123L, 20L, 20L, 55L, 55L, 118L, 118L, 51L,
51L, 402L, 402L, 23L, 23L, 31L, 31L, 76L, 76L, 126L, 126L, 100L,
100L, 291L, 291L, 308L, 308L, 59L, 59L, 38L, 38L, 96L, 96L, 137L,
137L, 72L, 72L, 183L, 183L, 678L, 678L, 473L, 473L, 123L, 123L,
118L, 118L, 51L, 51L, 402L, 402L, 100L, 100L, 308L, 308L, 137L,
137L), Merge = c("Abiotrophia_Anus", "Abiotrophia_Anus", "Abiotrophia_Bile duct",
"Abiotrophia_Bile duct", "Abiotrophia_Bone/Soft tissue", "Abiotrophia_Bone/Soft tissue",
"Abiotrophia_Breast", "Abiotrophia_Breast", "Abiotrophia_Colorectum",
"Abiotrophia_Colorectum", "Abiotrophia_Esophagus", "Abiotrophia_Esophagus",
"Abiotrophia_Gallbladder", "Abiotrophia_Gallbladder", "Abiotrophia_Head and neck",
"Abiotrophia_Head and neck", "Abiotrophia_Kidney", "Abiotrophia_Kidney",
"Abiotrophia_Liver", "Abiotrophia_Liver", "Abiotrophia_Lung",
"Abiotrophia_Lung", "Abiotrophia_Lymphoid tissue", "Abiotrophia_Lymphoid tissue",
"Abiotrophia_Mesothelium", "Abiotrophia_Mesothelium", "Abiotrophia_Nervous system",
"Abiotrophia_Nervous system", "Abiotrophia_Ovary", "Abiotrophia_Ovary",
"Abiotrophia_Pancreas", "Abiotrophia_Pancreas", "Abiotrophia_Prostate",
"Abiotrophia_Prostate", "Abiotrophia_Skin", "Abiotrophia_Skin",
"Abiotrophia_Small intestine", "Abiotrophia_Small intestine",
"Abiotrophia_Stomach", "Abiotrophia_Stomach", "Abiotrophia_Unknown",
"Abiotrophia_Unknown", "Abiotrophia_Urothelial tract", "Abiotrophia_Urothelial tract",
"Abiotrophia_Uterus", "Abiotrophia_Uterus", "Alphatorquevirus_Bone/Soft tissue",
"Alphatorquevirus_Bone/Soft tissue", "Alphatorquevirus_Breast",
"Alphatorquevirus_Breast", "Alphatorquevirus_Colorectum", "Alphatorquevirus_Colorectum",
"Alphatorquevirus_Esophagus", "Alphatorquevirus_Esophagus", "Alphatorquevirus_Kidney",
"Alphatorquevirus_Kidney", "Alphatorquevirus_Liver", "Alphatorquevirus_Liver",
"Alphatorquevirus_Lung", "Alphatorquevirus_Lung", "Alphatorquevirus_Pancreas",
"Alphatorquevirus_Pancreas", "Alphatorquevirus_Skin", "Alphatorquevirus_Skin",
"Alphatorquevirus_Urothelial tract", "Alphatorquevirus_Urothelial tract"
)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 9L, 12L,
11L, 13L, 14L, 16L, 15L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 28L, 27L, 29L, 30L, 31L, 32L, 34L, 33L, 35L, 36L, 38L,
37L, 40L, 39L, 42L, 43L, 45L, 44L, 47L, 46L, 1011L, 1012L, 1014L,
1013L, 1015L, 1016L, 1017L, 1018L, 1019L, 1020L, 1022L, 1021L,
1023L, 1024L, 1026L, 1025L, 1027L, 1028L, 1029L, 1030L), class = "data.frame")
This is probably not an answer, but it should help to improve your code. If I'm terribly wrong, I'll remove my answer right away. I have left out the test business, which I don't understand, but your problem seems to be the extraction.
The first thing is that you need to remove the quotation marks in your grep command, try:
varlist <- c("Abiotrophia", "Alphatorquevirus")
for (i in varlist) {
  # i is passed unquoted, so grep() uses the current name as its pattern and
  # keeps only the rows whose group label contains it
  fish.test <- fish.test0[grep(i, fish.test0$group), ]
  # show the first rows of each subset as a quick check
  print(head(fish.test))
}
From what I understand, you need to define column and tests outside your loop. Does that give you more of what you want:
varlist <- c("Abiotrophia", "Alphatorquevirus")
column <- "ACDC"
# One slot per entry of varlist, filled by position inside the loop.
tests <- vector("list", length(varlist))
# seq_along() gives an empty sequence for an empty varlist, whereas
# 1:length(varlist) would count down as c(1, 0).
for (i in seq_along(varlist)) {
  # extract rows which contain the variable
  fish.test <- fish.test0[grep(varlist[i], fish.test0$group), ]
  # add a running row number as a new last column; seq_len() keeps this valid
  # when no row matched, where 1:0 would make cbind() fail
  fish.test <- cbind(fish.test, seq_len(nrow(fish.test)))
  # give the new column your name of choice
  colnames(fish.test)[ncol(fish.test)] <- column
  # write each result into your defined list
  tests[[i]] <- fish.test
}

Exact number of every value next to the bar [duplicate]

This question already has answers here:
How to put labels over geom_bar in R with ggplot2
(4 answers)
Closed 5 years ago.
Having a dataset like this:
df <- structure(list(word = structure(c(1L, 12L, 23L, 34L, 43L, 44L,
45L, 46L, 47L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L), .Label = c("word1", "word10", "word11", "word12", "word13",
"word14", "word15", "word16", "word17", "word18", "word19", "word2",
"word20", "word21", "word22", "word23", "word24", "word25", "word26",
"word27", "word28", "word29", "word3", "word30", "word31", "word32",
"word33", "word34", "word35", "word36", "word37", "word38", "word39",
"word4", "word40", "word41", "word42", "word43", "word44", "word45",
"word46", "word47", "word5", "word6", "word7", "word8", "word9"
), class = "factor"), frq = c(1975L, 1665L, 1655L, 1469L, 1464L,
1451L, 1353L, 1309L, 1590L, 1545L, 1557L, 1556L, 1130L, 1153L,
1151L, 1150L, 1144L, 1141L, 1115L, 194L, 195L, 135L, 135L, 130L,
163L, 167L, 164L, 159L, 153L, 145L, 143L, 133L, 133L, 153L, 153L,
150L, 119L, 115L, 115L, 115L, 114L, 113L, 113L, 113L, 115L, 102L,
101L)), .Names = c("word", "frq"), class = "data.frame", row.names = c(NA,
-47L))
With this command lines I produce a bar plot graph
# Reorder the levels of word by frq so the bars are drawn in frequency order.
dat2 <- transform(df, word = reorder(word, frq))
# Keep the first ten rows.
df2 <- head(dat2, 10)
# One yellow bar per word, with bar length given by frq.
p <- ggplot(df2, aes(x = word, y = frq)) +
  geom_bar(stat = "identity", fill = "yellow")
# Flip the axes so the bars run horizontally.
p2 <- p + coord_flip()
How is it possible to have the number of frq in the end of every bar?
I would use annotate..
p2 + annotate(geom = "text",x = df2$word, y= df2$frq, label = df2$frq)

Scale in word cloud

I have this dataframe
df <- structure(list(word = structure(c(1L, 12L, 23L, 34L, 43L, 44L,
45L, 46L, 47L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L), .Label = c("word1", "word10", "word11", "word12", "word13",
"word14", "word15", "word16", "word17", "word18", "word19", "word2",
"word20", "word21", "word22", "word23", "word24", "word25", "word26",
"word27", "word28", "word29", "word3", "word30", "word31", "word32",
"word33", "word34", "word35", "word36", "word37", "word38", "word39",
"word4", "word40", "word41", "word42", "word43", "word44", "word45",
"word46", "word47", "word5", "word6", "word7", "word8", "word9"
), class = "factor"), frq = c(1975L, 1665L, 1655L, 1469L, 1464L,
1451L, 1353L, 1309L, 1590L, 1545L, 1557L, 1556L, 1130L, 1153L,
1151L, 1150L, 1144L, 1141L, 1115L, 194L, 195L, 135L, 135L, 130L,
163L, 167L, 164L, 159L, 153L, 145L, 143L, 133L, 133L, 153L, 153L,
150L, 119L, 115L, 115L, 115L, 114L, 113L, 113L, 113L, 115L, 102L,
101L)), .Names = c("word", "frq"), class = "data.frame", row.names = c(NA,
-47L))
And I would like to create a word cloud based on the frequency.
In order to make this word cloud I use this lines of code:
library(wordcloud2)
wordcloud2(df, color = "random-light", backgroundColor = "green")
However, an issue I face is that the word cloud doesn't show all the words; the ones that are missing are the words with the lowest frequencies.
Is there a way to rescale the frequencies so that every word appears in the word cloud, with the most frequent words drawn a little smaller to leave room for the rest?
As I can understand this happens because the highest frequencies have big difference with the others in the list
You should directly rescale your frequencies. You could for example do :
p_scaled = p^(1/n) with n sufficiently big so that even the words with the lowest frequencies would appear on the word cloud.

Why Does this ggplot with POSIXct dates give an Error in cut.default, cannot allocate vector

I'm running a report, and have pulled out some data around something happening over time. I have then summarised that data into a dataframe of date/times and counts. When I try to plot it via ggplot2 I get an error
> ggplot(foo, aes(x=Date, y=Count))+
+ geom_line()
Error in cut.default(unclass(x), unclass(breaks), labels = labels, right = right, :
cannot allocate vector of length 1317423601
>
> ggplot(foo[2:349,], aes(x=Date, y=Count))+
+ geom_line()
Plot produced...
My data uses POSIXct dates, which do seem to cover the end of British Summer Time. I also note that if I exclude the first row of data, it works! Any ideas?
Here's my data
> dput(foo)
structure(list(Date = structure(c(1317423600, 1317445200, 1317466800,
1317488400, 1317510000, 1317553200, 1317574800, 1317596400, 1317618000,
1317639600, 1317661200, 1317682800, 1317704400, 1317726000, 1317747600,
1317769200, 1317790800, 1317812400, 1317834000, 1317855600, 1317877200,
1317898800, 1317920400, 1317942000, 1317963600, 1317985200, 1318006800,
1318028400, 1318050000, 1318071600, 1318093200, 1318114800, 1318136400,
1318158000, 1318179600, 1318201200, 1318222800, 1318244400, 1318266000,
1318287600, 1318309200, 1318330800, 1318352400, 1318374000, 1318395600,
1318417200, 1318438800, 1318460400, 1318503600, 1318525200, 1318546800,
1318568400, 1318590000, 1318611600, 1318633200, 1318654800, 1318676400,
1318698000, 1318719600, 1318762800, 1318784400, 1318806000, 1318827600,
1318849200, 1318870800, 1318892400, 1318914000, 1318935600, 1318957200,
1318978800, 1319000400, 1319022000, 1319043600, 1319065200, 1319086800,
1319108400, 1319130000, 1319151600, 1319173200, 1319194800, 1319216400,
1319238000, 1319259600, 1319281200, 1319302800, 1319324400, 1319346000,
1319367600, 1319410800, 1319432400, 1319454000, 1319475600, 1319497200,
1319518800, 1319540400, 1319562000, 1319583600, 1319605200, 1319626800,
1319648400, 1319670000, 1319691600, 1319713200, 1319734800, 1319756400,
1319778000, 1319799600, 1319821200, 1319842800, 1319864400, 1319886000,
1319907600, 1319929200, 1319994000, 1320015600, 1320037200, 1320058800,
1320080400, 1320102000, 1320123600, 1320145200, 1320166800, 1320188400,
1320210000, 1320231600, 1320253200, 1320274800, 1320296400, 1320318000,
1320339600, 1320361200, 1320382800, 1320404400, 1320426000, 1320447600,
1320469200, 1320490800, 1320512400, 1320534000, 1320577200, 1320598800,
1320620400, 1320642000, 1320663600, 1320685200, 1320706800, 1320750000,
1320771600, 1320793200, 1320814800, 1320836400, 1320858000, 1320879600,
1320901200, 1320922800, 1320944400, 1320966000, 1320987600, 1321009200,
1321030800, 1321052400, 1321074000, 1321095600, 1321117200, 1321138800,
1321182000, 1321203600, 1321225200, 1321246800, 1321268400, 1321290000,
1321311600, 1321333200, 1321354800, 1321376400, 1321398000, 1321419600,
1321441200, 1321462800, 1321484400, 1321506000, 1321527600, 1321549200,
1321570800, 1321592400, 1321614000, 1321635600, 1321657200, 1321678800,
1321700400, 1321722000, 1321743600, 1321765200, 1321786800, 1321808400,
1321830000, 1321851600, 1321873200, 1321894800, 1321916400, 1321938000,
1321959600, 1321981200, 1322002800, 1322024400, 1322046000, 1322067600,
1322089200, 1322110800, 1322132400, 1322154000, 1322175600, 1322197200,
1322218800, 1322240400, 1322262000, 1322305200, 1322326800, 1322370000,
1322391600, 1322413200, 1322434800, 1322456400, 1322478000, 1322499600,
1322521200, 1322542800, 1322564400, 1322586000, 1322607600, 1322629200,
1322650800, 1322672400, 1322694000, 1322715600, 1322737200, 1322758800,
1322780400, 1322802000, 1322823600, 1322845200, 1322866800, 1322888400,
1322910000, 1322931600, 1322953200, 1322974800, 1322996400, 1323018000,
1323039600, 1323061200, 1323082800, 1323104400, 1323126000, 1323147600,
1323169200, 1323190800, 1323212400, 1323234000, 1323255600, 1323277200,
1323298800, 1323320400, 1323342000, 1323363600, 1323385200, 1323406800,
1323428400, 1323450000, 1323471600, 1323493200, 1323514800, 1323558000,
1323579600, 1323601200, 1323622800, 1323644400, 1323666000, 1323687600,
1323709200, 1323730800, 1323752400, 1323774000, 1323795600, 1323817200,
1323838800, 1323860400, 1323882000, 1323903600, 1323925200, 1323946800,
1323968400, 1323990000, 1324011600, 1324033200, 1324054800, 1324076400,
1324098000, 1324119600, 1324141200, 1324162800, 1324206000, 1324227600,
1324249200, 1324270800, 1324292400, 1324314000, 1324335600, 1324357200,
1324378800, 1324400400, 1324422000, 1324443600, 1324465200, 1324486800,
1324508400, 1324530000, 1324551600, 1324573200, 1324594800, 1324616400,
1324638000, 1324659600, 1324681200, 1324702800, 1324724400, 1324767600,
1324832400, 1324854000, 1324875600, 1324897200, 1324918800, 1324940400,
1324962000, 1324983600, 1325005200, 1325026800, 1325048400, 1325070000,
1325091600, 1325113200, 1325134800, 1325156400, 1325178000, 1325199600,
1325221200, 1325242800, 1325264400, 1325286000), class = c("POSIXct",
"POSIXt"), tzone = ""), Count = c(3L, 0L, 9L, 1L, 0L, 1L, 6L,
4L, 4L, 52L, 19L, 7L, 5L, 59L, 30L, 3L, 2L, 50L, 25L, 8L, 4L,
41L, 22L, 4L, 8L, 57L, 12L, 14L, 3L, 10L, 2L, 6L, 0L, 1L, 7L,
10L, 12L, 44L, 19L, 11L, 3L, 47L, 31L, 7L, 9L, 56L, 21L, 11L,
54L, 20L, 10L, 6L, 54L, 17L, 0L, 1L, 11L, 2L, 0L, 2L, 4L, 14L,
9L, 52L, 19L, 11L, 10L, 56L, 33L, 12L, 9L, 57L, 20L, 12L, 5L,
51L, 23L, 14L, 5L, 50L, 9L, 1L, 2L, 5L, 7L, 1L, 0L, 3L, 9L, 13L,
57L, 27L, 10L, 7L, 62L, 29L, 5L, 6L, 53L, 22L, 8L, 10L, 53L,
12L, 7L, 9L, 38L, 8L, 1L, 0L, 9L, 3L, 0L, 6L, 9L, 3L, 81L, 27L,
6L, 5L, 67L, 45L, 6L, 3L, 63L, 43L, 10L, 2L, 57L, 38L, 19L, 12L,
54L, 22L, 1L, 0L, 5L, 4L, 0L, 4L, 2L, 10L, 4L, 53L, 44L, 7L,
65L, 41L, 11L, 7L, 61L, 46L, 7L, 4L, 64L, 48L, 10L, 11L, 56L,
39L, 6L, 1L, 4L, 3L, 0L, 7L, 3L, 10L, 4L, 46L, 45L, 16L, 6L,
69L, 46L, 17L, 1L, 67L, 43L, 15L, 5L, 57L, 40L, 14L, 4L, 56L,
36L, 3L, 0L, 11L, 3L, 0L, 1L, 5L, 2L, 5L, 9L, 59L, 45L, 9L, 7L,
71L, 35L, 19L, 10L, 65L, 23L, 5L, 7L, 10L, 2L, 5L, 6L, 5L, 1L,
0L, 3L, 2L, 0L, 5L, 7L, 10L, 8L, 58L, 46L, 16L, 6L, 70L, 52L,
14L, 8L, 84L, 42L, 10L, 6L, 62L, 44L, 11L, 3L, 58L, 28L, 3L,
0L, 9L, 8L, 1L, 0L, 4L, 2L, 10L, 11L, 65L, 53L, 14L, 11L, 73L,
42L, 14L, 8L, 74L, 33L, 15L, 13L, 75L, 53L, 11L, 10L, 61L, 31L,
1L, 2L, 9L, 2L, 0L, 3L, 1L, 15L, 16L, 114L, 52L, 10L, 14L, 75L,
50L, 14L, 9L, 69L, 52L, 12L, 15L, 77L, 35L, 10L, 5L, 69L, 37L,
5L, 1L, 12L, 10L, 0L, 9L, 4L, 14L, 18L, 90L, 35L, 12L, 9L, 87L,
49L, 10L, 22L, 83L, 41L, 15L, 14L, 79L, 48L, 17L, 6L, 40L, 8L,
2L, 0L, 2L, 0L, 1L, 7L, 2L, 1L, 2L, 6L, 2L, 54L, 31L, 11L, 6L,
54L, 32L, 7L, 9L, 63L, 23L, 16L, 6L, 43L, 17L, 3L)), .Names = c("Date",
"Count"), row.names = c(1L, 2L, 3L, 4L, 5L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L,
37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L,
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L,
78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L,
91L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L,
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L,
115L, 116L, 117L, 120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L,
128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L,
139L, 140L, 141L, 142L, 143L, 144L, 145L, 147L, 148L, 149L, 150L,
151L, 152L, 153L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L,
163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L,
175L, 176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L, 184L, 185L,
186L, 187L, 188L, 189L, 190L, 191L, 192L, 193L, 194L, 195L, 196L,
197L, 198L, 199L, 200L, 201L, 202L, 203L, 204L, 205L, 206L, 207L,
208L, 209L, 210L, 211L, 212L, 213L, 214L, 215L, 216L, 217L, 218L,
219L, 220L, 221L, 222L, 223L, 224L, 225L, 227L, 228L, 230L, 231L,
232L, 233L, 234L, 235L, 236L, 237L, 238L, 239L, 240L, 241L, 242L,
243L, 244L, 245L, 246L, 247L, 248L, 249L, 250L, 251L, 252L, 253L,
254L, 255L, 256L, 257L, 258L, 259L, 260L, 261L, 262L, 263L, 264L,
265L, 266L, 267L, 268L, 269L, 270L, 271L, 272L, 273L, 274L, 275L,
276L, 277L, 278L, 279L, 280L, 281L, 282L, 283L, 285L, 286L, 287L,
288L, 289L, 290L, 291L, 292L, 293L, 294L, 295L, 296L, 297L, 298L,
299L, 300L, 301L, 302L, 303L, 304L, 305L, 306L, 307L, 308L, 309L,
310L, 311L, 312L, 313L, 315L, 316L, 317L, 318L, 319L, 320L, 321L,
322L, 323L, 324L, 325L, 326L, 327L, 328L, 329L, 330L, 331L, 332L,
333L, 334L, 335L, 336L, 337L, 338L, 339L, 341L, 344L, 345L, 346L,
347L, 348L, 349L, 350L, 351L, 352L, 353L, 354L, 355L, 356L, 357L,
358L, 359L, 360L, 361L, 362L, 363L, 364L, 365L), class = "data.frame")
and here's my original code
ggplot(foo, aes(x=Date, y=Count))+
geom_line()

Resources