Iteration over all variables in a dataframe - r

I have found a useful mean imputation technique here
.
More specifically:
variable[is.na(variable)] <- rowMeans(cbind(variable[which(is.na(variable))-1],
variable[which(is.na(variable))+1]))
Which takes values before and after the missing one and imputes their mean.
However, since I have a large data frame with lots of variables I was wondering is there a way to iterate this function over every variable (column) in the df?
dput:
dput(head(politbar_timeseries,10))
structure(list(Month = structure(c(8401, 8432, 8460, 8491, 8521,
8552, 8582, 8613, 8644, 8674), class = "Date"), Intention_CDU = c(246L,
223L, 222L, 232L, 261L, 240L, 241L, NA, 234L, 211L), Intention_SPD = c(304L,
323L, 276L, 274L, 238L, 290L, 291L, NA, 284L, 296L), Intention_FDP = c(47L,
44L, 46L, 36L, 35L, 50L, 31L, NA, 33L, 40L), Intention_Green = c(112L,
90L, 108L, 97L, 92L, 93L, 80L, NA, 131L, 97L), Intention_PDS = c(1L,
2L, 1L, 4L, 2L, 4L, 6L, NA, 3L, 1L), Intention_Right = c(40L,
45L, 51L, 44L, 48L, 26L, 30L, NA, 33L, 39L), CDU_CSU_Scale = c(5.53364976051333,
5.41668954145634, 5.41361737597252, 5.53237142973321, 5.90556125077522,
5.65325991093138, 5.66581907651607, NA, 5.7568395653053, 5.56722081960557
), SPD_Scale = c(6.68501038883942, 7.0740019675866, 6.31415136355633,
6.52447895467401, 6.29176231355408, 6.52870415235848, 6.73302006301497,
NA, 7.12547563426403, 7.17833309669175), FDP_Scale = c(5.34570000100596,
5.73343004031828, 5.52174547729524, 5.39618098094715, 5.81980921102384,
5.64326882828348, 5.70136552543044, NA, 5.3836387964029, 5.73726720856055
), Grüne_Scale = c(5.73191750379599, 6.03715643205545, 6.19893648691653,
5.96106479727169, 5.78436018957346, 5.54482751153172, 5.6213169156508,
NA, 6.42776109093573, 6.33016932291559), Republikaner_Scale = c(2.33415238404679,
2.40200426439232, 2.50591428720572, 2.45599753445912, 2.61170073660812,
2.26120872300811, 2.24409536048212, NA, 2.29699201198203, 2.25876734042663
), PDS_Scale = c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NA, NaN,
NaN)), .Names = c("Month", "Intention_CDU", "Intention_SPD",
"Intention_FDP", "Intention_Green", "Intention_PDS", "Intention_Right",
"CDU_CSU_Scale", "SPD_Scale", "FDP_Scale", "Grüne_Scale", "Republikaner_Scale",
"PDS_Scale"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 249L,
8L, 9L), class = "data.frame")

Related

R datatable rotate header not aligning

I am trying to rotate the header for a datatable. I was able to rotate the header but the rotated header is not aligning with the columns. This is what I have now:
I referenced this for my code : header direction in shiny data table
Here is my code:
datatable(df, rownames = F,class = c("compact"),options = list(paging = F, autoWidth = T,searching= FALSE,
scrollX=T,
initComplete = JS("function(settings, json) {$(this.api().table().header()).css({'font-size' : '12px'});}"),
headerCallback = JS(
"function(thead, data, start, end, display){
var $ths = $(thead).find('th');
$ths.css({'vertical-align': 'bottom', 'white-space': 'nowrap'});
var betterCells = [];
$ths.each(function(){
var cell = $(this);
var newDiv = $('<div>', {height: 'auto', width: cell.height()});
var newInnerDiv = $('<div>', {text: cell.text()});
newDiv.css({margin: 'auto'});
newInnerDiv.css({
transform: 'rotate(220deg)',
'writing-mode': 'tb-rl',
'white-space': 'nowrap'
});
newDiv.append(newInnerDiv);
betterCells.push(newDiv);
});
$ths.each(function(i){
$(this).html(betterCells[i]);
});
}"
))) %>%
formatStyle(columns = c(1:19), `font-size` = '12px')
DATA:
structure(list(diag_category_name = structure(1:18, .Label = c("AIDS",
"Any malignancy", "Cerebrovascular disease", "Chronic pulmonary disease",
"Congestive heart failure", "Dementia", "Diabetes (mild to moderate)",
"Diabetes with chronic complications", "Hemoplegia or paralegia",
"Metastatic solid tumor", "Mild liver disease", "Moderate to severe liver disease",
"Myocardial infarction", "No Charlson Comorbidity", "Peptic ulcer disease",
"Peripheral vascular disease", "Renal disease", "Rheumatologic disease"
), class = "factor"), AIDS = c(20L, 6L, 3L, 5L, 1L, NA, NA, NA,
1L, 14L, 2L, NA, 1L, NA, NA, 3L, 5L, NA), `Any malignancy` = c(6L,
1051L, 108L, 209L, 106L, 34L, 130L, 158L, 14L, NA, 92L, 10L,
86L, NA, 37L, 190L, 161L, 32L), `Cerebrovascular disease` = c(3L,
108L, 421L, 122L, 105L, 44L, 47L, 94L, 35L, 303L, 35L, 7L, 67L,
NA, 23L, 165L, 111L, 15L), `Chronic pulmonary disease` = c(5L,
209L, 122L, 726L, 146L, 34L, 80L, 152L, 23L, 505L, 84L, 15L,
90L, NA, 43L, 234L, 173L, 31L), `Congestive heart failure` = c(1L,
106L, 105L, 146L, 381L, 27L, 37L, 124L, 15L, 269L, 43L, 3L, 121L,
NA, 18L, 166L, 160L, 16L), Dementia = c(NA, 34L, 44L, 34L, 27L,
109L, 11L, 32L, 5L, 74L, 5L, 1L, 26L, NA, 3L, 44L, 44L, 3L),
`Diabetes (mild to moderate)` = c(NA, 130L, 47L, 80L, 37L,
11L, 475L, NA, 8L, 335L, 58L, 9L, 36L, NA, 17L, 79L, 45L,
12L), `Diabetes with chronic complications` = c(NA, 158L,
94L, 152L, 124L, 32L, NA, 573L, 18L, 400L, 75L, 9L, 83L,
NA, 27L, 173L, 229L, 16L), `Hemoplegia or paralegia` = c(1L,
14L, 35L, 23L, 15L, 5L, 8L, 18L, 63L, 45L, 4L, NA, 13L, NA,
4L, 21L, 14L, 3L), `Metastatic solid tumor` = c(14L, NA,
303L, 505L, 269L, 74L, 335L, 400L, 45L, 2778L, 264L, 27L,
186L, NA, 79L, 492L, 455L, 68L), `Mild liver disease` = c(2L,
92L, 35L, 84L, 43L, 5L, 58L, 75L, 4L, 264L, 362L, NA, 29L,
NA, 22L, 73L, 70L, 8L), `Moderate to severe liver disease` = c(NA,
10L, 7L, 15L, 3L, 1L, 9L, 9L, NA, 27L, NA, 38L, 3L, NA, 6L,
11L, 10L, 1L), `Myocardial infarction` = c(1L, 86L, 67L,
90L, 121L, 26L, 36L, 83L, 13L, 186L, 29L, 3L, 277L, NA, 20L,
99L, 86L, 11L), `No Charlson Comorbidity` = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 49L, NA, NA, NA,
NA), `Peptic ulcer disease` = c(NA, 37L, 23L, 43L, 18L, 3L,
17L, 27L, 4L, 79L, 22L, 6L, 20L, NA, 118L, 33L, 31L, 4L),
`Peripheral vascular disease` = c(3L, 190L, 165L, 234L, 166L,
44L, 79L, 173L, 21L, 492L, 73L, 11L, 99L, NA, 33L, 693L,
197L, 22L), `Renal disease` = c(5L, 161L, 111L, 173L, 160L,
44L, 45L, 229L, 14L, 455L, 70L, 10L, 86L, NA, 31L, 197L,
633L, 24L), `Rheumatologic disease` = c(NA, 32L, 15L, 31L,
16L, 3L, 12L, 16L, 3L, 68L, 8L, 1L, 11L, NA, 4L, 22L, 24L,
106L)), row.names = c(NA, -18L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x5638d466ed90>, sorted = "diag_category_name")
I have a fix. However, you may need to change some things depending on how you use this. For example, when you add x-axis scrolling overflow is hidden. When the table headings are tilted, there will be an overflow. That equates to some of the header labels getting hidden.
datatable(df1, rownames = F, class = c("compact"),
options = list(
paging = F, autoWidth = F,
searching= FALSE,
scrollX = F,
initComplete = JS("function(settings, json) {
$(this.api().table().header()).css({
'font-size' : '12px'});}"),
headerCallback = JS("function(thead, data, start, end, display){
var $ths = $(thead).find('th');
$ths.css({'vertical-align': 'top', 'padding': '4px 0px',
'transform': 'rotate(180deg)', 'border': 'none'});
var betterCells = [];
$ths.each(function(){
var cell = $(this);
var newDiv = $('<div>', {width: '13px', float: 'left'});
var newInnerDiv = $('<div>', {text: cell.text()});
newDiv.css({margin: 'auto'});
newInnerDiv.css({
'writing-mode': 'vertical-rl',
'white-space': 'nowrap',
'text-align': 'left',
'transform-origin': 'top left',
'transform': 'rotate(45deg)',
'overflow': 'visible'
});
newDiv.append(newInnerDiv);
betterCells.push(newDiv);
});
$ths.each(function(i){
$(this).html(betterCells[i]);
});}"))) %>%
formatStyle(columns = c(1:19), `font-size` = '12px')
There's a lot going on here, and most of it was your code. However, if you have any questions about what's happening here, just let me know.

Widening Data and Changing Columns

I have managed to delete a little bit of code that did the below task and can't for the life of me figure out how I did it before.
I want to widen the data that has two factors spread over 8 different 'waves'. There are four 'Paper' factors, each with the same four internal factors 'Response'. The output from a previously required function gives the following dataframe:
[
And I would like to make it look like this:
The single column of the first tibble has become the single row of the second tibble.
As you can see, the second tibble has extra factors of Paper but these can just be joined row wise.
I really wasn't sure how to attack this, but thought it would be done using the pivot_wider function. When I tried
times_correct <- times_19 %>%
pivot_wider( id_cols = c('Stay/remain in the EU`', 'Leave the EU', 'I would/will not vote', 'Don\'t know'), names_from = eurrefcolnames)
I got the error that I can't subset columns that don't exist which makes sense: I need to manually add the correct 'Waves'. I think this is relatively simple, but can't for the life of me figure out how I did it!
Here is the dput of the various tibbles:
structure(list(resp = structure(c(3L, 2L, 4L, 1L, NA, NA, NA,
NA), .Label = c("Don't Know", "Leave", "Remain", "Will Not Vote"
), class = "factor"), `Stay/remain in the EU` = c(316L, 290L,
313L, 324L, 338L, 320L, 325L, 335L), `Leave the EU` = c(157L,
123L, 159L, 154L, 134L, 189L, 187L, 181L), `I would/will not vote` = c(2L,
3L, 3L, 3L, 2L, 2L, 2L, 0L), `Don't know` = c(56L, 51L, 55L,
50L, 57L, 20L, 17L, 0L), Paper = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "Times", class = "factor")), row.names = c(NA,
-8L), class = c("tbl_df", "tbl", "data.frame"))
structure(list(resp = structure(c(3L, 2L, 4L, 1L, 3L, 2L, 4L,
1L, 3L, 2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L, 2L, 4L, 1L), .Label = c("Don't Know",
"Leave", "Remain", "Will Not Vote"), class = "factor"), euRefVoteW1 = c(316L,
157L, 2L, 56L, 190L, 339L, 4L, 70L, 819L, 79L, 9L, 71L, 1294L,
1311L, 150L, 523L, 1715L, 2587L, 133L, 630L), euRefVoteW2 = c(290L,
123L, 3L, 51L, 175L, 282L, 3L, 62L, 777L, 74L, 5L, 62L, 1091L,
925L, 80L, 371L, 1528L, 2044L, 83L, 517L), euRefVoteW3 = c(313L,
159L, 3L, 55L, 199L, 334L, 4L, 69L, 835L, 81L, 10L, 57L, 1348L,
1289L, 139L, 508L, 1766L, 2563L, 156L, 586L), euRefVoteW4 = c(324L,
154L, 3L, 50L, 215L, 328L, 2L, 61L, 848L, 70L, 10L, 55L, 1397L,
1267L, 128L, 492L, 1853L, 2494L, 143L, 583L), euRefVoteW6 = c(338L,
134L, 2L, 57L, 241L, 286L, 2L, 77L, 853L, 68L, 5L, 57L, 1519L,
1133L, 112L, 520L, 2017L, 2284L, 106L, 667L), euRefVoteW7 = c(320L,
189L, 2L, 20L, 186L, 384L, 2L, 34L, 832L, 109L, 8L, 34L, 1449L,
1456L, 87L, 292L, 1906L, 2785L, 55L, 328L), euRefVoteW8 = c(325L,
187L, 2L, 17L, 187L, 384L, 1L, 34L, 836L, 118L, 5L, 24L, 1462L,
1522L, 72L, 228L, 1898L, 2852L, 56L, 268L), euRefVoteW9 = c(335L,
181L, 0L, 0L, 206L, 385L, 0L, 6L, 844L, 102L, 0L, 4L, 1572L,
1462L, 0L, 21L, 2018L, 2827L, 0L, 20L), Paper = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L), .Label = c("Times", "Telegraph", "Control", "No_Paper",
"Rest"), class = "factor")), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
eurrefcolnames = c('euRefVoteW1','euRefVoteW2', 'euRefVoteW3', 'euRefVoteW4', 'euRefVoteW6',' euRefVoteW7', 'euRefVoteW8', 'euRefVoteW9')
EDIT:
Here is the function that create the initial dataframes, is there an edit I could make here perhaps ?
tally_reader_number <- function(input_dataframe,newspaper_name) {
#function takes the input of in_all_waves, tallies the number of different eu ref responses using map_df for a given newspaper factor (defined above)
# and returns a dataframe of responese for each wave with the newspaper factor as a column
returned_dataframe <- input_dataframe %>%
filter(Paper == newspaper_name) %>%
ungroup() %>% #function refuses to work without this
select(-Paper) %>%
map_df(table) %>% # use map_df from the purrr package to "table" each column
rownames_to_column("response") %>% #convert the rownames to a column named response
mutate(resp = case_when(response == 1 ~ "Remain", #change the resulting numbers to the correct responses
response == 2 ~ "Leave",
response ==3 ~ "Will Not Vote",
response == 4 ~ "Don't Know")) %>%
select(resp, everything(), -response) %>% #reorder the columns with resp at the front, removing response
mutate(Paper = newspaper_name)
returned_dataframe$Paper <- as.factor(returned_dataframe$Paper)
returned_dataframe$resp <- as.factor(returned_dataframe$resp)
returned_dataframe
}

order geom_point by specific facet

I have a ggplot related question, which should be easy but I could not find the answer yet. I am trying to plot a faceted plot with the code below and this dataset (11 kB).
ggplot(plot.dat, aes(x = estimate, y = reorder(countryyear, estimate))) +
geom_point() +
geom_segment(aes(x=conf.low, xend=conf.high, yend=countryyear)) +
facet_grid(. ~ facet) +
xlab("Random Effect Estimate") +
ylab("") + scale_x_continuous(breaks=c(seq(0, 5, 1)), limits=c(0, 5)) +
ggtitle("Random Slopes in Country*Year Groups from Northwestern Europe") +
theme_minimal() + theme(plot.title = element_text(hjust = 0.5))
I would like countryyear to be organized by the values of estimate in the Extreme Right facet. Not quite sure how to order by values of a specific facet. Any ideas are welcome! Thanks.
Update: Here is the dput structure of a random subset of the dataset. It has some missing values, but it should work for the sake of the example. I also updated the download link above, that has the full version.
structure(list(estimate = c(1.41056902925372, 0.854859208455895,
1.16012834593894, 0.871339033194504, 0.803272289946221, 1.17540386134493,
0.996313357490551, 1.49940694539732, 1.33773365908762, 2.7318703090905,
1.19131935418045, 1.12765907711738, 0.746741192261761, 0.985847015192172,
0.912357310925342, 1.11582763712164, 1.21854572824977, 0.675712547978394,
0.566955524699616, 1.32611743759365, 0.519648352294682, 0.591013596394243,
1.30944973684044, 0.613722269599125, 1.13293279727271, 0.950788678552604,
1.1599446923567, 1.11493952112913, 0.95336321045095, 1.39002327097034,
0.794207546872633, 0.788545101449259, 1.01096883872495, 0.897407203907834,
1.38391605229103, 1.35754760293107, 1.0718508539761, 0.542191158958878,
0.757132752456427, 1.44172863221312, 1.04842251986171, 0.77260404885379,
0.879288027642055, 1.09372353598088, 0.745484830381145, 1.21211217249353,
0.628009608902132, 1.34864488674734), countryyear = structure(c(1L,
2L, 4L, 5L, 7L, 9L, 10L, 12L, 13L, 26L, 28L, 29L, 31L, 32L, 34L,
36L, 37L, 39L, 40L, 57L, 59L, 60L, 62L, 63L, 65L, 67L, 68L, 70L,
71L, 73L, 75L, 76L, 89L, 90L, 92L, 94L, 95L, 103L, 104L, 106L,
108L, 109L, 111L, 128L, 130L, 132L, 133L, 135L), .Label = c("AT02",
"AT04", "AT06", "AT14", "AT16", "BE02", "BE04", "BE06", "BE08",
"BE10", "BE12", "BE14", "BE16", "BG06", "BG08", "BG10", "BG12",
"CH14", "CZ02", "CZ04", "CZ08", "CZ10", "CZ12", "CZ14", "CZ16",
"DE02", "DE04", "DE06", "DE08", "DE10", "DE12", "DE14", "DE16",
"DK02", "DK04", "DK06", "DK08", "DK10", "DK12", "DK14", "EE04",
"EE06", "EE08", "EE10", "EE12", "EE14", "EE16", "ES02", "ES04",
"ES06", "ES08", "ES10", "ES12", "ES14", "ES16", "FI02", "FI04",
"FI06", "FI08", "FI10", "FI12", "FI14", "FI16", "FR06", "FR08",
"FR10", "FR12", "FR14", "FR16", "GB02", "GB04", "GB06", "GB08",
"GB10", "GB12", "GB14", "GB16", "GR02", "GR04", "GR08", "GR10",
"HU02", "HU06", "HU08", "HU10", "HU12", "HU14", "HU16", "IE02",
"IE04", "IE06", "IE08", "IE10", "IE12", "IE14", "IE16", "IT04",
"IT12", "IT16", "LT10", "LT12", "LT14", "NL02", "NL04", "NL06",
"NL08", "NL10", "NL12", "NL14", "NL16", "NO14", "PL02", "PL04",
"PL06", "PL08", "PL10", "PL12", "PL14", "PL16", "PT02", "PT04",
"PT06", "PT08", "PT10", "PT12", "PT14", "PT16", "SE02", "SE04",
"SE06", "SE08", "SE10", "SE12", "SE14", "SE16", "SI02", "SI04",
"SI06", "SI08", "SI10", "SI12", "SI14", "SI16", "SK04", "SK06",
"SK08", "SK10", "SK12"), class = "factor"), facet = structure(c(1L,
3L, 1L, 4L, 5L, 3L, 4L, 1L, 1L, 1L, 5L, 5L, 4L, 5L, 3L, 1L, 2L,
4L, 5L, 2L, 1L, 4L, 2L, 5L, 2L, 3L, 4L, 3L, 2L, 5L, 5L, 4L, 2L,
5L, 4L, 5L, 3L, 1L, 4L, 5L, 3L, 5L, 4L, 1L, 5L, 2L, 4L, 1L), .Label = c("Intercept",
"Extreme Left", "Center", "Right", "Extreme Right"), class = "factor"),
conf.low = c(1.16824810706745, 0.686215051613965, 0.910277310292764,
0.591705078386698, 0.37357342399703, 0.947951001435781, 0.663296044193037,
1.18794112232166, 1.06645119085865, 2.33578182814618, 0.580210898576738,
0.564235690522211, 0.530859530342114, 0.516191258265551,
0.730992343373883, 0.862424540370486, 0.827891784352444,
0.427638276259852, 0.275692447335368, 0.829763907986328,
0.370078643492081, 0.321852705445509, 0.83550621863293, 0.289836810427436,
0.847226120408727, 0.780056160572728, 0.873143885861924,
0.869757467125519, 0.615741777890997, 0.649483531741787,
0.349657606457465, 0.523294407847395, 0.670109418373736,
0.36656743494149, 0.952201390937053, 0.777207016700884, 0.888128473009524,
0.397085597526946, 0.479828726362257, 0.614533313431094,
0.813336887981082, 0.3129232351085, 0.61435321820328, 0.854801028643867,
0.346698059397102, 0.805414039007076, 0.434676644041643,
1.07780736338027), conf.high = c(1.70315275860739, 1.06494933995261,
1.47855797769819, 1.28312522319126, 1.7272277157504, 1.45743211956315,
1.49652679976667, 1.8925358720741, 1.67802460909168, 3.19512520208851,
2.44607918797515, 2.25369471581694, 1.05041423643869, 1.8828182806291,
1.13872035780431, 1.44368725318228, 1.79353596677755, 1.06769546329854,
1.16593171156554, 2.11938292490653, 0.729667639003753, 1.08526995489865,
2.05223919950836, 1.29954170985538, 1.51498719434776, 1.15888977865399,
1.54095070825389, 1.4292376699955, 1.47610807594453, 2.97492484321718,
1.80395225460704, 1.18824770090216, 1.52521060717706, 2.19697554354282,
2.01136404338166, 2.37122858469145, 1.29357889999432, 0.740322123703373,
1.19469713534712, 3.38237391450413, 1.35145693795059, 1.90755095606211,
1.25847381058047, 1.39942645489832, 1.60297301142912, 1.82417470710871,
0.907332092210651, 1.68753999308876)), row.names = c(1L,
9L, 17L, 25L, 33L, 41L, 49L, 57L, 65L, 128L, 136L, 144L, 152L,
160L, 168L, 176L, 184L, 192L, 200L, 283L, 291L, 299L, 307L, 315L,
323L, 331L, 339L, 347L, 355L, 363L, 371L, 379L, 442L, 450L, 458L,
466L, 474L, 512L, 520L, 528L, 536L, 544L, 552L, 640L, 648L, 656L,
664L, 672L), class = "data.frame")

R - Lattice xyplot - How do you add error bars to groups and summary lines?

I'm posting this question because the very similar question here has not been answered until now.
I have been asked to plot the mean +/- SEM of my whole cohort of patients over the xyplot() that depicts the values of all patients. The data used represents intraoperative cardiovascular findings from patients undergoing surgery.
This is my data.frame called df
dput(df)
structure(list(Name = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("DE", "JS", "KG", "MK", "TG", "WT"), class = "factor"),
Time = structure(c(1L, 2L, 3L, 4L, 5L, 7L, 8L, 1L, 2L, 3L,
4L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 7L, 8L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 2L, 3L, 4L, 5L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L), .Label = c("T1", "T2", "T3", "T4", "T5", "T6", "T7",
"T8"), class = "factor"), Dobut = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L), .Label = c("No", "Yes"
), class = "factor"), DobutDose = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
4L, 6L, 8L, 8L, 8L, 8L, 8L, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 5L, 5L, NA), CI = c(1.4, 2.3, 1.3, 1.8, 2.1,
2, 2.1, 2.1, 2.3, 1.9, 1.6, 2, 2.4, 2.7, 2.6, 2.7, 2.6, 2.3,
2.4, 2.6, 0.9, 2.5, 2.1, 1.6, 1.5, 1.8, 2, 2, 1.9, 2.1, 2.3,
2, 2.4, 2.3, 2.6, 2.4, 2, 2.2, 1.6, 2.1, 2.5, 2.8), SvO2 = c(57L,
65L, 47L, 45L, 51L, 60L, 56L, 70L, 85L, 75L, 79L, 82L, 73L,
77L, 78L, 73L, 71L, 73L, 80L, 74L, 41L, 66L, 51L, 51L, 49L,
54L, 68L, 48L, 80L, 70L, 71L, 69L, 74L, 79L, 77L, 77L, 75L,
74L, 70L, 79L, 80L, 79L), SVRI = c(4000L, 1983L, 4000L, 2444L,
1981L, 2120L, 2514L, 2971L, 2157L, 3747L, 4300L, 3200L, 2867L,
1778L, 1169L, 1215L, 1262L, 1461L, 1600L, 1692L, 4978L, 1760L,
2019L, 2650L, 2827L, 2356L, 1800L, 2840L, 2063L, 2248L, 1948L,
2160L, 1733L, 2296L, 2677L, 2100L, 2640L, 2655L, 3950L, 2210L,
2848L, 2543L), MAP = c(80L, 65L, 86L, 74L, 67L, 65L, 74L,
90L, 70L, 90L, 96L, 94L, 100L, 82L, 60L, 61L, 62L, 62L, 69L,
71L, 70L, 71L, 77L, 73L, 75L, 77L, 61L, 85L, 65L, 74L, 70L,
67L, 69L, 74L, 92L, 71L, 88L, 93L, 89L, 79L, 97L, 97L), CVP = c(10L,
8L, 21L, 19L, 15L, 12L, 8L, 12L, 8L, 11L, 10L, 14L, 14L,
22L, 22L, 20L, 21L, 20L, 21L, 16L, 14L, 16L, 24L, 20L, 22L,
24L, 16L, 14L, 16L, 15L, 14L, 13L, 17L, 8L, 5L, 8L, 22L,
20L, 20L, 21L, 8L, 8L), PAP = c(23L, 22L, 36L, 36L, 34L,
32L, 22L, 33L, 28L, 36L, 36L, 40L, 37L, 37L, 40L, 35L, 35L,
34L, 38L, 36L, 45L, 43L, 55L, 49L, 52L, 54L, 43L, 47L, 27L,
25L, 23L, 22L, 28L, 21L, 20L, 25L, 33L, 33L, 38L, 35L, 33L,
29L), PCWP = c(15L, 11L, 28L, 26L, 23L, 21L, 11L, 26L, NA,
NA, 25L, 25L, NA, 27L, NA, NA, NA, NA, NA, NA, 30L, NA, NA,
NA, NA, NA, NA, NA, 19L, NA, NA, NA, NA, NA, 16L, NA, NA,
NA, NA, NA, NA, NA)), .Names = c("Name", "Time", "Dobut",
"DobutDose", "CI", "SvO2", "SVRI", "MAP", "CVP", "PAP", "PCWP"
), class = "data.frame", row.names = c(NA, -42L))
Now the first xyplot I made for the variable CI looks like this
require(lattice)
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
+ ,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
Now I was able to add the mean (black line) of the whole cohort, by doing the following
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE,..., col="black", lty=1, lwd=4)
}
,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
Now I'd like to add +/- SE to the mean as a line above/below the mean, but nowhere can I find how to do this.
What I can do is using the latticeExtra package is add the loess line +/- SE, as below, but that's not the correct mathematical function I'm looking for. I've left the mean line in there to illustrate the difference between the two.
require(latticeExtra)
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
+ panel = function(x, y, ...) {
+ panel.xyplot(x, y, ...)
+ panel.linejoin(x, y, horizontal = FALSE,..., col="black", lty=1, lwd=4)
+ panel.smoother(x,y,se=TRUE, col.se="grey")
+ }
+ ,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
I have performed an extensive search through SO and the internet, but I haven't been able to find the right function to do this.
Help is very much appreciated! Thanks.
You could create your own panel function to plot a +/- SD region. For example
#new panel function
panel.se <- function(x, y, col.se=plot.line$col, alpha.se=.25, ...) {
plot.line <- trellis.par.get("plot.line")
xs <- if(is.factor(x)) {
factor(c(levels(x) , rev(levels(x))), levels=levels(x))
} else {
xx <- sort(unique(x))
c(xx, rev(xx))
}
means <- tapply(y,x, mean, na.rm=T)
vars <- tapply(y,x, var, na.rm=T)
Ns <- tapply(!is.na(y),x, sum)
ses <- sqrt(vars/Ns)
panel.polygon(xs, c(means+ses, rev(means-ses)), col=col.se, alpha=alpha.se)
}
and then you can use it like
#include new panel function
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.se(x,y, col.se="grey")
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE,..., col="black", lty=1, lwd=4)
}
,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
which results in

Modify values in a column subject to condition?

I know that this is a very silly question but I cannot work out how to do it.
I want to subtract 300 from the values on each row on the trial1 column if they are larger than 299.
I tried:
sums[sums$trial1 > 299, ][,"trial1"] -= 300
but didn't work. SO far the only way I managed to get it to work is by splitting the data.frame using subset and then modifying it with :
sums$trial1 = sums$trial1 - 300
and then using rbind(). I am pretty sure that using subset and rbind is overkill, but I haven't been able to find a direct way yet...
I used dput to get a sample of my data.frame.
structure(list(part_no = c(10L, 10L, 10L, 10L, 10L, 10L), trial1 = c(294L,
296L, 298L, 300L, 302L, 304L), trial2 = c(295L, 297L, 299L, 301L,
303L, 305L), id1 = c(1.5, 1.5, 1.5, 2, 2, 2), id2 = c(1.5, 1.5,
1.5, 2, 2, 2), dist1 = c(141L, 141L, 115L, 126L, 177L, 141L),
width1 = c(77L, 77L, 63L, 42L, 59L, 47L), dist2 = c(143L,
135L, 146L, 255L, 327L, 369L), width2 = c(78L, 74L, 80L,
85L, 109L, 123L), ttime1 = c(1752L, 1681L, 1664L, 1798L,
1664L, 1697L), ttime2 = c(2563L, 1849L, 2067L, 1933L, 2118L,
2245L), no_clicks1 = c(8L, 8L, 8L, 8L, 8L, 8L), no_clicks2 = c(8L,
8L, 8L, 8L, 8L, 8L), no_ontarget1 = c(7L, 8L, 8L, 8L, 8L,
8L), no_ontarget2 = c(8L, 8L, 8L, 4L, 7L, 8L), e1 = c(1L,
0L, 0L, 0L, 0L, 0L), e2 = c(0L, 0L, 0L, 4L, 1L, 0L), rating = c(252,
252, 252, 252, 252, 252), prat = c(0.8, 0.8, 0.8, 0.8, 0.8,
0.8), ptim = c(-46.2899543378995, -9.9940511600238, -24.21875,
-7.5083426028921, -27.2836538461538, -32.2922804949912),
ptdiff = c(-47.0899543378995, -10.7940511600238, -25.01875,
-8.3083426028921, -28.0836538461538, -33.0922804949912),
pdist = c(-1.41843971631206, 4.25531914893617, -26.9565217391304,
-102.380952380952, -84.7457627118644, -161.702127659574),
pddiff = c(-2.21843971631206, 3.45531914893617, -27.7565217391304,
-103.180952380952, -85.5457627118644, -162.502127659574),
perr = c(100, NaN, NaN, -Inf, -Inf, NaN), pediff = c(99.2,
NaN, NaN, -Inf, -Inf, NaN)), .Names = c("part_no", "trial1",
"trial2", "id1", "id2", "dist1", "width1", "dist2", "width2",
"ttime1", "ttime2", "no_clicks1", "no_clicks2", "no_ontarget1",
"no_ontarget2", "e1", "e2", "rating", "prat", "ptim", "ptdiff",
"pdist", "pddiff", "perr", "pediff"), row.names = 148:153, class = "data.frame")
Thanks!
Your first attempt was close but there is no -= operator in base R, so you need to supply the subset on the right hand side as well.
sums[sums$trial1 > 299,"trial1"] <- sums[sums$trial1 > 299,"trial1"]-300
Why not just use mod(x,300) ?
[charcountfiller]

Resources