Centering column names with wrapped text with Kable - r

I am having some trouble centering column names that have wrapped text. The top line of the wrapped text centers but the second line does not.
test_data <- data.frame(Mean = runif(5, 3.71, 7.72),
N = sample(57:59, 5, replace = TRUE),
sd = c(1, rep(0, 4)),
d = rep(1, 5),
naod = sample(1:4, 5, replace = TRUE),
a = sample(5:12, 5, replace = TRUE),
sa = sample(37:44, 5, replace = TRUE)
test <-as.data.frame(t(as.matrix(sapply(2:6,function(i) vec_fun5(test_Data,i)))))
kable(test,"latex" ,booktabs=T, align="c",col.names=linebreak(c('Mean','\\textit{N}' ,'Strongly\n Disagree','Disagree','Neither Agree\n or Disagree','Agree','Strongly\n Agree')),row.names = T,escape=F)%>%
row_spec(0,align = "c")
Output Table
I would like to have both lines centered within the cell.

You could use tableHTML for that:
Test data:
set.seed(1)
test_data <- data.frame(Mean = runif(5, 3.71, 7.72),
N = sample(57:59, 5, replace = TRUE),
sd = c(1, rep(0, 4)),
d = rep(1, 5),
naod = sample(1:4, 5, replace = TRUE),
a = sample(5:12, 5, replace = TRUE),
sa = sample(37:44, 5, replace = TRUE))
library(tableHTML)
test_data %>%
tableHTML(round = 2,
widths = c(50, 50, 50, 50,
80, 120, 50, 50),
headers = c("Mean", "N",
"Strongly <br>Disagree",
"Disagree",
"Neither Agree <br> or Disagree",
"Agree",
"Strongly <br>Agree"),
escape = FALSE) %>%
add_theme("scientific")
The result looks like this:

Related

Match two equal-sized data.frames and then filter results on a third

I have the following three data.frame:
area1 <- data.frame(ua = c(1, 2, 3),
sub_ua1 = c(0, 100, 0),
sub_ua2 = c(100, 100, 100),
sub_ua3 = c(100, 0, 0))
area2 <- data.frame(ua = c(1, 2, 3),
sub_ua1 = c(100, 100, 0),
sub_ua2 = c(100, 100, 0),
sub_ua3 = c(100, 0, 0))
df <- data.frame(ua = c(rep(1, 5), rep(2, 4), rep(3, 7)),
subua = c(rep("sub_ua1", 3), "sub_ua2", "sub_ua3",
"sub_ua1", "sub_ua1", "sub_ua2", "sub_ua3",
"sub_ua1", c(rep("sub_ua2", 2)), rep("sub_ua3", 4)),
value = c(rep(2, 3), rep(4, 3), rep(2, 2), rep(1, 8)))
What I'm trying to do is, based on column ua in dfs area_1 and area_2, filter only sub_ua (1 to 3) that have a match of 100 in each df. For example, the first value of sub_ua2 is 100 in both area_1 and area_2. This is a "sub_ua" I want.
Then, after having this list of "sub_ua" per "ua", filter only them on df to obtain the filtered value.
The results should be:
For ua == 1, get both sub_ua2 and sub_ua3
For ua == 2, get both sub_ua1 and sub_ua2
For ua == 3, get sub_ua2
EDIT:
I was using the following approach to obtain a data.frame of rows and columns indices:
library(prodlim)
# Indices for data frame 1 and 2 for values = 100
indices_1 <- which(area1 == 100, arr.ind = TRUE)
indices_2 <- which(area2 == 100, arr.ind = TRUE)
# Rows where indices are matched between the two data frame indices
indices_rows <- na.omit(row.match(as.data.frame(indices_1), as.data.frame(indices_2)))
# Row-column indices where both data frames have values of 100
indices_2[indices_rows, ]
I just don't know how to use this to filter in the final dataset df
If I understood correctly this should work:
area1 <- data.frame(ua = c(1, 2, 3),
sub_ua1 = c(0, 100, 0),
sub_ua2 = c(100, 100, 100),
sub_ua3 = c(100, 0, 0))
area2 <- data.frame(ua = c(1, 2, 3),
sub_ua1 = c(100, 100, 0),
sub_ua2 = c(100, 100, 0),
sub_ua3 = c(100, 0, 0))
library(dplyr)
library(tidyr)
area1 %>%
left_join(area2, by = "ua", suffix = c(".area1",".area2")) %>%
pivot_longer(cols = -ua,names_to = "var",values_to = "value") %>%
separate(col = var,into = c("var","area"),sep = "\\.") %>%
pivot_wider(names_from = area,values_from = value) %>%
filter(area1 == 100, area2 == 100) %>%
select(-starts_with("area"))
# A tibble: 4 x 2
ua var
<dbl> <chr>
1 1 sub_ua2
2 1 sub_ua3
3 2 sub_ua1
4 2 sub_ua2

How to apply functions depending on the column, and mutate into new data frame?

I came up with the idea to represent stats on a chart like this. Example of the plot. And made it like this.
df_n <- df_normalized %>%
transmute(
Height_x = round(Height*cos_my(45), 2),
Height_y = round(Height*sin_my(45), 2),
Weight_x = round(Weight*cos_my(45*2), 2),
Weight_y = round(Weight*sin_my(45*2), 2),
Reach_x = round(Reach*cos_my(45*3), 2),
Reach_y = round(Reach*sin_my(45*3), 2),
SLpM_x = round(SLpM*cos_my(45*4), 2),
SLpM_y = round(SLpM*sin_my(45*4), 2),
Str_Def_x = round(`Str_Def %`*cos_my(45*5), 2),
Str_Def_y = round(`Str_Def %`*sin_my(45*5), 2),
TD_Avg_x = round(TD_Avg*cos_my(45*6), 2),
TD_Avg_y = round(TD_Avg*sin_my(45*6), 2),
TD_Acc_x = round(`TD_Acc %`*cos_my(45*7), 2),
TD_Acc_y = round(`TD_Acc %`*sin_my(45*7), 2),
Sub_Avg_x = round(Sub_Avg*cos_my(45*8), 2),
Sub_Avg_y = round(Sub_Avg*sin_my(45*8), 2))
Now I want to do this smart way, so I created a data frame with same number of rows empty_df, and later in for loop I try to mutate and array, with every iteration. So for example I want to multiply 1st column by cos(30), 2nd by cos(30*2), and so on
But...
It mutate only last column because all columns during iteration have the same name 'column'.
I want to name each column by the variable column, made with paste0().
reprex_df <- structure(list(Height = c(190, 180, 183, 196, 185),
Weight = c(120, 77, 93, 120, 84),
Reach = c(193, 180, 188, 203, 193),
SLpM = c(2.45, 3.8, 2.05, 7.09, 3.17),
`Str_Def %` = c(58, 56, 55, 34, 44),
TD_Avg = c(1.23, 0.33, 0.64, 0.91, 0),
`TD_Acc %` = c(24, 50, 20, 66, 0),
Sub_Avg = c(0.2, 0, 0, 0, 0)), row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame"))
temp <- apply(reprex_df[,1], function(x) x*cos(60), MARGIN = 2)
temp
empty_df <- data.frame(first_column = replicate(length(temp),1))
for (x in 1:8) {
temp <- apply(df[,x], function(x) round(x*cos((360/8)*x),2), MARGIN = 2)
column <- paste0("Column_",x)
empty_df <- mutate(empty_df, column = temp)
}
Later I want to make it a function where I can pass data frame and receive data frame with X, and Y coordinates.
So, how should I make it?
Perhaps this helps
library(purrr)
library(stringr)
nm1 <- names(reprex_df)
nm_cos <- str_c(names(reprex_df), "_x")
nm_sin <- str_c(names(reprex_df), "_y")
reprex_df[nm_cos] <- map2(reprex_df, seq_along(nm1),
~ round(.x * cos(45 *.y ), 2))
reprex_df[nm_sin] <- map2(reprex_df[nm1], seq_along(nm1),
~ round(.x * sin(45 *.y ), 2))

R data.table keys and column names. Harmonisation

I am trying to set keys yo a data.table and keep the original column names on the second row. All that I have tried so far changes the column names to keys and erases the original variables. I have ten data.tables to merge and all the variables have different names like in the example. So I made keys but would like to keep the originals as well before harmonisation just to be sure.
library(tidyverse)
library(lubridate)
library(forcats)
library(stringr)
library(data.table)
library(rio)
library(dplyr)
1. Keys
keys1 <- c("SDC_GENDER","SDC_CHILD_NB","LAB_CRP","PM_HIP")
keys2 <- c("SDC_GENDER","SDC_CHILD_NB","LAB_CRP","PM_HIP")
2. data.table example with variable names.
TD3 = data.table(q128 = c(1, 2, 1, 2), q129 = c(1, 5, 2, 4), q130 = c(0.8, 3.0, 10.0, NA), q131 = c(55, 56, 80, 79))
TD3
TD4 = data.table(q128 = c(1, 1, 1, 2), q129 = c(1, 3, 2, 999), q130 = c(0.9, 3.1, NA, 9.0), q131 = c(58, 60, 45, NA))
TD4
I'm not sure this is really the data structure you want to have, that is to have mixed variable types like r2evans said. However...this solution works. Just put all your little data.tables into a list and voila.
I noticed that keys1 and keys2 are identical, so I just used one of them. If they should be different keys for each they can also be listed.
keys1 <- c("SDC_GENDER","SDC_CHILD_NB","LAB_CRP","PM_HIP")
TD <- list()
TD[[1]] = data.table(q128 = c(1, 2, 1, 2), q129 = c(1, 5, 2, 4), q130 = c(0.8, 3.0, 10.0, NA), q131 = c(55, 56, 80, 79))
TD[[2]] = data.table(q128 = c(1, 1, 1, 2), q129 = c(1, 3, 2, 999), q130 = c(0.9, 3.1, NA, 9.0), q131 = c(58, 60, 45, NA))
TD <- lapply(TD, FUN = function(x){
oldcolumns <- colnames(x)
td <- data.table(
'V1' = oldcolumns[1],
'V2' = oldcolumns[2],
'V3' = oldcolumns[3],
'V4' = oldcolumns[4]
)
colnames(td) <- keys1
colnames(x) <- keys1
x <- rbind(td, x)
return(x)
})

R Highcharter specify continuous x and y axis

I am a bit confused with use of highcharter hc_add_series function.
I am trying to create a plot where I need to specify both x and y axis, where x axis are continuous. I have a data-frame, for example:
df_plot <- cbind(
seq(0, 1, by = 0.1),
sample(seq(from = 100, to = 300, by = 10), size = 11, replace = TRUE),
sample(seq(from = 1, to = 100, by = 9), size = 11, replace = TRUE),
sample(seq(from = 50, to = 60, by = 2), size = 11, replace = TRUE),
sample(seq(from = 100, to = 130, by = 1), size = 1, replace = TRUE)
) %>%
as.data.frame()
names(df_plot) <- c("x", "a", "b", "c", "d")
I saw this example that works
highchart() %>%
hc_add_series(data = purrr::map(4:8, function(x) list(x, x)), color = "blue")
So i tried:
df_plot1 <- Map(cbind, split.default(df_plot[-1], names(df_plot)[-1]), x=df_plot[1])
highchart() %>%
hc_add_series(data = df_plot1[[1]]) %>%
hc_add_series(data = df_plot1[[2]], yAxis = 1) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#7cb5ec', title=list(text="First y-axis")),
list(lineWidth = 3, lineColor="#434348", title=list(text="Second y-axis")))
However, I am getting "No data to display" on the plot, so I obviously went wrong somewhere.
Also, I cannot use hchart function, as I need have multiple y axis
After reading docs about split.default it Divide into Groups and Reassemble, however you need to access the variable you want to plot, e.g. df_plot1[[1]$a, like so:
library(highcharter)
df_plot <- cbind(
seq(0, 1, by = 0.1),
sample(seq(from = 100, to = 300, by = 10), size = 11, replace = TRUE),
sample(seq(from = 1, to = 100, by = 9), size = 11, replace = TRUE),
sample(seq(from = 50, to = 60, by = 2), size = 11, replace = TRUE),
sample(seq(from = 100, to = 130, by = 1), size = 1, replace = TRUE)
) %>% as.data.frame()
names(df_plot) <- c("x", "a", "b", "c", "d")
df_plot1 <- Map(cbind, split.default(df_plot[-1], names(df_plot)[-1]), x=df_plot[1])
highchart() %>%
hc_xAxis(categories = df_plot1[[1]]$x) %>%
hc_add_series(data = df_plot1[[1]]$a) %>%
hc_add_series(data = df_plot1[[2]]$b, yAxis = 1) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#7cb5ec', title=list(text="First y-axis")),
list(lineWidth = 3, lineColor="#434348", title=list(text="Second y-axis")))
not sure if this can help you,
library(tidyr)
df_plot2 <- gather(df_plot, group, y, -x)
hchart(df_plot2, "line", hcaes(x, y, group = group))
hchart(df_plot2, "line", hcaes(x, y, group = group), yAxis = 0:3) %>%
hc_yAxis_multiples(
list(lineWidth = 3, title=list(text="First y-axis")),
list(lineWidth = 3, title=list(text="Second y-axis")),
list(lineWidth = 3, title=list(text="3rd y-axis")),
list(lineWidth = 3, title=list(text="4th y-axis"))
)

crossJoining two data frames without repeating values

I have two dataframes
DataFrame1 <- data.frame(StudentId = c(1:20), Subject = c(rep("Algebra", 4), rep("Geometry", 4), rep("English", 4), rep("Zoology", 4), rep("Botany", 4)), CGPA = c(random::randomNumbers(20, 70, 100, 1)), Country = c(rep("USA", 4), rep("UK", 4), rep("Germany", 4), rep("France", 4), rep("Japan", 4)))
and
DataFrame2 <- data.frame(StudentId = c(1:10), State = c(rep("NYC", 2), rep("Illinois", 2), rep("Texas", 2), rep("Virginia", 2), rep("Florida", 2)), Age = c(random::randomNumbers(10, 16, 20, 1)), Gender = c(rep("Male", 3), rep("Female", 3), rep("Male", 2), rep("Female", 2)))
I can merge the above two using inner join as
merge(DataFrame1, DataFrame2)
How to merge as cross Joining two data frames without repeating values?
Try merge(DataFrame1, DataFrame2, all = T)
Try this for cross join..
knitr::kable(merge(x = DataFrame1, y = DataFrame2, by = NULL))

Resources