I would like to convert the decimal separator in my data frame from a dot to a comma.
I tried using format(decimal.mark=",") but got an error.
df<-structure(list(ponto = c("F01", "F02", "F03", "F04", "F05", "F06"
), `Vegetação Nativa` = c(0.09, 3.12, 8.22, 5.92, 1.95, 4.7),
Agricultura = c(91.78, 91.87, 100, 100, 91.5, 99.38), Pastagem = c(-16.99,
-33.16, -22.73, -24.12, -38, -47.3), `Área Urbana` = c(27.32,
27.32, 27.57, 27.57, 19.18, NaN), `Solo Exposto` = c(10.04,
2.13, 8.5, 6.64, -29.35, -442.86), `Corpo Hídrico` = c(-15.62,
-15.62, NaN, NaN, -17.11, -25.93)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
ponto = c("F01", "F02", "F03", "F04", "F05", "F06"), .rows = structure(list(
1L, 2L, 3L, 4L, 5L, 6L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L), .drop = TRUE))
I tried this, but got an error:
df %>%
  format(decimal.mark = ",")
One way is to use mutate() and across() from dplyr. Note, though, that this converts the columns to character.
library(dplyr)
df %>%
mutate(across(everything(), format, decimal.mark = ","))
Output
# A tibble: 6 × 7
# Groups: ponto [6]
ponto `Vegetação Nativa` Agricultura Pastagem `Área Urbana` `Solo Exposto` `Corpo Hídrico`
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 F01 0,09 91,78 -16,99 27,32 10,04 -15,62
2 F02 3,12 91,87 -33,16 27,32 2,13 -15,62
3 F03 8,22 100 -22,73 27,57 8,5 NaN
4 F04 5,92 100 -24,12 27,57 6,64 NaN
5 F05 1,95 91,5 -38 19,18 -29,35 -17,11
6 F06 4,7 99,38 -47,3 NaN -442,86 -25,93
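If you only want to touch the numeric columns and leave ponto alone, here is a small variation of the same idea (it still converts those columns to character; the lambda form also avoids passing extra arguments through across()'s dots, which newer dplyr versions discourage):
library(dplyr)
df %>%
  mutate(across(where(is.numeric), ~ format(.x, decimal.mark = ",")))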
Additionally, if you simply want to change how the data are displayed when printing, plotting, etc., you can change the default options. You can also read more about it here (this post has a lot of discussion directly related to your question).
options(OutDec= ",")
Examples (after changing options):
c(1.5, 3.456, 40000.89)
# [1] 1,500 3,456 40000,890
However, the caveat is that the data must be character for this to show up in the tibble output. So with your data, we can convert the columns to character, and they will then display with a comma rather than a period.
df %>% mutate(across(everything(), as.character))
# A tibble: 6 × 7
# Groups: ponto [6]
ponto `Vegetação Nativa` Agricultura Pastagem `Área Urbana` `Solo Exposto` `Corpo Hídrico`
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 F01 0,09 91,78 -16,99 27,32 10,04 -15,62
2 F02 3,12 91,87 -33,16 27,32 2,13 -15,62
3 F03 8,22 100 -22,73 27,57 8,5 NaN
4 F04 5,92 100 -24,12 27,57 6,64 NaN
5 F05 1,95 91,5 -38 19,18 -29,35 -17,11
6 F06 4,7 99,38 -47,3 NaN -442,86 -25,93
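Keep in mind that OutDec is a global option that affects all base R printing for the rest of the session; you can restore the default at any point:
options(OutDec = ".")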
I am working on mass spectrometry proteomics expression data. For statistical analysis, I have to find the three smallest values in each column of the data frame below:
structure(list(Type = c("knn_vsn", "knn_loess", "knn_rlr", "lls_vsn",
"lls_loess", "lls_rlr", "svd_vsn", "svd_loess", "svd_rlr"), Group1 = c(0.00318368971435714,
0.00317086486813191, 0.00317086486813191, 0.00312821095645019,
0.00311632537571597, 0.00313568333628438, 0.00394831935666465,
0.00393605637633005, 0.00395599132474446), Group2 = c(0.0056588221783197,
0.00560933517836751, 0.00560933517836751, 0.00550114679857588,
0.00548316209864631, 0.00550230673346083, 0.00737865310351839,
0.0073411154394253, 0.00735748595511963), Group3 = c(0.00418838138878096,
0.00417201215938804, 0.00417201215938804, 0.00398819978362592,
0.00397093259462351, 0.00398827962107259, 0.00424157479553304,
0.00422638750183658, 0.00424175886713471), Group4 = c(0.0039811913527127,
0.00394649435912413, 0.00394649435912413, 0.00397059873107098,
0.00393840233766712, 0.00396385071387178, 0.0041077267588457,
0.00407577176849463, 0.00410191492380459)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -9L), groups = structure(list(
Type = c("knn_loess", "knn_rlr", "knn_vsn", "lls_loess",
"lls_rlr", "lls_vsn", "svd_loess", "svd_rlr", "svd_vsn"),
.rows = structure(list(2L, 3L, 1L, 5L, 6L, 4L, 8L, 9L, 7L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -9L), .drop = TRUE))
And I need the output to look like this:
structure(list(`Type ` = c("lls_loess", "lls_rlr", "lls_vsn"),
Group1 = c(0.00311632537571597, 0.00313568333628438, 0.00312821095645019
), ` Type` = c("lls_loess", "lls_rlr", "lls_vsn"), Group2 = c(0.00548316209864631,
0.00550230673346083, 0.00550114679857588), ` Type` = c("lls_loess",
"lls_rlr", "lls_vsn"), Group3 = c(0.00397093259462351, 0.00398827962107259,
0.00398819978362592), `Type ` = c("lls_loess", "lls_rlr",
"lls_vsn"), Group4 = c(0.00393840233766712, 0.00396385071387178,
0.00397059873107098)), class = "data.frame", row.names = c(NA,
-3L))
Please suggest some useful R code for this issue.
Thank you in advance.
library(tidyverse)
df %>%
pivot_longer(-Type) %>%
group_by(name) %>%
slice_min(value, n = 3) %>% # You might stop here, already tidy
mutate(row = row_number()) %>%
ungroup() %>%
pivot_wider(names_from = name, values_from = c(Type, value),
names_vary = "slowest")
Result
# A tibble: 3 × 9
row Type_Group1 value_Group1 Type_Group2 value_Group2 Type_Group3 value_Group3 Type_Group4 value_Group4
<int> <chr> <dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl>
1 1 lls_loess 0.00312 lls_loess 0.00548 lls_loess 0.00397 lls_loess 0.00394
2 2 lls_vsn 0.00313 lls_vsn 0.00550 lls_vsn 0.00399 knn_loess 0.00395
3 3 lls_rlr 0.00314 lls_rlr 0.00550 lls_rlr 0.00399 knn_rlr 0.00395
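Note that names_vary was, to the best of my knowledge, added in tidyr 1.2.0; on an older tidyr you can get the same interleaved column order with relocate(), roughly like this:
library(tidyverse)
df %>%
  pivot_longer(-Type) %>%
  group_by(name) %>%
  slice_min(value, n = 3) %>%
  mutate(row = row_number()) %>%
  ungroup() %>%
  pivot_wider(names_from = name, values_from = c(Type, value)) %>%
  relocate(row, ends_with("Group1"), ends_with("Group2"),
           ends_with("Group3"), ends_with("Group4"))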
Take a look at this:
data <- data.frame(group = rep(letters[1:3], each = 5),
                   value = 1:3)
data
Another possible solution, based on purrr::imap_dfc:
library(tidyverse)
imap_dfc(2:ncol(df), ~ df %>% ungroup %>% .[c(1,.x)] %>%
slice_min(df[[.x]], n = 3) %>% set_names(c(paste0("Type",.y), names(df)[.x])))
#> # A tibble: 3 × 8
#> Type1 Group1 Type2 Group2 Type3 Group3 Type4 Group4
#> <chr> <dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl>
#> 1 lls_loess 0.00312 lls_loess 0.00548 lls_loess 0.00397 lls_loess 0.00394
#> 2 lls_vsn 0.00313 lls_vsn 0.00550 lls_vsn 0.00399 knn_loess 0.00395
#> 3 lls_rlr 0.00314 lls_rlr 0.00550 lls_rlr 0.00399 knn_rlr 0.00395
Note: your original data is grouped; that is why I use ungroup in my solution.
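If you want to check for or drop the grouping up front, a minimal sketch using the df from the question:
library(dplyr)
group_vars(df)    # returns "Type", i.e. the tibble arrived grouped by Type
df <- ungroup(df) # drop the grouping once instead of inside every pipeline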
I have a table of cash flows for various projects over time (years) and want to calculate the IRR for each project. I can't seem to select the appropriate columns, which vary from project to project. The table structure is as follows:
structure(list(`Portfolio Company` = c("Ventures II", "Pal III",
"River Fund II", "Ventures III"),
minc = c(2007, 2008, 2008, 2012),
maxc = c(2021, 2021, 2021, 2020),
num_pers = c(14, 13, 13, 8),
`2007` = c(-660000, NA, NA, NA),
`2008` = c(-525000, -954219, -1427182.55, NA),
`2009` = c(-351991.03, -626798, -1694353.41, NA),
`2010` = c(-299717.06, -243248, -1193954, NA),
`2011` = c(-239257.08, 465738, -288309, NA),
`2012` = c(-9057.31000000001, -369011, 128509.63, -480000),
`2013` = c(-237233.9, -131111, 53718, -411734.58),
`2014` = c(-106181.76, -271181, 887640, -600000),
`2015` = c(-84760.51, 441808, 906289, -900000),
`2016` = c(2770719.21, -377799, 166110, -150000),
`2017` = c(157820.08, -12147, 1425198, -255000),
`2018` = c(204424.36,-1626110, 361270, -180000),
`2019` = c(563463.62, 119577, 531555, 3300402.62),
`2020` = c(96247.29, 7057926, 2247027, 36111.6),
`2021` = c(614848.68, 1277996, 258289, NA)),
class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L),
groups = structure(list(`Portfolio Company` =c("Ventures II","Ventures III","Pal III", "River Fund II"),
.rows = structure(list(1L, 4L, 2L, 3L),
ptype = integer(0),
class = c("vctrs_list_of", "vctrs_vctr", "list"))),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L), .drop = TRUE))
Each project (Portfolio Company) has a different start and end date which is captured by the minc and maxc columns. I would like to use the text in minc and maxc to select from minc:maxc for each project to perform the IRR calculation. I get a variety of errors including: object maxc not found, incorrect arg ... Have tried about 20 combinations of !!sym, as.String (from NLP package) ... none works.
This is the code that created the table and the problematic select code:
sum_fund_CF <- funds %>% group_by(`TX_YR`, `Portfolio Company`) %>%
summarise(CF=sum(if_else(is.na(Proceeds),0,Proceeds)-if_else(is.na(Investment),0,Investment))) %>% ungroup() #organizes source data and calculates cash flows
sum_fund_CF <- sum_fund_CF %>%
group_by(`Portfolio Company`) %>% mutate(minc=min(`TX_YR`),maxc=max(`TX_YR`),num_pers=maxc-minc) %>%
pivot_wider(names_from = TX_YR, values_from = `CF`) #creates the table and finds first year and last year of cash flow, and num of periods between them
sum_fund_CF %>% group_by(`Portfolio Company`)%>% select(!!sym(as.String(maxc))):!!sym(as.String(max))) #want to select appropriate columns for each record to do the IRR analysis ... IRR() ... need a string of cash flows and no NA.
I'm sure it's something simple, but this has me perplexed. Thanks!
You can modify your definition of IRR accordingly. I followed this article on how to calculate IRR using the jrvFinance package.
The filter function from the dplyr package is used after group_by, to select the years indicated by the minc and maxc columns.
library(tidyverse)
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
library(jrvFinance)
data <- structure(list(`Portfolio Company` = c("Ventures II", "Pal III",
"River Fund II", "Ventures III"),
minc = c(2007, 2008, 2008, 2012),
maxc = c(2021, 2021, 2021, 2020),
num_pers = c(14, 13, 13, 8),
`2007` = c(-660000, NA, NA, NA),
`2008` = c(-525000, -954219, -1427182.55, NA),
`2009` = c(-351991.03, -626798, -1694353.41, NA),
`2010` = c(-299717.06, -243248, -1193954, NA),
`2011` = c(-239257.08, 465738, -288309, NA),
`2012` = c(-9057.31000000001, -369011, 128509.63, -480000),
`2013` = c(-237233.9, -131111, 53718, -411734.58),
`2014` = c(-106181.76, -271181, 887640, -600000),
`2015` = c(-84760.51, 441808, 906289, -900000),
`2016` = c(2770719.21, -377799, 166110, -150000),
`2017` = c(157820.08, -12147, 1425198, -255000),
`2018` = c(204424.36,-1626110, 361270, -180000),
`2019` = c(563463.62, 119577, 531555, 3300402.62),
`2020` = c(96247.29, 7057926, 2247027, 36111.6),
`2021` = c(614848.68, 1277996, 258289, NA)),
class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L),
groups = structure(list(`Portfolio Company` =c("Ventures II","Ventures III","Pal III", "River Fund II"),
.rows = structure(list(1L, 4L, 2L, 3L),
ptype = integer(0),
class = c("vctrs_list_of", "vctrs_vctr", "list"))),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L), .drop = TRUE))
clean_data <- data %>%
clean_names() %>%
ungroup() %>%
pivot_longer(cols = -1:-4,
names_to = "year",
values_to = "cashflow") %>%
mutate(year = str_replace(year, "x", ""),
year = as.numeric(year))
clean_data %>%
print(n = 20)
#> # A tibble: 60 x 6
#> portfolio_company minc maxc num_pers year cashflow
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Ventures II 2007 2021 14 2007 -660000
#> 2 Ventures II 2007 2021 14 2008 -525000
#> 3 Ventures II 2007 2021 14 2009 -351991.
#> 4 Ventures II 2007 2021 14 2010 -299717.
#> 5 Ventures II 2007 2021 14 2011 -239257.
#> 6 Ventures II 2007 2021 14 2012 -9057.
#> 7 Ventures II 2007 2021 14 2013 -237234.
#> 8 Ventures II 2007 2021 14 2014 -106182.
#> 9 Ventures II 2007 2021 14 2015 -84761.
#> 10 Ventures II 2007 2021 14 2016 2770719.
#> 11 Ventures II 2007 2021 14 2017 157820.
#> 12 Ventures II 2007 2021 14 2018 204424.
#> 13 Ventures II 2007 2021 14 2019 563464.
#> 14 Ventures II 2007 2021 14 2020 96247.
#> 15 Ventures II 2007 2021 14 2021 614849.
#> 16 Pal III 2008 2021 13 2007 NA
#> 17 Pal III 2008 2021 13 2008 -954219
#> 18 Pal III 2008 2021 13 2009 -626798
#> 19 Pal III 2008 2021 13 2010 -243248
#> 20 Pal III 2008 2021 13 2011 465738
#> # ... with 40 more rows
clean_data %>%
group_by(portfolio_company) %>%
filter(between(year, min(minc), max(maxc))) %>%
summarise(irr = irr(cashflow,
cf.freq = 1))
#> # A tibble: 4 x 2
#> portfolio_company irr
#> <chr> <dbl>
#> 1 Pal III 0.111
#> 2 River Fund II 0.0510
#> 3 Ventures II 0.0729
#> 4 Ventures III 0.0251
Created on 2022-01-04 by the reprex package (v2.0.1)
Another way to do it, using jrvFinance::irr().
library(jrvFinance)
library(tidyverse)
df %>%
rowwise() %>%
summarise(irr = irr(na.omit(c_across(matches('^\\d')))), .groups = 'drop')
#> # A tibble: 4 × 2
#> `Portfolio Company` irr
#> <chr> <dbl>
#> 1 Ventures II 0.0729
#> 2 Pal III 0.111
#> 3 River Fund II 0.0510
#> 4 Ventures III 0.0251
Created on 2022-01-04 by the reprex package (v2.0.1)
I have two datasets on the same 2 patients. With the second dataset I want to add new information to the first, but I can't seem to get the code right.
My first (incomplete) dataset has a patient ID, measurement time (either T0 or FU1), year of birth, date of the CT scan, and two outcomes (legs_mass and total_mass):
library(tidyverse)
library(dplyr)
library(magrittr)
library(lubridate)
df1 <- structure(list(ID = c(115, 115, 370, 370), time = structure(c(1L,
6L, 1L, 6L), .Label = c("T0", "T1M0", "T1M6", "T1M12", "T2M0",
"FU1"), class = "factor"), year_of_birth = c(1970, 1970, 1961,
1961), date_ct = structure(c(16651, 17842, 16651, 18535), class = "Date"),
legs_mass = c(9.1, NA, NA, NA), total_mass = c(14.5, NA,
NA, NA)), row.names = c(NA, -4L), class = c("tbl_df", "tbl",
"data.frame"))
# Which gives the following dataframe
df1
# A tibble: 4 x 6
ID time year_of_birth date_ct legs_mass total_mass
<dbl> <fct> <dbl> <date> <dbl> <dbl>
1 115 T0 1970 2015-08-04 9.1 14.5
2 115 FU1 1970 2018-11-07 NA NA
3 370 T0 1961 2015-08-04 NA NA
4 370 FU1 1961 2020-09-30 NA NA
The second dataset adds to the legs_mass and total_mass columns:
df2 <- structure(list(ID = c(115, 370), date_ct = structure(c(17842,
18535), class = "Date"), ctscan_label = c("PXE115_CT_20181107_xxxxx-3.tif",
"PXE370_CT_20200930_xxxxx-403.tif"), legs_mass = c(956.1, 21.3
), total_mass = c(1015.9, 21.3)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
# Which gives the following dataframe:
df2
# A tibble: 2 x 5
ID date_ct ctscan_label legs_mass total_mass
<dbl> <date> <chr> <dbl> <dbl>
1 115 2018-11-07 PXE115_CT_20181107_xxxxx-3.tif 956. 1016.
2 370 2020-09-30 PXE370_CT_20200930_xxxxx-403.tif 21.3 21.3
What I am trying to do is:
1. Add the legs_mass and total_mass column values from df2 to df1, based on ID number and date_ct.
2. Add the new column of df2 that is not in df1 (ctscan_label) to df1, also based on the CT date and patient ID.
So that the final dataset df3 looks as follows:
df3 <- structure(list(ID = c(115, 115, 370, 370), time = structure(c(1L,
6L, 1L, 6L), .Label = c("T0", "T1M0", "T1M6", "T1M12", "T2M0",
"FU1"), class = "factor"), year_of_birth = c(1970, 1970, 1961,
1961), date_ct = structure(c(16651, 17842, 16651, 18535), class = "Date"),
legs_mass = c(9.1, 956.1, NA, 21.3), total_mass = c(14.5,
1015.9, NA, 21.3)), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
# Corresponding to the following tibble:
# A tibble: 4 x 6
ID time year_of_birth date_ct legs_mass total_mass
<dbl> <fct> <dbl> <date> <dbl> <dbl>
1 115 T0 1970 2015-08-04 9.1 14.5
2 115 FU1 1970 2018-11-07 956. 1016.
3 370 T0 1961 2015-08-04 NA NA
4 370 FU1 1961 2020-09-30 21.3 21.3
I have tried the merge function and rbind from base R, and bind_rows from dplyr, but can't seem to get it right.
Any help?
You can join the two datasets and use coalesce to keep the first non-NA value from each pair of overlapping columns.
library(dplyr)
left_join(df1, df2, by = c("ID", "date_ct")) %>%
mutate(legs_mass = coalesce(legs_mass.x, legs_mass.y),
total_mass = coalesce(total_mass.x, total_mass.y)) %>%
select(-matches('\\.x|\\.y'), -ctscan_label)
# ID time year_of_birth date_ct legs_mass total_mass
# <dbl> <fct> <dbl> <date> <dbl> <dbl>
#1 115 T0 1970 2015-08-04 9.1 14.5
#2 115 FU1 1970 2018-11-07 956. 1016.
#3 370 T0 1961 2015-08-04 NA NA
#4 370 FU1 1961 2020-09-30 21.3 21.3
We can use data.table methods
library(data.table)
setDT(df1)[setDT(df2), c("legs_mass", "total_mass") :=
.(fcoalesce(legs_mass, i.legs_mass),
fcoalesce(total_mass, i.total_mass)), on = .(ID, date_ct)]
Output
df1
ID time year_of_birth date_ct legs_mass total_mass
1: 115 T0 1970 2015-08-04 9.1 14.5
2: 115 FU1 1970 2018-11-07 956.1 1015.9
3: 370 T0 1961 2015-08-04 NA NA
4: 370 FU1 1961 2020-09-30 21.3 21.3
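If you are on dplyr 1.0.0 or later, rows_patch() is another option worth knowing: it fills only the NA cells of df1 with the matching values from df2, keyed by ID and date_ct. A minimal sketch (ctscan_label is dropped first because, as far as I know, rows_patch() only accepts columns that already exist in df1):
library(dplyr)
df3 <- rows_patch(df1, select(df2, -ctscan_label), by = c("ID", "date_ct"))
df3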
When I run just this line of the code, the results are as expected. When I run the whole chunk, the mutations stop at the third line. How can I fix this? I feel like this is something new that I did not face before with the same code.
Sample data:
> dput(head(out))
structure(list(SectionCut = c("S-1", "S-1", "S-1", "S-1", "S-2",
"S-2"), OutputCase = c("LL-1", "LL-2", "LL-3", "LL-4", "LL-1",
"LL-2"), V2 = c(81.782, 119.251, 119.924, 96.282, 72.503, 109.595
), M3 = c("-29.292000000000002", "-32.661999999999999", "-30.904",
"-23.632999999999999", "29.619", "32.994"), id = c("./100-12-S01.xlsx",
"./100-12-S01.xlsx", "./100-12-S01.xlsx", "./100-12-S01.xlsx",
"./100-12-S01.xlsx", "./100-12-S01.xlsx")), row.names = c(NA,
-6L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
SectionCut = c("S-1", "S-1", "S-1", "S-1", "S-2", "S-2"),
OutputCase = c("LL-1", "LL-2", "LL-3", "LL-4", "LL-1", "LL-2"
), id = c("./100-12-S01.xlsx", "./100-12-S01.xlsx", "./100-12-S01.xlsx",
"./100-12-S01.xlsx", "./100-12-S01.xlsx", "./100-12-S01.xlsx"
), .rows = list(1L, 2L, 3L, 4L, 5L, 6L)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
> dput(head(Beamline_Shear))
structure(list(VLL = c(159.512186, 154.3336, 149.4451613, 167.0207595,
161.2269091, 156.4116505)), row.names = c("84-9", "84-12", "84-15",
"92-9", "92-12", "92-15"), class = "data.frame")
Code that I am trying to run:
Shear <- out[,-4] %>% mutate(N_l = str_extract(OutputCase,"\\d+"),
UG = str_extract(id,"\\d+"), a = str_extract(id,"-\\d+"),
S = str_extract(a,"\\d+"), Sections = paste0(UG,"-",S),
Sample = str_remove_all(id, "./\\d+-\\d+-|.xlsx")) %>%
left_join(Beamline_Shear %>% rownames_to_column("Sections"), by = "Sections") %>%
select(-OutputCase,-id,-Sections,-a)
There are group attributes in the data, which should normally work but can be an issue if we are running in a different environment. Also, the mutate step and the join step don't really need any grouping attributes, as they are straightforward row-wise operations that are vectorized.
library(dplyr)
library(stringr)
library(tibble)
out %>%
  select(-4) %>%
  ungroup() %>%  # removes group attributes
  mutate(N_l = str_extract(OutputCase, "\\d+"),
         UG = str_extract(id, "\\d+"),
         a = str_extract(id, "-\\d+"),
         S = str_extract(a, "\\d+"),
         Sections = paste0(UG, "-", S),
         Sample = str_remove_all(id, "./\\d+-\\d+-|.xlsx")) %>%
  left_join(Beamline_Shear %>% rownames_to_column("Sections"), by = "Sections")
# A tibble: 6 x 11
# SectionCut OutputCase V2 id N_l UG a S Sections Sample VLL
# <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
#1 S-1 LL-1 81.8 ./100-12-S01.xlsx 1 100 -12 12 100-12 S01 NA
#2 S-1 LL-2 119. ./100-12-S01.xlsx 2 100 -12 12 100-12 S01 NA
#3 S-1 LL-3 120. ./100-12-S01.xlsx 3 100 -12 12 100-12 S01 NA
#4 S-1 LL-4 96.3 ./100-12-S01.xlsx 4 100 -12 12 100-12 S01 NA
#5 S-2 LL-1 72.5 ./100-12-S01.xlsx 1 100 -12 12 100-12 S01 NA
#6 S-2 LL-2 110. ./100-12-S01.xlsx 2 100 -12 12 100-12 S01 NA
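Note that VLL is NA in this output only because the six sample rows of Beamline_Shear shown above (keys like 84-9, 84-12, ...) contain no 100-12 key for the join to match; with the full Beamline_Shear it should be filled in. You can confirm which key the join is looking for like this:
library(dplyr)
library(tibble)
Beamline_Shear %>%
  rownames_to_column("Sections") %>%
  filter(Sections == "100-12")  # zero rows in the sample data above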
I've got two data frames that I'm trying to divide by each other, but it's not working for me. Both data frames are 8 x 3; column one is the same in both, and the column names are also the same in both.
bal_tier[,c(1, 3:4)]
# A tibble: 8 x 3
# Groups: hierachy_level2 [8]
hierachy_level2 `201804` `201904`
<chr> <dbl> <dbl>
1 CS 239 250
2 FNZ 87 97
3 OPS 1057 1136.
4 P&T 256 279
5 R&A 520 546
6 SPE 130 136.
7 SPP 67 66
8 TUR 46 69
dput(bal_tier[,c(1, 3:4)])
structure(list(hierachy_level2 = c("CS", "FNZ", "OPS", "P&T",
"R&A", "SPE", "SPP", "TUR"), `201804` = c(239, 87, 1057, 256,
520, 130, 67, 46), `201904` = c(250, 97, 1136.5, 279, 546, 136.5,
66, 69)), row.names = c(NA, -8L), groups = structure(list(hierachy_level2 = c("CS",
"FNZ", "OPS", "P&T", "R&A", "SPE", "SPP", "TUR"), .rows = list(
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L)), row.names = c(NA, -8L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
tier_leavers[,c(1, 3:4)]
# A tibble: 8 x 3
# Groups: hierachy_level2 [8]
hierachy_level2 `201804` `201904`
<chr> <dbl> <dbl>
1 CS 32 47
2 FNZ 1 11
3 OPS 73 76
4 P&T 48 33
5 R&A 41 33
6 SPE 28 30
7 SPP 10 12
8 TUR 2 3
dput(tier_leavers[,c(1, 3:4)])
structure(list(hierachy_level2 = c("CS", "FNZ", "OPS", "P&T",
"R&A", "SPE", "SPP", "TUR"), `201804` = c(32, 1, 73, 48, 41,
28, 10, 2), `201904` = c(47, 11, 76, 33, 33, 30, 12, 3)), row.names = c(NA,
-8L), groups = structure(list(hierachy_level2 = c("CS", "FNZ",
"OPS", "P&T", "R&A", "SPE", "SPP", "TUR"), .rows = list(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L)), row.names = c(NA, -8L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Doing this gives me what I want:
bal_tier[,1]
# A tibble: 8 x 1
# Groups: hierachy_level2 [8]
hierachy_level2
<chr>
1 CS
2 FNZ
3 OPS
4 P&T
5 R&A
6 SPE
7 SPP
8 TUR
(tier_leavers[,c(3:4)] / bal_tier[,c(3:4)])
201804 201904
1 0.13389121 0.18800000
2 0.01149425 0.11340206
3 0.06906339 0.06687198
4 0.18750000 0.11827957
5 0.07884615 0.06043956
6 0.21538462 0.21978022
7 0.14925373 0.18181818
8 0.04347826 0.04347826
but when I combine them with cbind I end up with this:
cbind(bal_tier[,1], tier_leavers[,c(3:4)] / bal_tier[,c(3:4)])
[,1] [,2]
201804 Character,8 Numeric,8
201904 Character,8 Numeric,8
What am I understanding wrong here?
Here's a solution using the tidyverse:
library(tidyverse)
nme <- c("A","B","C","D","E")
yr_1 <- round(10*runif(n=5,min=0,max=10),0)
yr_2 <- round(10*runif(n=5,min=0,max=10),0)
data_1 <- data.frame(nme,yr_1,yr_2)
yr_1 <- round(10*runif(n=5,min=0,max=10),0)
yr_2 <- round(10*runif(n=5,min=0,max=10),0)
data_2 <- data.frame(nme,yr_1,yr_2)
data_divide <- data_1 %>%
left_join(data_2,by="nme") %>%
mutate(
result_1=yr_1.x/yr_1.y,
result_2=yr_2.x/yr_2.y
)
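One advantage of joining by the key column like this is that it does not depend on the two tables having their rows in the same order, which is exactly what dividing the columns by position assumes.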
What I ended up doing feels like cheating but I got a clue from Zeus's answer:
a <- bal_tier[, 1]
b <- tier_leavers[,c(3:4)] / bal_tier[,c(3:4)]
tier_to <- data.frame(a, b)
tier_to
> tier_to
hierachy_level2 X201804 X201904
1 CS 0.13389121 0.18800000
2 FNZ 0.01149425 0.11340206
3 OPS 0.06906339 0.06687198
4 P&T 0.18750000 0.11827957
5 R&A 0.07884615 0.06043956
6 SPE 0.21538462 0.21978022
7 SPP 0.14925373 0.18181818
8 TUR 0.04347826 0.04347826
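A slightly tidier variant of the same idea, sketched under the assumption that both tibbles keep their rows in the same order: ungroup first and use dplyr::bind_cols(), which keeps the result a tibble and avoids the X-prefixed names that data.frame() adds to non-syntactic column names.
library(dplyr)
tier_to <- bind_cols(
  ungroup(bal_tier[, 1]),                      # the hierachy_level2 column
  tier_leavers[, c(3:4)] / bal_tier[, c(3:4)]  # element-wise leaver ratios per year
)
tier_to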