Melt/ reshape dataframe to combine columns and fill rows with NAs - r

Apologies that there is a wealth of information on this site about melting and reshaping data, however, I cannot find the answer to my question on any of the pages I've visited. I have a data set which looks something like:
A Year | A Mean Temp | A Max Temp | A Min Temp | B Year | B Mean Temp | B Max Temp | B Min Temp |
and I want to end up with
Year | A Mean Temp | A Max Temp | A Min Temp |B Mean Temp | B Max Temp | B Min Temp
and fill columns which don't have data for that specific year with 'NA'.
The desired output would be something like:
[Table][1]
I believe the answer lies somewhere in something like:
library(dplyr)
library(tidyr)
library(stringr)
Data %>%
pivot_longer(cols = contains("Year"), names_to = c("Country", ".value"),
names_sep="_", values_drop_na = TRUE) %>%
rename_with(~ str_c('Country_', .), Rating:Year)```
But as of yet no luck.
Any help would be appreciated.
Thank you
Data
structure(list(Antarctica.Year.CE = 167:172, Antarctica.Temp..C. = c(0.33,
0.31, 0.18, 0.08, -0.01, -0.11), Antarctica.Min..C. = c(-1.24,
-1.26, -1.39, -1.48, -1.57, -1.67), Antarctica.Max..C. = c(1.89,
1.87, 1.74, 1.64, 1.55, 1.45), Arctic.Year.CE = 1:6, Arctic.Temp..C. = c(-1.15,
-0.96, -0.32, 0.1, -0.18, -0.61), Arctic.Min..C. = c(-1.92, -1.76,
-1.38, -0.74, -1.08, -1.17), Arctic.Max..C. = c(-0.31, -0.11,
0.48, 0.83, 0.73, 0.16), Asia.Year.CE = 800:805, Asia.Temp..C. = c(-0.31,
-0.14, -0.36, -0.67, -0.78, -0.26), Asia.Min..C. = c(-1.4, -1.23,
-1.45, -1.76, -1.87, -1.35), Asia.Max..C. = c(0.79, 0.96, 0.74,
0.43, 0.31, 0.83), Australasia.Year.CE = 1001:1006, Australasia.Temp..C. = c(-0.24,
-0.38, -0.29, -0.33, -0.34, -0.11), Australasia.Min..C. = c(-0.62,
-0.79, -0.71, -0.73, -0.73, -0.56), Australasia.Max..C. = c(0.15,
0.03, 0.13, 0.07, 0.05, 0.34), Europe.Year.CE = 1:6, Europe.Temp..C. = c(0.09,
-0.26, -0.24, 0.22, 0.32, 0.67), Europe.Min..C. = c(-0.69, -1.14,
-1.18, -0.66, -0.48, -0.11), Europe.Max..C. = c(0.88, 0.56, 0.61,
1.07, 1.14, 1.5), North.America...Pollen.Year.CE = c(480L, 510L,
540L, 570L, 600L, 630L), North.America...Pollen.Temp..C. = c(-0.25,
-0.29, -0.33, -0.34, -0.34, -0.34), North.America...Pollen.Min..C. = c(-0.74,
-0.7, -0.66, -0.65, -0.64, -0.64), North.America...Pollen.Max..C. = c(0.24,
0.11, 0, -0.04, -0.04, -0.04), North.America...Trees.Year.CE = c(1204L,
1214L, 1224L, 1234L, 1244L, 1254L), North.America...Trees.Temp..C. = c(-0.22,
-0.45, -0.38, -0.87, -0.81, -0.06), North.America...Trees.Min..C. = c(-0.53,
-0.72, -0.67, -1.12, -1.09, -0.35), North.America...Trees.Max..C. = c(0.04,
-0.2, -0.11, -0.57, -0.52, 0.18), South.America.Year.CE = 857:862,
South.America.Temp..C. = c(-0.3, -0.21, -0.07, -0.38, -0.41,
-0.19), South.America.Min..C. = c(-1.12, -1, -0.88, -1.19,
-1.22, -0.98), South.America.Max..C. = c(0.53, 0.58, 0.74,
0.43, 0.39, 0.61)), row.names = c(NA, 6L), class = "data.frame") ```
[1]: https://i.stack.imgur.com/0sV7a.png

For something as small as this, I'd often just go with a more manual approach.
Given your df above, I specify the lists of countries in the columns and then grepl() on the df columns to select those columns. Then, we rename the columns, return the new dataframe. We can then apply the function to the list of countries and then rbind with do.call.
country_list = c('Antarctica', 'Arctic', 'Asia', 'Australasia', 'Europe', 'North.America...Pollen', 'North.America...Trees', 'South.America')
get_cols = function(country) {
df_new = df[,grepl(country, colnames(df))]
df_new$Country = rep(country, nrow(df_new))
colnames(df_new) = c('Year', 'Temp', 'Min_Temp', 'Max_Temp', 'Country')
return(df_new)
}
df_final = do.call(rbind, lapply(country_list, get_cols))
Hope that returns what you're looking for?

Related

Is there a way to filter out the row that has the highest of three different columns simultaneously?

Is there a way to filter out the row that has the highest of three different columns simultaneously? I am trying to filter out the row that has the best accuracy, specificity, and sensitivity in a data frame.
Pic of first few rows of data
in the data provided the highest for all 3 should be (aka the desired output)
"thresh_info.59 0.60 83.39 83.27684 83.557047"
data<- structure(list(threshold = c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06,
0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17,
0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28,
0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39,
0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5,
0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61,
0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72,
0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83,
0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94,
0.95, 0.96, 0.97, 0.98, 0.99), accuracy = c(61.72, 63.67, 65.29,
66.58, 67.86, 69.01, 69.75, 70.83, 71.51, 72.79, 73.87, 74.54,
75.02, 75.29, 75.83, 76.3, 76.7, 77.25, 77.65, 77.92, 78.33,
79, 79.14, 79.07, 79.41, 79.61, 79.68, 80.28, 80.69, 80.82, 80.89,
81.16, 81.3, 81.77, 81.9, 81.97, 82.17, 82.31, 82.44, 82.65,
82.58, 82.92, 82.98, 83.59, 83.52, 83.59, 83.25, 83.46, 83.39,
83.46, 83.66, 83.73, 83.52, 83.66, 83.93, 83.46, 83.25, 83.32,
83.32, 83.39, 83.39, 82.92, 82.24, 82.04, 81.77, 81.3, 81.5,
81.23, 81.03, 80.89, 80.49, 80.35, 80.01, 80.01, 79.2, 79.14,
78.87, 78.93, 78.6, 77.92, 77.25, 76.91, 76.37, 75.56, 74.81,
73.94, 73.13, 72.79, 71.84, 71.51, 69.89, 68.4, 66.44, 64.82,
63.13, 61.44, 59.08, 55.77, 52.8), sensitivity = c(100, 100,
100, 99.8870056497175, 99.8870056497175, 99.6610169491526, 99.6610169491526,
99.5480225988701, 99.3220338983051, 99.2090395480226, 99.0960451977401,
98.7570621468927, 98.6440677966102, 98.5310734463277, 98.1920903954802,
97.9661016949153, 97.7401129943503, 97.6271186440678, 96.8361581920904,
96.3841807909604, 96.045197740113, 95.9322033898305, 95.5932203389831,
95.2542372881356, 94.9152542372881, 94.6892655367232, 94.2372881355932,
94.1242937853107, 94.1242937853107, 93.6723163841808, 93.1073446327684,
92.8813559322034, 92.6553672316384, 92.4293785310735, 92.316384180791,
91.9774011299435, 91.864406779661, 91.5254237288136, 91.2994350282486,
90.8474576271186, 90.0564971751412, 89.9435028248588, 89.7175141242938,
89.2655367231638, 89.0395480225989, 88.8135593220339, 88.135593220339,
87.909604519774, 87.3446327683616, 87.0056497175141, 86.5536723163842,
86.3276836158192, 85.6497175141243, 85.5367231638418, 85.5367231638418,
84.7457627118644, 84.180790960452, 83.8418079096045, 83.6158192090395,
83.2768361581921, 83.0508474576271, 82.0338983050847, 80.6779661016949,
80.225988700565, 79.3220338983051, 78.1920903954802, 77.9661016949153,
76.9491525423729, 76.1581920903955, 75.4802259887006, 74.3502824858757,
73.7853107344633, 72.8813559322034, 72.4293785310735, 70.8474576271186,
70.1694915254237, 69.1525423728814, 68.8135593220339, 68.135593220339,
66.8926553672316, 65.6497175141243, 64.4067796610169, 63.3898305084746,
61.9209039548023, 60.4519774011299, 58.6440677966102, 57.2881355932203,
56.1581920903955, 54.3502824858757, 53.3333333333333, 50.6214689265537,
48.135593220339, 44.8587570621469, 41.9209039548023, 38.6440677966102,
35.7062146892655, 31.638418079096, 26.1016949152542, 21.1299435028249
), specificity = c(4.86577181208054, 9.73154362416107, 13.758389261745,
17.1140939597315, 20.3020134228188, 23.489932885906, 25.3355704697987,
28.1879194630872, 30.2013422818792, 33.5570469798658, 36.4093959731544,
38.5906040268456, 39.9328859060403, 40.7718120805369, 42.6174496644295,
44.1275167785235, 45.4697986577181, 46.9798657718121, 49.1610738255034,
50.503355704698, 52.0134228187919, 53.8590604026846, 54.6979865771812,
55.0335570469799, 56.3758389261745, 57.2147651006711, 58.0536912751678,
59.7315436241611, 60.738255033557, 61.744966442953, 62.751677852349,
63.758389261745, 64.4295302013423, 65.9395973154362, 66.4429530201342,
67.1140939597315, 67.7852348993289, 68.6241610738255, 69.2953020134228,
70.4697986577181, 71.4765100671141, 72.4832214765101, 72.9865771812081,
75.1677852348993, 75.3355704697987, 75.8389261744966, 76.006711409396,
76.8456375838926, 77.5167785234899, 78.1879194630873, 79.3624161073825,
79.8657718120805, 80.3691275167785, 80.8724832214765, 81.5436241610738,
81.5436241610738, 81.8791946308725, 82.5503355704698, 82.8859060402685,
83.5570469798658, 83.8926174496644, 84.2281879194631, 84.5637583892617,
84.7315436241611, 85.4026845637584, 85.9060402684564, 86.744966442953,
87.5838926174497, 88.255033557047, 88.9261744966443, 89.5973154362416,
90.1006711409396, 90.6040268456376, 91.2751677852349, 91.6107382550336,
92.4496644295302, 93.2885906040269, 93.9597315436242, 94.1275167785235,
94.2953020134228, 94.4630872483222, 95.4697986577181, 95.6375838926175,
95.8053691275168, 96.1409395973154, 96.6442953020134, 96.6442953020134,
97.4832214765101, 97.8187919463087, 98.489932885906, 98.489932885906,
98.489932885906, 98.489932885906, 98.8255033557047, 99.496644295302,
99.6644295302013, 99.8322147651007, 99.8322147651007, 99.8322147651007
)), row.names = c("thresh_info", "thresh_info.1", "thresh_info.2",
"thresh_info.3", "thresh_info.4", "thresh_info.5", "thresh_info.6",
"thresh_info.7", "thresh_info.8", "thresh_info.9", "thresh_info.10",
"thresh_info.11", "thresh_info.12", "thresh_info.13", "thresh_info.14",
"thresh_info.15", "thresh_info.16", "thresh_info.17", "thresh_info.18",
"thresh_info.19", "thresh_info.20", "thresh_info.21", "thresh_info.22",
"thresh_info.23", "thresh_info.24", "thresh_info.25", "thresh_info.26",
"thresh_info.27", "thresh_info.28", "thresh_info.29", "thresh_info.30",
"thresh_info.31", "thresh_info.32", "thresh_info.33", "thresh_info.34",
"thresh_info.35", "thresh_info.36", "thresh_info.37", "thresh_info.38",
"thresh_info.39", "thresh_info.40", "thresh_info.41", "thresh_info.42",
"thresh_info.43", "thresh_info.44", "thresh_info.45", "thresh_info.46",
"thresh_info.47", "thresh_info.48", "thresh_info.49", "thresh_info.50",
"thresh_info.51", "thresh_info.52", "thresh_info.53", "thresh_info.54",
"thresh_info.55", "thresh_info.56", "thresh_info.57", "thresh_info.58",
"thresh_info.59", "thresh_info.60", "thresh_info.61", "thresh_info.62",
"thresh_info.63", "thresh_info.64", "thresh_info.65", "thresh_info.66",
"thresh_info.67", "thresh_info.68", "thresh_info.69", "thresh_info.70",
"thresh_info.71", "thresh_info.72", "thresh_info.73", "thresh_info.74",
"thresh_info.75", "thresh_info.76", "thresh_info.77", "thresh_info.78",
"thresh_info.79", "thresh_info.80", "thresh_info.81", "thresh_info.82",
"thresh_info.83", "thresh_info.84", "thresh_info.85", "thresh_info.86",
"thresh_info.87", "thresh_info.88", "thresh_info.89", "thresh_info.90",
"thresh_info.91", "thresh_info.92", "thresh_info.93", "thresh_info.94",
"thresh_info.95", "thresh_info.96", "thresh_info.97", "thresh_info.98"
), class = "data.frame")
You can filter by the minimum variance across the 3 columns:
library(dplyr)
data |>
tibble::rownames_to_column() |>
rowwise() |>
mutate(var = var(c_across(3:5))) |>
ungroup() |>
filter(var == min(var))
# A tibble: 1 × 6
rowname threshold accuracy sensitivity specificity var
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 thresh_info.59 0.6 83.4 83.3 83.6 0.0199

Complex clipping (spatial intersection ?) of polygons and lines in R

I would like to clip (or maybe the right formulation is performing spatial intersection) polygons and lines using a polygon rather than a rectangle, like so:
Here is some code to make the polygons for reproducibility and examples:
p1 <- data.frame(x = c(-0.81, -0.45, -0.04, 0.32, 0.47, 0.86, 0.08, -0.46, -1, -0.76),
y = c(0.46, 1, 0.64, 0.99, -0.04, -0.14, -0.84, -0.24, -0.44, 0.12))
p2 <- data.frame(x = c(-0.63, -0.45, -0.2, -0.38, -0.26, -0.82, -0.57, -0.76),
y = c(-0.1, 0.15, -0.17, -0.79, -1, -0.97, -0.7, -0.61))
l1 <- data.frame(x = c(0.1, 0.28, 0.29, 0.52, 0.51, 0.9, 1),
y = c(0.19, -0.15, 0.25, 0.28, 0.64, 0.9, 0.47))
plot.new()
plot.window(xlim = c(-1, 1), ylim = c(-1,1))
polygon(p2$x, p2$y, col = "blue")
polygon(p1$x, p1$y)
lines(l1$x, l1$y)
You could use the spatstat package for this. Below the original example is
worked through. In spatstat polygons are used as “observation windows” of
point patterns, so they are of class owin. It is possible to do set
intersection, union etc. with owin objects.
p1 <- data.frame(x = c(-0.81, -0.45, -0.04, 0.32, 0.47, 0.86, 0.08, -0.46, -1, -0.76),
y = c(0.46, 1, 0.64, 0.99, -0.04, -0.14, -0.84, -0.24, -0.44, 0.12))
p2 <- data.frame(x = c(-0.63, -0.45, -0.2, -0.38, -0.26, -0.82, -0.57, -0.76),
y = c(-0.1, 0.15, -0.17, -0.79, -1, -0.97, -0.7, -0.61))
l1 <- data.frame(x = c(0.1, 0.28, 0.29, 0.52, 0.51, 0.9, 1),
y = c(0.19, -0.15, 0.25, 0.28, 0.64, 0.9, 0.47))
In spatstat polygons must be traversed anti-clockwise, so:
library(spatstat)
p1rev <- lapply(p1, rev)
p2rev <- lapply(p2, rev)
W1 <- owin(poly = p1rev)
W2 <- owin(poly = p2rev)
L1 <- psp(x0 = l1$x[-nrow(l1)], y0 = l1$y[-nrow(l1)],
x1 = l1$x[-1], y1 = l1$y[-1], window = boundingbox(l1))
plot(boundingbox(W1,W2,L1), type= "n", main = "Original")
plot(W2, col = "blue", add = TRUE)
plot(W1, add = TRUE)
plot(L1, add = TRUE)
W2clip <- W2[W1]
L1clip <- L1[W1]
plot(W1, main = "Clipped")
plot(W2clip, col = "blue", add = TRUE)
plot(L1clip, add = TRUE)

How to change the a axis to a time series in ggplot2

I'm trying to replicate the graph provided at https://www.chicagofed.org/research/data/cfnai/current-data since I will be needing graphs for data sets soon that look like this. I'm almost there, I can't seem to figure out how to change the x axis to the dates when using ggplot2. Specifically, I would like to change it to the dates in the Date column. I tried about a dozen ways and nothing is working. The data for this graph is under indexes on the website. Here's my code and the graph where dataSet is the data from the website:
library(ggplot2)
library(reshape2)
library(tidyverse)
library(lubridate)
df = data.frame(time = index(dataSet), melt(as.data.frame(dataSet)))
df
str(df)
df$data1.Date = as.Date(as.character(df$data1.Date))
str(df)
replicaPlot1 = ggplot(df, aes(x = time, y = value)) +
geom_area(aes(colour = variable, fill = variable)) +
stat_summary(fun = sum, geom = "line", size = 0.4) +
labs(title = "Chicago Fed National Activity Index (CFNAI) Current Data")
replicaPlot1 + scale_x_continuous(name = "time", breaks = waiver(), labels = waiver(), limits =
df$data1.Date)
replicaPlot1
Any sort of help on this would be very much appreciated!
G:\BOS\Common\R-Projects\Graphs\Replica of Chicago Fed National Acitivty index (PCA)\dataSet
Not sure what's your intention with data.frame(time = index(dataSet), melt(as.data.frame(dataSet))). When I download the data and read via readxl::read_excel I got a nice tibble with a date(time) column which after reshaping via tidyr::pivot_longer could easily be plotted and by making use of scale_x_datetime has a nicely formatted date axis:
Using just the first 20 rows of data try this:
library(ggplot2)
library(readxl)
library(tidyr)
df <- pivot_longer(df, -Date, names_to = "variable")
ggplot(df, aes(x = Date, y = value)) +
geom_area(aes(colour = variable, fill = variable)) +
stat_summary(fun = sum, geom = "line", size = 0.4) +
labs(title = "Chicago Fed National Activity Index (CFNAI) Current Data") +
scale_x_datetime(name = "time")
#> Warning: Removed 4 rows containing non-finite values (stat_summary).
#> Warning: Removed 4 rows containing missing values (position_stack).
Created on 2021-01-28 by the reprex package (v1.0.0)
DATA
# Data downloaded from https://www.chicagofed.org/~/media/publications/cfnai/cfnai-data-series-xlsx.xlsx?la=en
# df <- readxl::read_excel("cfnai-data-series-xlsx.xlsx")
# dput(head(df, 20))
df <- structure(list(Date = structure(c(
-87004800, -84412800, -81734400,
-79142400, -76464000, -73785600, -71193600, -68515200, -65923200,
-63244800, -60566400, -58060800, -55382400, -52790400, -50112000,
-47520000, -44841600, -42163200, -39571200, -36892800
), tzone = "UTC", class = c(
"POSIXct",
"POSIXt"
)), P_I = c(
-0.26, 0.16, -0.43, -0.09, -0.19, 0.58, -0.05,
0.21, 0.51, 0.33, -0.1, 0.12, 0.07, 0.04, 0.35, 0.04, -0.1, 0.14,
0.05, 0.11
), EU_H = c(
-0.06, -0.09, 0.01, 0.04, 0.1, 0.22, -0.04,
0, 0.32, 0.16, -0.2, 0.34, 0.06, 0.17, 0.17, 0.07, 0.12, 0.12,
0.15, 0.18
), C_H = c(
-0.01, 0.01, -0.05, 0.08, -0.07, -0.01,
0.12, -0.11, 0.1, 0.15, -0.04, 0.04, 0.17, -0.03, 0.05, 0.08,
0.09, 0.05, -0.06, 0.09
), SO_I = c(
-0.01, -0.07, -0.08, 0.02,
-0.16, 0.22, -0.08, -0.07, 0.38, 0.34, -0.13, -0.1, 0.08, -0.07,
0.06, 0.07, 0.12, -0.3, 0.35, 0.14
), CFNAI = c(
-0.34, 0.02, -0.55,
0.04, -0.32, 1, -0.05, 0.03, 1.32, 0.97, -0.46, 0.39, 0.38, 0.11,
0.63, 0.25, 0.22, 0.01, 0.49, 0.52
), CFNAI_MA3 = c(
NA, NA, -0.29,
-0.17, -0.28, 0.24, 0.21, 0.33, 0.43, 0.77, 0.61, 0.3, 0.1, 0.29,
0.37, 0.33, 0.37, 0.16, 0.24, 0.34
), DIFFUSION = c(
NA, NA, -0.17,
-0.14, -0.21, 0.16, 0.11, 0.17, 0.2, 0.5, 0.41, 0.28, 0.2, 0.32,
0.36, 0.32, 0.33, 0.25, 0.31, 0.47
)), row.names = c(NA, -20L), class = c(
"tbl_df",
"tbl", "data.frame"
))

How do I make segments (of my probabilities?)

I was wondering if there is a function which can help me with segmentation. Via mixtools (logisregmixEM), I got an optimum of 3 segments with corresponding size of 2.5%, 40.3% and 57.2%. I also got posterior probabilities. Is there any way how to create three separate segments with corresponding observations based on the probabilities, in which I end up with 3 segments with the above called sizes?
For what its worth some background information of my coefficients, and probabilities:
> dput(head(betas))
structure(list(comp1 = c(4.57, 0.08, 0.91, -0.11, 0.09, 0.07),
comp2 = c(2.04, -0.22, 0.19, 0.34, -0.34, -0.01), comp3 = c(0.88,
0.03, 0.42, -0.02, -0.17, -0.01)), row.names = c("beta.0",
"beta.1", "beta.2", "beta.3", "beta.4", "beta.5"), class = "data.frame")
> dput(head(posteriorp))
structure(c(0.06, 0.03, 0, 0.03, 0, 0, 0.61, 0.42, 0.07, 0.41,
0.31, 0.41, 0.33, 0.56, 0.93, 0.56, 0.69, 0.59), .Dim = c(6L,
3L), .Dimnames = list(NULL, c("comp.1", "comp.2", "comp.3")))

for loop a non-parametric test filtered by year and save each year's result as a data frame

I'm trying to create and print a list of data frames that are the result of the Mann-Whitney-Wilcoxon Test.
My code currently runs the Mann-Whitney-Wilcoxon Test on all the observations and compares the two data frames, ORATIOS and KFMARATIOS.
library(tidyverse)
library(devtools)
library(inspectdf)
library(readr)
library(broom)
library(knitr)
library(readxl)
library(skimr)
library(kableExtra)
list_ratio <- grep("RATIO",colnames(ORATIOS), value=TRUE)
MWU_pvalues <- unlist(Map(function(a,b) wilcox.test(a, b)$p.value, ORATIOS[list_ratio], KFMARATIOS[list_ratio]))
MWU_pvalues <- as.data.frame(MWU_pvalues) %>%
rename(`P VALUE` = MWU_pvalues)
MWU_pvalues <- tibble::rownames_to_column(MWU_pvalues, "RATIO") %>%
mutate(`Significance` = if_else(`P VALUE` > 0.05, "",
if_else(`P VALUE` <= 0.05 & `P VALUE` >= 0.01, "\\*",
if_else(`P VALUE` <= 0.01 & `P VALUE` >= 0.001, "**", "***"))))
kable(MWU_pvalues) %>%
kable_styling()
How would I create a for loop or lapply filtering on each year, running the above test, saving each result as a dataframe into a list of dataframes? I'd like to have each dataframe for each year printed using kable in my RMarkdown file.
Sample data:
ORATIOS:
structure(list(YEAR = c(2008, 2009, 2010, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2017), FARM = c("D", "D", "D", "D", "D", "D", "D", "D",
"D", "D", "I", "I", "I", "I", "I", "I", "I", "I", "I", "I"),
`CURRENT RATIO` = c(0.568022785746452, 0.329854720020037,
0.832073159580644, 0.643108790851367, 25.1454874121908, 14.5975395062397,
5.12537888750377, 5.20160770260219, 7.64257374037806, 2.1580962424325,
1.31703632160198, 0.125166573684741, 0.0680923398879462,
0.100452384108057, 0.0998706900125819, 0.0907309088049343,
0.521537398114045, 0.773433351511582, 0.174099653043861,
0.0804425861373205), `WORKING CAPITAL TO GROSS FARMING INCOME` = c(-0.132573843177753,
-0.419436996986394, -0.031444400685141, -0.114022796397208,
1.22962822585944, 0.397841184148093, 0.239623650110705, 0.295681875030473,
0.502930206605254, 0.41862926754376, 0.0513905118422565,
-0.406448322702947, -0.343476652794216, -0.366684678854441,
-0.27321810774102, -0.306827980132377, -0.173010159020099,
-0.140768598200492, -0.367184395657858, -0.888263538055031
), `DEBT TO TOTAL ASSET RATIO` = c(0.0846892634197993, 0.102127561711337,
0.0750728145035032, 0.0797349374471145, 0.0122514875519798,
0.0162967044282012, 0.0165670856047258, 0.0188732833402721,
0.0150968780472965, 0.0275252089477482, 0.1123291162633,
0.151496340475165, 0.0960615511639704, 0.0985641068765839,
0.119816717131179, 0.121164074695269, 0.0970056997272376,
0.139114211255347, 0.0686657852466466, 0.17098484263781),
`DEBT TO FARM ASSET RATIO` = c(0.0935832744841849, 0.114259598684054,
0.0824723632268821, 0.08365143337564, 0.0129689938858425,
0.0191316764222117, 0.0216751963945452, 0.0225358439285237,
0.0167830935834987, 0.030821228954403, 0.140068283663094,
0.203393535891141, 0.133942894025292, 0.137887444914688,
0.17818477721901, 0.182143899668642, 0.141540075268137, 0.212926916788055,
0.0962721755129152, 0.172706971368876), `EQUITY TO ASSET RATIO` = c(0.915310736580201,
0.897872438288663, 0.924927185496497, 0.920265062552885,
0.98774851244802, 0.983703295571799, 0.983432914395274, 0.981126716659728,
0.984903121952704, 0.972474791052252, 0.8876708837367, 0.848503659524835,
0.90393844883603, 0.901435893123416, 0.880183282868821, 0.878835925304732,
0.902994300272762, 0.860885788744653, 0.931334214753353,
0.82901515736219), `DEBT TO EQUITY RATIO` = c(0.0925251502415636,
0.113743954437438, 0.0811661887343104, 0.0866434472975902,
0.0124034482437396, 0.0165666868267717, 0.0168461776723358,
0.0192363361631072, 0.0153282873318188, 0.0283042904566863,
0.126543652970169, 0.178545300040313, 0.106270013503315,
0.109341227289126, 0.13612700838927, 0.137868823072129, 0.107426702137473,
0.161594270778014, 0.0737284040024573, 0.206250562633691),
`RETURN ON FARM ASSETS` = c(0.0170145283510924, -0.00522377886147693,
0.0237250420249203, 0.00257743472229431, 0.0213365859181817,
0.0244609737360482, 0.0279373354305636, 0.0167869242322396,
0.0572363957452595, -0.00273821783417637, 0.0325678749005671,
-0.0532931806283685, 0.024215521265722, -0.0178636730481072,
0.0189254399688753, 0.00211416100547258, -0.00938005681041073,
0.0501921695586829, 0.0215269026374393, -0.0366154070757298
), `RETURN ON ASSETS` = c(0.0566608458884666, 0.0239054711694685,
0.0264084815850861, 0.00576204495548541, 0.179667366138176,
0.0246773695339781, 0.0246552659101915, 0.020526505137709,
0.0551370549195115, -5.05665725060606e-05, 0.0449112877923212,
-0.0284073208306705, 0.0249952584312144, -0.00283565027536605,
0.0360687362998932, 0.0080927754538142, -0.00331579015236834,
0.0457634829675583, 0.0229640648122328, -0.023016837706958
), `RETURN ON EQUITY` = c(0.0168221490501512, -0.00520020437367425,
0.023349291367177, 0.00266962346623839, 0.0204061503508897,
0.0211814836515069, 0.0217131742563291, 0.0143291246913213,
0.0522749822883451, -0.002514608130223, 0.0294232052511338,
-0.0467824450944562, 0.0192125442012039, -0.0141654371518756,
0.0144583817182496, 0.00160025611694793, -0.00711931632857772,
0.0380917883044123, 0.0164860113123938, -0.0437269454184399
), `FARM OPERATING PROFIT MARGIN RATIO` = c(0.113108456739495,
-0.0455472105804567, 0.199838203998892, 0.0234275923606582,
0.158472105656006, 0.183710042172317, 0.190582976791897,
0.124927655425634, 0.45847835351018, -0.0422031337055503,
0.122121670323183, -0.243017854350921, 0.11277681710057,
-0.0790679940692684, 0.076084143213901, 0.00890894198839937,
-0.0450368591167229, 0.204577659697265, 0.13619384495868,
-0.358538500350435), `ASSET TURNOVER RATIO` = c(0.0153974936379558,
-0.00466912018059027, 0.0215963943475807, 0.00245676120615052,
0.0201561446538819, 0.0208362952730876, 0.0213534502396742,
0.0140586870610039, 0.0514857932558134, -0.00244539301601691,
0.0261181226076402, -0.0396950758641658, 0.0173669574034299,
-0.0127692334904846, 0.0127260258857395, 0.00140636256526249,
-0.00642870206654449, 0.0327926792191383, 0.0153539864000432,
-0.0362503005370359), `OPERATING EXPENSE RATIO` = c(0.671535228245263,
0.773166498456329, 0.607985458258, 0.724432447012029, 0.67336000606662,
0.64796797949329, 0.589032574693052, 0.74988495257417, 0.461775664398759,
0.862141471389961, 0.672863504023624, 0.980455882037588,
0.669661413731221, 0.86690216270866, 0.670033358895902, 0.737005445439968,
0.783494244501376, 0.649760819934915, 0.706382908455109,
1.134948535946), `DEPRECIATION EXPENSE RATIO` = c(0.12660532789432,
0.132732814909818, 0.103826844188336, 0.144629676126728,
0.140059287930065, 0.157478624539652, 0.141620283491016,
0.0919194664659044, 0.0583370508964949, 0.133579109920113,
0.150646135557582, 0.183514628711121, 0.146236932328879,
0.16125312788589, 0.191531747619893, 0.197293862401247, 0.193527787561396,
0.0913809290148264, 0.0946887014018637, 0.145522583536315
), `INTEREST EXPENSE RATIO` = c(0.0887509871209225, 0.139647897214309,
0.0883494935547731, 0.107510284500585, 0.028108600347309,
0.0108433537947408, 0.0787641650240354, 0.0332679255342914,
0.0214089311945663, 0.0464825523954769, 0.0543686900956105,
0.0790473436022124, 0.0713248368393299, 0.0509127034747178,
0.0623507502703033, 0.0567917501703862, 0.068014827053951,
0.0542805913529945, 0.0627345451843474, 0.0780673808681226
), `NET FARM INCOME RATIO` = c(0.113108456739495, -0.0455472105804567,
0.199838203998892, 0.0234275923606582, 0.158472105656006,
0.183710042172317, 0.190582976791897, 0.124927655425634,
0.45847835351018, -0.0422031337055503, 0.122121670323183,
-0.243017854350921, 0.11277681710057, -0.0790679940692684,
0.076084143213901, 0.00890894198839937, -0.0450368591167229,
0.204577659697265, 0.13619384495868, -0.358538500350435)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -20L))
KFMARATIOS:
structure(list(YEAR = c(2008, 2008, 2008, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008
), FARM = c(11407100, 11484600, 11485100, 11495100, 11801800,
11806400, 11820000, 11885400, 11886000, 11897200, 11897300, 12004500,
12004501, 12303001, 12340101, 12398300, 13050001, 13700201, 13705601,
14089100, 14110900, 14130000, 14130002, 14184100, 14192300, 14330302,
14388200, 14783200, 14786200, 15094200, 15096200, 15584200, 15586100,
15682100, 15683100, 15689100, 16507002, 16580000, 16598200, 16601300
), `CURRENT RATIO` = c(-3, 0, 4.57, 15.94, 2.22, 0, 368.69, 1.86,
9.1, 3.45, 2, 0, 1.58, 6.26, 1.97, 1.54, 0, 3.39, 313.09, 5.59,
5.4, 0, 3.6, 5.78, 3.18, 207.1, 2.36, 28.31, 3.4, 3.68, 0.37,
3.5, 5.6, 13.64, 7.05, 0, 2.23, 0.89, 4.4, 1.11), `WORKING CAPITAL TO GROSS FARMING INCOME` = c(0.783990044655886,
0.939342207539837, 0.468883358203084, 0.53708199556795, 0.429230789973027,
0.856616290636639, 0.46085746623408, 0.019246546772549, 1.04338230212655,
0.318770448161572, 0.398058372857175, 0.506978780306214, 0.263816960947357,
0.4960655740923, 0.101962576323424, 0.220623464476751, 1.12676140487953,
0.533690322762107, 0.685276501922026, 0.703540899065169, 0.660869855557338,
0.71777803486123, 0.319578323479609, 0.722736340214157, 0.286630301648443,
0.818610240507597, 0.184477489966846, 0.78148168000963, 0.357891811040315,
0.289159422203956, -0.125641128630768, 0.392321597654173, 0.561996673317676,
0.353452531903466, 0.683345718597063, 0.804567295215173, 0.307398272114796,
-0.375449779668313, 0.186702574682293, -0.55737251721071), `DEBT TO TOTAL ASSET RATIO` = c(0.02,
0.07, 0.27, 0.37, 0.36, 0, 0.07, 0.37, 0.05, 0.33, 0.42, 0.08,
0.24, 0.34, 0.36, 0.51, 0.01, 0.11, 0.1, 0.07, 0.08, 0.01, 0.32,
0.14, 0.4, 0.52, 0.39, 0.06, 0.21, 0.32, 0.43, 0.52, 0.29, 0.12,
0.17, 0.1, 0.15, 0.87, 0.12, 0.69), `DEBT TO FARM ASSET RATIO` = c(0.0210960466847519,
0.0662443993261916, 0.270051570315789, 0.373240578143398, 0.359031265562519,
0, 0.0678176279710153, 0.369000587598404, 0.04831743727994, 0.33065743433488,
0.41680939549244, 0.0851067276205844, 0.245359588845858, 0.337912727823456,
0.356607488633417, 0.508663012923272, 0.0126098421632802, 0.10665178903834,
0.105106247793806, 0.0698908293989529, 0.0818483764283224, 0.00750932570017385,
0.319501072718455, 0.136757510256717, 0.400840648545665, 0.516753083750126,
0.389587948103612, 0.0577299469460252, 0.206521419569117, 0.315261383020663,
0.43256943562472, 0.520491208048298, 0.290288373137576, 0.120229338185664,
0.173192986515349, 0.104536048245734, 0.151997186500475, 0.868552025800098,
0.123958600776313, 0.692195974317741), `EQUITY TO ASSET RATIO` = c(0.98536882817945,
0.944215770167283, 0.736537746555766, 0.729860554651407, 0.642228778874089,
1, 0.94228148558872, 0.630999412401596, 0.95168256272006, 0.66934256566512,
0.592693562701164, 0.914893272379416, 0.813956784138156, 0.688995447780108,
0.725420084109645, 0.545241148972386, 0.988536562104007, 0.900124825958172,
0.90344241855196, 0.930936390469265, 0.92060316189968, 0.992490674299826,
0.758518009863028, 0.881474617998699, 0.600468426703118, 0.553595877267449,
0.667405715763261, 0.942270053053975, 0.842757601135073, 0.708413078986436,
0.56743056437528, 0.533041296742996, 0.743304732269968, 0.88511363093375,
0.831970255984885, 0.904591907651469, 0.876296809602567, 0.131447974199902,
0.890119750534961, 0.307804025682259), `DEBT TO EQUITY RATIO` = c(0.02,
0.07, 0.37, 0.6, 0.56, 0, 0.07, 0.58, 0.05, 0.49, 0.72, 0.09,
0.32, 0.51, 0.55, 1.04, 0.01, 0.12, 0.12, 0.08, 0.09, 0.01, 0.47,
0.16, 0.67, 1.07, 0.64, 0.06, 0.26, 0.46, 0.76, 1.08, 0.41, 0.14,
0.21, 0.12, 0.18, 6.61, 0.14, 2.25), `RETURN ON FARM ASSETS` = c(0.374484329540697,
0.0498819566035984, 0.181954755022922, 0.193161758267218, 0.0473627311001023,
0.327305563029612, 0.603037930741254, -0.0156737997438482, 0.10397858597475,
0.10789191406389, 0.180771277730155, 0.150007797084, 0.174196776278552,
0.120122100767257, 0.298096858936563, 0.0517125227815447, 0.111597414809764,
0.185024421154621, 0.239979711875599, 0.0808784377916965, 0.201436668181771,
0.135024051506645, 0.251851638310215, 0.103285147847268, 0.14207589091784,
0.247675592658745, 0.100067311604358, 0.308209326567443, 0.154555623216289,
0.174464204907127, 0.00457531564104158, 0.098141499884622, 0.251116584438097,
0.153198476415449, 0.183688952743912, 0.0838032420725189, 0.169288085631256,
0.0279120898963428, 0.147329195543669, 0.034801030826966), `RETURN ON ASSETS` = c(0.260063898261748,
0.0581159003954688, 0.186586004612603, 0.144217266907855, 0.0471965084015535,
0.203276288956977, 0.522691591931166, -0.0156737997438482, 0.104160943214225,
0.110451790466256, 0.178360409188664, 0.150089138729099, 0.134029707705111,
0.120565772385725, 0.229528019076799, 0.0697390623585822, 0.10198296142804,
0.192570247620748, 0.245119340816501, 0.115758491252085, 0.195889106965538,
0.138158444053898, 0.231674956423303, 0.0966027636728098, 0.141766843553559,
0.215113054221126, 0.135495862386357, 0.314351616201071, 0.133076845003381,
0.168262801476855, 0.00457531564104158, 0.0986664889666124, 0.242490501823923,
0.152124266735103, 0.201716489655936, 0.0786665142081486, 0.162659186669921,
0.0279454048764536, 0.134992616527726, 0.034801030826966), `RETURN ON EQUITY` = c(0.263580248064511,
0.0444871419402714, 0.241012793134955, 0.191549228659637, 0.0734886226747657,
0.186089113513671, 0.544673844576945, -0.0248396423765173, 0.109257634896201,
0.161190875342999, 0.298045789765326, 0.163962072531003, 0.162274234481587,
0.160460729376603, 0.31640703656353, 0.0847926292565323, 0.102628180483108,
0.192493344561337, 0.244023637469295, 0.0858503015508329, 0.212255623707772,
0.13604566269794, 0.250952374400512, 0.101551944180348, 0.235835707060263,
0.386487527831846, 0.128000474163853, 0.327092350614891, 0.139632557156543,
0.227780755169442, 0.0080632167674627, 0.165179790324242, 0.298742298993181,
0.165391606109475, 0.214205739228479, 0.084552656304169, 0.157224605882577,
0.212343248849882, 0.146717984157146, 0.113062299136044), `FARM OPERATING PROFIT MARGIN RATIO` = c(0.55,
0.18, 0.29, 0.33, 0.12, 0.46, 0.24, -0.1, 0.14, 0.23, 0.2, 0.22,
0.44, 0.25, 0.33, 0.13, 0.36, 0.44, 0.33, 0.05, 0.32, 0.16, 0.52,
0.3, 0.24, 0.35, 0.2, 0.32, 0.38, 0.29, 0.02, 0.24, 0.36, 0.25,
0.4, 0.18, 0.32, -0.01, 0.08, -0.01), `ASSET TURNOVER RATIO` = c(0.64,
0.2, 0.55, 0.58, 0.29, 0.64, 1.88, 0.39, 0.31, 0.34, 0.72, 0.58,
0.38, 0.41, 0.96, 0.38, 0.26, 0.4, 0.62, 0.41, 0.55, 0.67, 0.53,
0.29, 0.51, 0.86, 0.38, 0.94, 0.4, 0.54, 0.65, 0.49, 0.7, 0.49,
0.41, 0.3, 0.47, 0.62, 0.87, 0.79), `OPERATING EXPENSE RATIO` = c(0.29,
0.57, 0.61, 0.52, 0.69, 0.48, 0.57, 0.89, 0.64, 0.57, 0.72, 0.62,
0.45, 0.55, 0.52, 0.69, 0.49, 0.43, 0.5, 0.75, 0.53, 0.69, 0.38,
0.54, 0.6, 0.54, 0.55, 0.56, 0.5, 0.57, 0.87, 0.61, 0.54, 0.63,
0.44, 0.61, 0.56, 0.82, 0.77, 0.83), `DEPRECIATION EXPENSE RATIO` = c(0.08,
0.16, 0.01, 0.05, 0.07, 0.02, 0.03, 0.09, 0.02, 0.06, 0.03, 0.1,
0.04, 0.08, 0.06, 0.1, 0.06, 0.05, 0.03, 0.04, 0.08, 0.09, 0.04,
0.06, 0.05, 0.01, 0.11, 0.05, 0.04, 0.06, 0.05, 0.08, 0.04, 0.03,
0.06, 0.08, 0.01, 0.1, 0.05, 0.04), `INTEREST EXPENSE RATIO` = c(0.01,
0, 0.03, 0.07, 0.08, 0, 0, 0.06, 0, 0.02, 0.04, 0.01, 0.02, 0.06,
0.03, 0.06, 0, 0, 0.03, 0.01, 0.02, 0, 0.06, 0.01, 0.05, 0, 0.07,
0, 0.04, 0.01, 0.08, 0.1, 0.04, 0.02, 0.03, 0.02, 0.04, 0.04,
0.01, 0.09), `NET FARM INCOME RATIO` = c(0.62, 0.27, 0.35, 0.36,
0.16, 0.5, 0.39, -0.04, 0.34, 0.35, 0.22, 0.28, 0.49, 0.31, 0.39,
0.15, 0.45, 0.51, 0.44, 0.2, 0.37, 0.21, 0.52, 0.39, 0.29, 0.45,
0.27, 0.39, 0.43, 0.36, 0.01, 0.21, 0.37, 0.32, 0.47, 0.28, 0.38,
0.05, 0.17, 0.05)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-40L))
My solution is kind of convuluted but I guess it is never easy to work with list columns,
nested_oratios <- ORATIOS %>%
group_by(YEAR) %>%
nest() %>%
mutate(fake_year = 2008) %>%
ungroup()
nested_kfmaratios <- KFMARATIOS %>%
group_by(YEAR) %>%
nest() %>%
mutate(fake_year = 2008) %>%
ungroup() %>%
select(-YEAR)
nested_comb <- nested_oratios %>%
left_join(nested_kfmaratios,by = c('fake_year'),suffix = c(".oratios", ".kfmaratios")) %>%
select(-fake_year)
logic_pipe <- function(a,b) {
a <- a %>% select(contains('RATIO'))
b <- b %>% select(contains('RATIO'))
MWU_pvalues <- map2(a,b,function(a,b) wilcox.test(a, b)$p.value) %>% unlist()
MWU_pvalues <- as.data.frame(MWU_pvalues) %>%
rename(`P VALUE` = MWU_pvalues)
MWU_pvalues <- tibble::rownames_to_column(MWU_pvalues, "RATIO") %>%
mutate(`Significance` = if_else(`P VALUE` > 0.05, "",
if_else(`P VALUE` <= 0.05 & `P VALUE` >= 0.01, "\\*",
if_else(`P VALUE` <= 0.01 & `P VALUE` >= 0.001, "**", "***"))))
return(MWU_pvalues %>% as_tibble())
}
nested_comb %>%
mutate(result = map2(.x = data.oratios ,.y =data.kfmaratios,logic_pipe))
Consider the apply family with mapply and by (object-oriented wrapper to tapply) that can subset your data by year and pass into a user-defined function. Note: unlist + Map can be replaced with mapply (the underlying function of Map, its wrapper). Below demonstrates with base R where transform replaces mutate and ifelse replaces if_else:
proc_df <- function(df) {
yr <- df$YEAR[1]
MWU_pvalues <- mapply(function(a,b) wilcox.test(a, b)$p.value,
subset(ORATIOS, YEAR==yr)[list_ratio], df[list_ratio])
final_df <- transform(data.frame(ratio = names(MWU_pvalues),
p_value = unname(MWU_pvalues)),
significance = ifelse(p_value > 0.05, "",
ifelse(p_value <= 0.05 & p_value >= 0.01, "*",
ifelse(p_value <= 0.01 & p_value >= 0.001, "**", "***")
)
)
)
return(final_df)
}
df_list <- by(KFMARATIOS, KFMARATIOS$YEAR, proc_df)
Output
df_list$`2008`
# ratio p_value significance
# 1 CURRENT RATIO 0.20349856
# 2 DEBT TO TOTAL ASSET RATIO 0.39154322
# 3 DEBT TO FARM ASSET RATIO 0.52264808
# 4 EQUITY TO ASSET RATIO 0.42276423
# 5 DEBT TO EQUITY RATIO 0.39162003
# 6 FARM OPERATING PROFIT MARGIN RATIO 0.11726414
# 7 ASSET TURNOVER RATIO 0.01957554 *
# 8 OPERATING EXPENSE RATIO 0.24893798
# 9 DEPRECIATION EXPENSE RATIO 0.02588258 *
# 10 INTEREST EXPENSE RATIO 0.10127823
# 11 NET FARM INCOME RATIO 0.06262773

Resources