How to change the x axis to a time series in ggplot2 - r

I'm trying to replicate the graph provided at https://www.chicagofed.org/research/data/cfnai/current-data since I will be needing graphs for data sets soon that look like this. I'm almost there, I can't seem to figure out how to change the x axis to the dates when using ggplot2. Specifically, I would like to change it to the dates in the Date column. I tried about a dozen ways and nothing is working. The data for this graph is under indexes on the website. Here's my code and the graph where dataSet is the data from the website:
library(ggplot2)
library(reshape2)
library(tidyverse)
library(lubridate)
df = data.frame(time = index(dataSet), melt(as.data.frame(dataSet)))
df
str(df)
df$data1.Date = as.Date(as.character(df$data1.Date))
str(df)
replicaPlot1 = ggplot(df, aes(x = time, y = value)) +
geom_area(aes(colour = variable, fill = variable)) +
stat_summary(fun = sum, geom = "line", size = 0.4) +
labs(title = "Chicago Fed National Activity Index (CFNAI) Current Data")
replicaPlot1 + scale_x_continuous(name = "time", breaks = waiver(), labels = waiver(), limits =
df$data1.Date)
replicaPlot1
Any sort of help on this would be very much appreciated!
G:\BOS\Common\R-Projects\Graphs\Replica of Chicago Fed National Acitivty index (PCA)\dataSet

I'm not sure what you intend with data.frame(time = index(dataSet), melt(as.data.frame(dataSet))). When I download the data and read it via readxl::read_excel, I get a tibble with a date(time) column. After reshaping it via tidyr::pivot_longer it can easily be plotted, and scale_x_datetime gives a nicely formatted date axis:
Using just the first 20 rows of data try this:
# Plot the CFNAI series against a date-time x axis.
# `df` is the tibble defined in the DATA section below; its `Date` column is
# POSIXct, which is why a date-time scale is used for the x axis.
library(ggplot2)
library(readxl)
library(tidyr)
# Reshape to long format: every column except Date is stacked, with the former
# column name stored in `variable` and the number in `value` (the default
# values column name of pivot_longer).
df <- pivot_longer(df, -Date, names_to = "variable")
ggplot(df, aes(x = Date, y = value)) +
# One area per series, coloured and filled by series name.
geom_area(aes(colour = variable, fill = variable)) +
# Thin line showing the sum of all series values at each date.
stat_summary(fun = sum, geom = "line", size = 0.4) +
labs(title = "Chicago Fed National Activity Index (CFNAI) Current Data") +
# Date-time axis labelled "time"; breaks and labels are left at their defaults.
scale_x_datetime(name = "time")
#> Warning: Removed 4 rows containing non-finite values (stat_summary).
#> Warning: Removed 4 rows containing missing values (position_stack).
Created on 2021-01-28 by the reprex package (v1.0.0)
DATA
# Data downloaded from https://www.chicagofed.org/~/media/publications/cfnai/cfnai-data-series-xlsx.xlsx?la=en
# df <- readxl::read_excel("cfnai-data-series-xlsx.xlsx")
# dput(head(df, 20))
df <- structure(list(Date = structure(c(
-87004800, -84412800, -81734400,
-79142400, -76464000, -73785600, -71193600, -68515200, -65923200,
-63244800, -60566400, -58060800, -55382400, -52790400, -50112000,
-47520000, -44841600, -42163200, -39571200, -36892800
), tzone = "UTC", class = c(
"POSIXct",
"POSIXt"
)), P_I = c(
-0.26, 0.16, -0.43, -0.09, -0.19, 0.58, -0.05,
0.21, 0.51, 0.33, -0.1, 0.12, 0.07, 0.04, 0.35, 0.04, -0.1, 0.14,
0.05, 0.11
), EU_H = c(
-0.06, -0.09, 0.01, 0.04, 0.1, 0.22, -0.04,
0, 0.32, 0.16, -0.2, 0.34, 0.06, 0.17, 0.17, 0.07, 0.12, 0.12,
0.15, 0.18
), C_H = c(
-0.01, 0.01, -0.05, 0.08, -0.07, -0.01,
0.12, -0.11, 0.1, 0.15, -0.04, 0.04, 0.17, -0.03, 0.05, 0.08,
0.09, 0.05, -0.06, 0.09
), SO_I = c(
-0.01, -0.07, -0.08, 0.02,
-0.16, 0.22, -0.08, -0.07, 0.38, 0.34, -0.13, -0.1, 0.08, -0.07,
0.06, 0.07, 0.12, -0.3, 0.35, 0.14
), CFNAI = c(
-0.34, 0.02, -0.55,
0.04, -0.32, 1, -0.05, 0.03, 1.32, 0.97, -0.46, 0.39, 0.38, 0.11,
0.63, 0.25, 0.22, 0.01, 0.49, 0.52
), CFNAI_MA3 = c(
NA, NA, -0.29,
-0.17, -0.28, 0.24, 0.21, 0.33, 0.43, 0.77, 0.61, 0.3, 0.1, 0.29,
0.37, 0.33, 0.37, 0.16, 0.24, 0.34
), DIFFUSION = c(
NA, NA, -0.17,
-0.14, -0.21, 0.16, 0.11, 0.17, 0.2, 0.5, 0.41, 0.28, 0.2, 0.32,
0.36, 0.32, 0.33, 0.25, 0.31, 0.47
)), row.names = c(NA, -20L), class = c(
"tbl_df",
"tbl", "data.frame"
))

Related

Melt/ reshape dataframe to combine columns and fill rows with NAs

Apologies that there is a wealth of information on this site about melting and reshaping data, however, I cannot find the answer to my question on any of the pages I've visited. I have a data set which looks something like:
A Year | A Mean Temp | A Max Temp | A Min Temp | B Year | B Mean Temp | B Max Temp | B Min Temp |
and I want to end up with
Year | A Mean Temp | A Max Temp | A Min Temp |B Mean Temp | B Max Temp | B Min Temp
and fill columns which don't have data for that specific year with 'NA'.
The desired output would be something like:
[Table][1]
I believe the answer lies somewhere in something like:
library(dplyr)
library(tidyr)
library(stringr)
Data %>%
pivot_longer(cols = contains("Year"), names_to = c("Country", ".value"),
names_sep="_", values_drop_na = TRUE) %>%
rename_with(~ str_c('Country_', .), Rating:Year)```
But as of yet no luck.
Any help would be appreciated.
Thank you
Data
structure(list(Antarctica.Year.CE = 167:172, Antarctica.Temp..C. = c(0.33,
0.31, 0.18, 0.08, -0.01, -0.11), Antarctica.Min..C. = c(-1.24,
-1.26, -1.39, -1.48, -1.57, -1.67), Antarctica.Max..C. = c(1.89,
1.87, 1.74, 1.64, 1.55, 1.45), Arctic.Year.CE = 1:6, Arctic.Temp..C. = c(-1.15,
-0.96, -0.32, 0.1, -0.18, -0.61), Arctic.Min..C. = c(-1.92, -1.76,
-1.38, -0.74, -1.08, -1.17), Arctic.Max..C. = c(-0.31, -0.11,
0.48, 0.83, 0.73, 0.16), Asia.Year.CE = 800:805, Asia.Temp..C. = c(-0.31,
-0.14, -0.36, -0.67, -0.78, -0.26), Asia.Min..C. = c(-1.4, -1.23,
-1.45, -1.76, -1.87, -1.35), Asia.Max..C. = c(0.79, 0.96, 0.74,
0.43, 0.31, 0.83), Australasia.Year.CE = 1001:1006, Australasia.Temp..C. = c(-0.24,
-0.38, -0.29, -0.33, -0.34, -0.11), Australasia.Min..C. = c(-0.62,
-0.79, -0.71, -0.73, -0.73, -0.56), Australasia.Max..C. = c(0.15,
0.03, 0.13, 0.07, 0.05, 0.34), Europe.Year.CE = 1:6, Europe.Temp..C. = c(0.09,
-0.26, -0.24, 0.22, 0.32, 0.67), Europe.Min..C. = c(-0.69, -1.14,
-1.18, -0.66, -0.48, -0.11), Europe.Max..C. = c(0.88, 0.56, 0.61,
1.07, 1.14, 1.5), North.America...Pollen.Year.CE = c(480L, 510L,
540L, 570L, 600L, 630L), North.America...Pollen.Temp..C. = c(-0.25,
-0.29, -0.33, -0.34, -0.34, -0.34), North.America...Pollen.Min..C. = c(-0.74,
-0.7, -0.66, -0.65, -0.64, -0.64), North.America...Pollen.Max..C. = c(0.24,
0.11, 0, -0.04, -0.04, -0.04), North.America...Trees.Year.CE = c(1204L,
1214L, 1224L, 1234L, 1244L, 1254L), North.America...Trees.Temp..C. = c(-0.22,
-0.45, -0.38, -0.87, -0.81, -0.06), North.America...Trees.Min..C. = c(-0.53,
-0.72, -0.67, -1.12, -1.09, -0.35), North.America...Trees.Max..C. = c(0.04,
-0.2, -0.11, -0.57, -0.52, 0.18), South.America.Year.CE = 857:862,
South.America.Temp..C. = c(-0.3, -0.21, -0.07, -0.38, -0.41,
-0.19), South.America.Min..C. = c(-1.12, -1, -0.88, -1.19,
-1.22, -0.98), South.America.Max..C. = c(0.53, 0.58, 0.74,
0.43, 0.39, 0.61)), row.names = c(NA, 6L), class = "data.frame") ```
[1]: https://i.stack.imgur.com/0sV7a.png
For something as small as this, I'd often just go with a more manual approach.
Given your df above, I specify the lists of countries in the columns and then grepl() on the df columns to select those columns. Then, we rename the columns, return the new dataframe. We can then apply the function to the list of countries and then rbind with do.call.
country_list = c('Antarctica', 'Arctic', 'Asia', 'Australasia', 'Europe', 'North.America...Pollen', 'North.America...Trees', 'South.America')
# Extract the columns that belong to one region and give them generic names.
#
# Arguments:
#   country  Region prefix exactly as it appears at the start of the column
#            names (e.g. "Asia" or "North.America...Pollen").
#   data     Wide data frame holding the "<region>.<measure>" columns.
#            Defaults to the global `df`, so get_cols(country) keeps working
#            as before.
#
# Returns a data frame with the columns Year, Temp, Min_Temp, Max_Temp and
# Country. The first four are renamed by position, so the region's columns
# must appear in that order in `data`.
get_cols <- function(country, data = df) {
  # Match the literal prefix "<country>." instead of using grepl(): a regex
  # match is unanchored and treats "." as a wildcard, so one region's name
  # could also select columns of a region whose name merely contains it.
  is_region_col <- startsWith(colnames(data), paste0(country, "."))
  df_new <- data[, is_region_col, drop = FALSE]

  # The positional rename below is only meaningful for exactly four columns.
  if (ncol(df_new) != 4L) {
    stop(
      "Expected 4 columns for '", country, "' but found ", ncol(df_new),
      call. = FALSE
    )
  }

  df_new$Country <- rep(country, nrow(df_new))
  colnames(df_new) <- c('Year', 'Temp', 'Min_Temp', 'Max_Temp', 'Country')
  df_new
}
df_final = do.call(rbind, lapply(country_list, get_cols))
Hope that returns what you're looking for?

Complex clipping (spatial intersection ?) of polygons and lines in R

I would like to clip (or maybe the right formulation is performing spatial intersection) polygons and lines using a polygon rather than a rectangle, like so:
Here is some code to make the polygons for reproducibility and examples:
p1 <- data.frame(x = c(-0.81, -0.45, -0.04, 0.32, 0.47, 0.86, 0.08, -0.46, -1, -0.76),
y = c(0.46, 1, 0.64, 0.99, -0.04, -0.14, -0.84, -0.24, -0.44, 0.12))
p2 <- data.frame(x = c(-0.63, -0.45, -0.2, -0.38, -0.26, -0.82, -0.57, -0.76),
y = c(-0.1, 0.15, -0.17, -0.79, -1, -0.97, -0.7, -0.61))
l1 <- data.frame(x = c(0.1, 0.28, 0.29, 0.52, 0.51, 0.9, 1),
y = c(0.19, -0.15, 0.25, 0.28, 0.64, 0.9, 0.47))
plot.new()
plot.window(xlim = c(-1, 1), ylim = c(-1,1))
polygon(p2$x, p2$y, col = "blue")
polygon(p1$x, p1$y)
lines(l1$x, l1$y)
You could use the spatstat package for this. Below the original example is
worked through. In spatstat polygons are used as “observation windows” of
point patterns, so they are of class owin. It is possible to do set
intersection, union etc. with owin objects.
p1 <- data.frame(x = c(-0.81, -0.45, -0.04, 0.32, 0.47, 0.86, 0.08, -0.46, -1, -0.76),
y = c(0.46, 1, 0.64, 0.99, -0.04, -0.14, -0.84, -0.24, -0.44, 0.12))
p2 <- data.frame(x = c(-0.63, -0.45, -0.2, -0.38, -0.26, -0.82, -0.57, -0.76),
y = c(-0.1, 0.15, -0.17, -0.79, -1, -0.97, -0.7, -0.61))
l1 <- data.frame(x = c(0.1, 0.28, 0.29, 0.52, 0.51, 0.9, 1),
y = c(0.19, -0.15, 0.25, 0.28, 0.64, 0.9, 0.47))
In spatstat polygons must be traversed anti-clockwise, so:
# Clip polygon p2 and polyline l1 to polygon p1 using spatstat windows.
library(spatstat)
# Reverse the vertex order of both polygons (x and y are reversed together)
# so that they are traversed in the direction owin() expects.
p1rev <- lapply(p1, rev)
p2rev <- lapply(p2, rev)
# Turn each polygon into a window object.
W1 <- owin(poly = p1rev)
W2 <- owin(poly = p2rev)
# Build line segments joining each point of l1 to the next one:
# start points drop the last row, end points drop the first row.
L1 <- psp(x0 = l1$x[-nrow(l1)], y0 = l1$y[-nrow(l1)],
x1 = l1$x[-1], y1 = l1$y[-1], window = boundingbox(l1))
# First figure: empty frame large enough for all three objects, then the
# unclipped shapes drawn on top of it.
plot(boundingbox(W1,W2,L1), type= "n", main = "Original")
plot(W2, col = "blue", add = TRUE)
plot(W1, add = TRUE)
plot(L1, add = TRUE)
# Subsetting by the window W1 keeps only the parts lying inside W1.
W2clip <- W2[W1]
L1clip <- L1[W1]
# Second figure: the clipping polygon with the clipped shapes on top.
plot(W1, main = "Clipped")
plot(W2clip, col = "blue", add = TRUE)
plot(L1clip, add = TRUE)

How do I make segments (of my probabilities?)

I was wondering if there is a function which can help me with segmentation. Via mixtools (logisregmixEM), I got an optimum of 3 segments with corresponding size of 2.5%, 40.3% and 57.2%. I also got posterior probabilities. Is there any way how to create three separate segments with corresponding observations based on the probabilities, in which I end up with 3 segments with the above called sizes?
For what it's worth, here is some background information on my coefficients and probabilities:
> dput(head(betas))
structure(list(comp1 = c(4.57, 0.08, 0.91, -0.11, 0.09, 0.07),
comp2 = c(2.04, -0.22, 0.19, 0.34, -0.34, -0.01), comp3 = c(0.88,
0.03, 0.42, -0.02, -0.17, -0.01)), row.names = c("beta.0",
"beta.1", "beta.2", "beta.3", "beta.4", "beta.5"), class = "data.frame")
> dput(head(posteriorp))
structure(c(0.06, 0.03, 0, 0.03, 0, 0, 0.61, 0.42, 0.07, 0.41,
0.31, 0.41, 0.33, 0.56, 0.93, 0.56, 0.69, 0.59), .Dim = c(6L,
3L), .Dimnames = list(NULL, c("comp.1", "comp.2", "comp.3")))

for loop a non-parametric test filtered by year and save each year's result as a data frame

I'm trying to create and print a list of data frames that are the result of the Mann-Whitney-Wilcoxon Test.
My code currently runs the Mann-Whitney-Wilcoxon Test on all the observations and compares the two data frames, ORATIOS and KFMARATIOS.
library(tidyverse)
library(devtools)
library(inspectdf)
library(readr)
library(broom)
library(knitr)
library(readxl)
library(skimr)
library(kableExtra)
list_ratio <- grep("RATIO",colnames(ORATIOS), value=TRUE)
MWU_pvalues <- unlist(Map(function(a,b) wilcox.test(a, b)$p.value, ORATIOS[list_ratio], KFMARATIOS[list_ratio]))
MWU_pvalues <- as.data.frame(MWU_pvalues) %>%
rename(`P VALUE` = MWU_pvalues)
MWU_pvalues <- tibble::rownames_to_column(MWU_pvalues, "RATIO") %>%
mutate(`Significance` = if_else(`P VALUE` > 0.05, "",
if_else(`P VALUE` <= 0.05 & `P VALUE` >= 0.01, "\\*",
if_else(`P VALUE` <= 0.01 & `P VALUE` >= 0.001, "**", "***"))))
kable(MWU_pvalues) %>%
kable_styling()
How would I create a for loop or lapply filtering on each year, running the above test, saving each result as a dataframe into a list of dataframes? I'd like to have each dataframe for each year printed using kable in my RMarkdown file.
Sample data:
ORATIOS:
structure(list(YEAR = c(2008, 2009, 2010, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2017), FARM = c("D", "D", "D", "D", "D", "D", "D", "D",
"D", "D", "I", "I", "I", "I", "I", "I", "I", "I", "I", "I"),
`CURRENT RATIO` = c(0.568022785746452, 0.329854720020037,
0.832073159580644, 0.643108790851367, 25.1454874121908, 14.5975395062397,
5.12537888750377, 5.20160770260219, 7.64257374037806, 2.1580962424325,
1.31703632160198, 0.125166573684741, 0.0680923398879462,
0.100452384108057, 0.0998706900125819, 0.0907309088049343,
0.521537398114045, 0.773433351511582, 0.174099653043861,
0.0804425861373205), `WORKING CAPITAL TO GROSS FARMING INCOME` = c(-0.132573843177753,
-0.419436996986394, -0.031444400685141, -0.114022796397208,
1.22962822585944, 0.397841184148093, 0.239623650110705, 0.295681875030473,
0.502930206605254, 0.41862926754376, 0.0513905118422565,
-0.406448322702947, -0.343476652794216, -0.366684678854441,
-0.27321810774102, -0.306827980132377, -0.173010159020099,
-0.140768598200492, -0.367184395657858, -0.888263538055031
), `DEBT TO TOTAL ASSET RATIO` = c(0.0846892634197993, 0.102127561711337,
0.0750728145035032, 0.0797349374471145, 0.0122514875519798,
0.0162967044282012, 0.0165670856047258, 0.0188732833402721,
0.0150968780472965, 0.0275252089477482, 0.1123291162633,
0.151496340475165, 0.0960615511639704, 0.0985641068765839,
0.119816717131179, 0.121164074695269, 0.0970056997272376,
0.139114211255347, 0.0686657852466466, 0.17098484263781),
`DEBT TO FARM ASSET RATIO` = c(0.0935832744841849, 0.114259598684054,
0.0824723632268821, 0.08365143337564, 0.0129689938858425,
0.0191316764222117, 0.0216751963945452, 0.0225358439285237,
0.0167830935834987, 0.030821228954403, 0.140068283663094,
0.203393535891141, 0.133942894025292, 0.137887444914688,
0.17818477721901, 0.182143899668642, 0.141540075268137, 0.212926916788055,
0.0962721755129152, 0.172706971368876), `EQUITY TO ASSET RATIO` = c(0.915310736580201,
0.897872438288663, 0.924927185496497, 0.920265062552885,
0.98774851244802, 0.983703295571799, 0.983432914395274, 0.981126716659728,
0.984903121952704, 0.972474791052252, 0.8876708837367, 0.848503659524835,
0.90393844883603, 0.901435893123416, 0.880183282868821, 0.878835925304732,
0.902994300272762, 0.860885788744653, 0.931334214753353,
0.82901515736219), `DEBT TO EQUITY RATIO` = c(0.0925251502415636,
0.113743954437438, 0.0811661887343104, 0.0866434472975902,
0.0124034482437396, 0.0165666868267717, 0.0168461776723358,
0.0192363361631072, 0.0153282873318188, 0.0283042904566863,
0.126543652970169, 0.178545300040313, 0.106270013503315,
0.109341227289126, 0.13612700838927, 0.137868823072129, 0.107426702137473,
0.161594270778014, 0.0737284040024573, 0.206250562633691),
`RETURN ON FARM ASSETS` = c(0.0170145283510924, -0.00522377886147693,
0.0237250420249203, 0.00257743472229431, 0.0213365859181817,
0.0244609737360482, 0.0279373354305636, 0.0167869242322396,
0.0572363957452595, -0.00273821783417637, 0.0325678749005671,
-0.0532931806283685, 0.024215521265722, -0.0178636730481072,
0.0189254399688753, 0.00211416100547258, -0.00938005681041073,
0.0501921695586829, 0.0215269026374393, -0.0366154070757298
), `RETURN ON ASSETS` = c(0.0566608458884666, 0.0239054711694685,
0.0264084815850861, 0.00576204495548541, 0.179667366138176,
0.0246773695339781, 0.0246552659101915, 0.020526505137709,
0.0551370549195115, -5.05665725060606e-05, 0.0449112877923212,
-0.0284073208306705, 0.0249952584312144, -0.00283565027536605,
0.0360687362998932, 0.0080927754538142, -0.00331579015236834,
0.0457634829675583, 0.0229640648122328, -0.023016837706958
), `RETURN ON EQUITY` = c(0.0168221490501512, -0.00520020437367425,
0.023349291367177, 0.00266962346623839, 0.0204061503508897,
0.0211814836515069, 0.0217131742563291, 0.0143291246913213,
0.0522749822883451, -0.002514608130223, 0.0294232052511338,
-0.0467824450944562, 0.0192125442012039, -0.0141654371518756,
0.0144583817182496, 0.00160025611694793, -0.00711931632857772,
0.0380917883044123, 0.0164860113123938, -0.0437269454184399
), `FARM OPERATING PROFIT MARGIN RATIO` = c(0.113108456739495,
-0.0455472105804567, 0.199838203998892, 0.0234275923606582,
0.158472105656006, 0.183710042172317, 0.190582976791897,
0.124927655425634, 0.45847835351018, -0.0422031337055503,
0.122121670323183, -0.243017854350921, 0.11277681710057,
-0.0790679940692684, 0.076084143213901, 0.00890894198839937,
-0.0450368591167229, 0.204577659697265, 0.13619384495868,
-0.358538500350435), `ASSET TURNOVER RATIO` = c(0.0153974936379558,
-0.00466912018059027, 0.0215963943475807, 0.00245676120615052,
0.0201561446538819, 0.0208362952730876, 0.0213534502396742,
0.0140586870610039, 0.0514857932558134, -0.00244539301601691,
0.0261181226076402, -0.0396950758641658, 0.0173669574034299,
-0.0127692334904846, 0.0127260258857395, 0.00140636256526249,
-0.00642870206654449, 0.0327926792191383, 0.0153539864000432,
-0.0362503005370359), `OPERATING EXPENSE RATIO` = c(0.671535228245263,
0.773166498456329, 0.607985458258, 0.724432447012029, 0.67336000606662,
0.64796797949329, 0.589032574693052, 0.74988495257417, 0.461775664398759,
0.862141471389961, 0.672863504023624, 0.980455882037588,
0.669661413731221, 0.86690216270866, 0.670033358895902, 0.737005445439968,
0.783494244501376, 0.649760819934915, 0.706382908455109,
1.134948535946), `DEPRECIATION EXPENSE RATIO` = c(0.12660532789432,
0.132732814909818, 0.103826844188336, 0.144629676126728,
0.140059287930065, 0.157478624539652, 0.141620283491016,
0.0919194664659044, 0.0583370508964949, 0.133579109920113,
0.150646135557582, 0.183514628711121, 0.146236932328879,
0.16125312788589, 0.191531747619893, 0.197293862401247, 0.193527787561396,
0.0913809290148264, 0.0946887014018637, 0.145522583536315
), `INTEREST EXPENSE RATIO` = c(0.0887509871209225, 0.139647897214309,
0.0883494935547731, 0.107510284500585, 0.028108600347309,
0.0108433537947408, 0.0787641650240354, 0.0332679255342914,
0.0214089311945663, 0.0464825523954769, 0.0543686900956105,
0.0790473436022124, 0.0713248368393299, 0.0509127034747178,
0.0623507502703033, 0.0567917501703862, 0.068014827053951,
0.0542805913529945, 0.0627345451843474, 0.0780673808681226
), `NET FARM INCOME RATIO` = c(0.113108456739495, -0.0455472105804567,
0.199838203998892, 0.0234275923606582, 0.158472105656006,
0.183710042172317, 0.190582976791897, 0.124927655425634,
0.45847835351018, -0.0422031337055503, 0.122121670323183,
-0.243017854350921, 0.11277681710057, -0.0790679940692684,
0.076084143213901, 0.00890894198839937, -0.0450368591167229,
0.204577659697265, 0.13619384495868, -0.358538500350435)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -20L))
KFMARATIOS:
structure(list(YEAR = c(2008, 2008, 2008, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008
), FARM = c(11407100, 11484600, 11485100, 11495100, 11801800,
11806400, 11820000, 11885400, 11886000, 11897200, 11897300, 12004500,
12004501, 12303001, 12340101, 12398300, 13050001, 13700201, 13705601,
14089100, 14110900, 14130000, 14130002, 14184100, 14192300, 14330302,
14388200, 14783200, 14786200, 15094200, 15096200, 15584200, 15586100,
15682100, 15683100, 15689100, 16507002, 16580000, 16598200, 16601300
), `CURRENT RATIO` = c(-3, 0, 4.57, 15.94, 2.22, 0, 368.69, 1.86,
9.1, 3.45, 2, 0, 1.58, 6.26, 1.97, 1.54, 0, 3.39, 313.09, 5.59,
5.4, 0, 3.6, 5.78, 3.18, 207.1, 2.36, 28.31, 3.4, 3.68, 0.37,
3.5, 5.6, 13.64, 7.05, 0, 2.23, 0.89, 4.4, 1.11), `WORKING CAPITAL TO GROSS FARMING INCOME` = c(0.783990044655886,
0.939342207539837, 0.468883358203084, 0.53708199556795, 0.429230789973027,
0.856616290636639, 0.46085746623408, 0.019246546772549, 1.04338230212655,
0.318770448161572, 0.398058372857175, 0.506978780306214, 0.263816960947357,
0.4960655740923, 0.101962576323424, 0.220623464476751, 1.12676140487953,
0.533690322762107, 0.685276501922026, 0.703540899065169, 0.660869855557338,
0.71777803486123, 0.319578323479609, 0.722736340214157, 0.286630301648443,
0.818610240507597, 0.184477489966846, 0.78148168000963, 0.357891811040315,
0.289159422203956, -0.125641128630768, 0.392321597654173, 0.561996673317676,
0.353452531903466, 0.683345718597063, 0.804567295215173, 0.307398272114796,
-0.375449779668313, 0.186702574682293, -0.55737251721071), `DEBT TO TOTAL ASSET RATIO` = c(0.02,
0.07, 0.27, 0.37, 0.36, 0, 0.07, 0.37, 0.05, 0.33, 0.42, 0.08,
0.24, 0.34, 0.36, 0.51, 0.01, 0.11, 0.1, 0.07, 0.08, 0.01, 0.32,
0.14, 0.4, 0.52, 0.39, 0.06, 0.21, 0.32, 0.43, 0.52, 0.29, 0.12,
0.17, 0.1, 0.15, 0.87, 0.12, 0.69), `DEBT TO FARM ASSET RATIO` = c(0.0210960466847519,
0.0662443993261916, 0.270051570315789, 0.373240578143398, 0.359031265562519,
0, 0.0678176279710153, 0.369000587598404, 0.04831743727994, 0.33065743433488,
0.41680939549244, 0.0851067276205844, 0.245359588845858, 0.337912727823456,
0.356607488633417, 0.508663012923272, 0.0126098421632802, 0.10665178903834,
0.105106247793806, 0.0698908293989529, 0.0818483764283224, 0.00750932570017385,
0.319501072718455, 0.136757510256717, 0.400840648545665, 0.516753083750126,
0.389587948103612, 0.0577299469460252, 0.206521419569117, 0.315261383020663,
0.43256943562472, 0.520491208048298, 0.290288373137576, 0.120229338185664,
0.173192986515349, 0.104536048245734, 0.151997186500475, 0.868552025800098,
0.123958600776313, 0.692195974317741), `EQUITY TO ASSET RATIO` = c(0.98536882817945,
0.944215770167283, 0.736537746555766, 0.729860554651407, 0.642228778874089,
1, 0.94228148558872, 0.630999412401596, 0.95168256272006, 0.66934256566512,
0.592693562701164, 0.914893272379416, 0.813956784138156, 0.688995447780108,
0.725420084109645, 0.545241148972386, 0.988536562104007, 0.900124825958172,
0.90344241855196, 0.930936390469265, 0.92060316189968, 0.992490674299826,
0.758518009863028, 0.881474617998699, 0.600468426703118, 0.553595877267449,
0.667405715763261, 0.942270053053975, 0.842757601135073, 0.708413078986436,
0.56743056437528, 0.533041296742996, 0.743304732269968, 0.88511363093375,
0.831970255984885, 0.904591907651469, 0.876296809602567, 0.131447974199902,
0.890119750534961, 0.307804025682259), `DEBT TO EQUITY RATIO` = c(0.02,
0.07, 0.37, 0.6, 0.56, 0, 0.07, 0.58, 0.05, 0.49, 0.72, 0.09,
0.32, 0.51, 0.55, 1.04, 0.01, 0.12, 0.12, 0.08, 0.09, 0.01, 0.47,
0.16, 0.67, 1.07, 0.64, 0.06, 0.26, 0.46, 0.76, 1.08, 0.41, 0.14,
0.21, 0.12, 0.18, 6.61, 0.14, 2.25), `RETURN ON FARM ASSETS` = c(0.374484329540697,
0.0498819566035984, 0.181954755022922, 0.193161758267218, 0.0473627311001023,
0.327305563029612, 0.603037930741254, -0.0156737997438482, 0.10397858597475,
0.10789191406389, 0.180771277730155, 0.150007797084, 0.174196776278552,
0.120122100767257, 0.298096858936563, 0.0517125227815447, 0.111597414809764,
0.185024421154621, 0.239979711875599, 0.0808784377916965, 0.201436668181771,
0.135024051506645, 0.251851638310215, 0.103285147847268, 0.14207589091784,
0.247675592658745, 0.100067311604358, 0.308209326567443, 0.154555623216289,
0.174464204907127, 0.00457531564104158, 0.098141499884622, 0.251116584438097,
0.153198476415449, 0.183688952743912, 0.0838032420725189, 0.169288085631256,
0.0279120898963428, 0.147329195543669, 0.034801030826966), `RETURN ON ASSETS` = c(0.260063898261748,
0.0581159003954688, 0.186586004612603, 0.144217266907855, 0.0471965084015535,
0.203276288956977, 0.522691591931166, -0.0156737997438482, 0.104160943214225,
0.110451790466256, 0.178360409188664, 0.150089138729099, 0.134029707705111,
0.120565772385725, 0.229528019076799, 0.0697390623585822, 0.10198296142804,
0.192570247620748, 0.245119340816501, 0.115758491252085, 0.195889106965538,
0.138158444053898, 0.231674956423303, 0.0966027636728098, 0.141766843553559,
0.215113054221126, 0.135495862386357, 0.314351616201071, 0.133076845003381,
0.168262801476855, 0.00457531564104158, 0.0986664889666124, 0.242490501823923,
0.152124266735103, 0.201716489655936, 0.0786665142081486, 0.162659186669921,
0.0279454048764536, 0.134992616527726, 0.034801030826966), `RETURN ON EQUITY` = c(0.263580248064511,
0.0444871419402714, 0.241012793134955, 0.191549228659637, 0.0734886226747657,
0.186089113513671, 0.544673844576945, -0.0248396423765173, 0.109257634896201,
0.161190875342999, 0.298045789765326, 0.163962072531003, 0.162274234481587,
0.160460729376603, 0.31640703656353, 0.0847926292565323, 0.102628180483108,
0.192493344561337, 0.244023637469295, 0.0858503015508329, 0.212255623707772,
0.13604566269794, 0.250952374400512, 0.101551944180348, 0.235835707060263,
0.386487527831846, 0.128000474163853, 0.327092350614891, 0.139632557156543,
0.227780755169442, 0.0080632167674627, 0.165179790324242, 0.298742298993181,
0.165391606109475, 0.214205739228479, 0.084552656304169, 0.157224605882577,
0.212343248849882, 0.146717984157146, 0.113062299136044), `FARM OPERATING PROFIT MARGIN RATIO` = c(0.55,
0.18, 0.29, 0.33, 0.12, 0.46, 0.24, -0.1, 0.14, 0.23, 0.2, 0.22,
0.44, 0.25, 0.33, 0.13, 0.36, 0.44, 0.33, 0.05, 0.32, 0.16, 0.52,
0.3, 0.24, 0.35, 0.2, 0.32, 0.38, 0.29, 0.02, 0.24, 0.36, 0.25,
0.4, 0.18, 0.32, -0.01, 0.08, -0.01), `ASSET TURNOVER RATIO` = c(0.64,
0.2, 0.55, 0.58, 0.29, 0.64, 1.88, 0.39, 0.31, 0.34, 0.72, 0.58,
0.38, 0.41, 0.96, 0.38, 0.26, 0.4, 0.62, 0.41, 0.55, 0.67, 0.53,
0.29, 0.51, 0.86, 0.38, 0.94, 0.4, 0.54, 0.65, 0.49, 0.7, 0.49,
0.41, 0.3, 0.47, 0.62, 0.87, 0.79), `OPERATING EXPENSE RATIO` = c(0.29,
0.57, 0.61, 0.52, 0.69, 0.48, 0.57, 0.89, 0.64, 0.57, 0.72, 0.62,
0.45, 0.55, 0.52, 0.69, 0.49, 0.43, 0.5, 0.75, 0.53, 0.69, 0.38,
0.54, 0.6, 0.54, 0.55, 0.56, 0.5, 0.57, 0.87, 0.61, 0.54, 0.63,
0.44, 0.61, 0.56, 0.82, 0.77, 0.83), `DEPRECIATION EXPENSE RATIO` = c(0.08,
0.16, 0.01, 0.05, 0.07, 0.02, 0.03, 0.09, 0.02, 0.06, 0.03, 0.1,
0.04, 0.08, 0.06, 0.1, 0.06, 0.05, 0.03, 0.04, 0.08, 0.09, 0.04,
0.06, 0.05, 0.01, 0.11, 0.05, 0.04, 0.06, 0.05, 0.08, 0.04, 0.03,
0.06, 0.08, 0.01, 0.1, 0.05, 0.04), `INTEREST EXPENSE RATIO` = c(0.01,
0, 0.03, 0.07, 0.08, 0, 0, 0.06, 0, 0.02, 0.04, 0.01, 0.02, 0.06,
0.03, 0.06, 0, 0, 0.03, 0.01, 0.02, 0, 0.06, 0.01, 0.05, 0, 0.07,
0, 0.04, 0.01, 0.08, 0.1, 0.04, 0.02, 0.03, 0.02, 0.04, 0.04,
0.01, 0.09), `NET FARM INCOME RATIO` = c(0.62, 0.27, 0.35, 0.36,
0.16, 0.5, 0.39, -0.04, 0.34, 0.35, 0.22, 0.28, 0.49, 0.31, 0.39,
0.15, 0.45, 0.51, 0.44, 0.2, 0.37, 0.21, 0.52, 0.39, 0.29, 0.45,
0.27, 0.39, 0.43, 0.36, 0.01, 0.21, 0.37, 0.32, 0.47, 0.28, 0.38,
0.05, 0.17, 0.05)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-40L))
My solution is kind of convoluted, but I guess it is never easy to work with list columns:
# Build one row per ORATIOS year, each carrying that year's rows as a nested
# data frame in the list column `data`.
nested_oratios <- ORATIOS %>%
group_by(YEAR) %>%
nest() %>%
# Constant helper key used only for the join below.
mutate(fake_year = 2008) %>%
ungroup()
# Same nesting for KFMARATIOS; its own YEAR column is dropped afterwards so
# that only the helper key and the nested data remain.
nested_kfmaratios <- KFMARATIOS %>%
group_by(YEAR) %>%
nest() %>%
mutate(fake_year = 2008) %>%
ungroup() %>%
select(-YEAR)
# Joining on the constant key pairs every ORATIOS year with every nested
# KFMARATIOS group; the suffixes rename the two `data` columns to
# data.oratios and data.kfmaratios.
# NOTE(review): the sample KFMARATIOS contains only 2008, so each year is
# compared against that single group - confirm this is intended when
# KFMARATIOS holds several years.
nested_comb <- nested_oratios %>%
left_join(nested_kfmaratios,by = c('fake_year'),suffix = c(".oratios", ".kfmaratios")) %>%
select(-fake_year)
# Run a Mann-Whitney-Wilcoxon test for every ratio column shared by two data
# frames and tabulate the p-values with significance markers.
#
# Arguments:
#   a, b  Data frames; only columns whose names contain "RATIO" are used, and
#         the columns are paired by position.
#
# Returns a tibble with the columns RATIO (column name taken from `a`),
# `P VALUE` and Significance ("" above 0.05, "\\*" down to 0.01, "**" down to
# 0.001, "***" below that).
logic_pipe <- function(a, b) {
  ratios_a <- select(a, contains('RATIO'))
  ratios_b <- select(b, contains('RATIO'))

  # One p-value per column pair; the names come from the columns of `a`.
  p_values <- unlist(
    map2(ratios_a, ratios_b, function(x, y) wilcox.test(x, y)$p.value)
  )

  # The vector names become row names, which are then moved into RATIO.
  p_table <- rename(as.data.frame(p_values), `P VALUE` = p_values)
  p_table <- tibble::rownames_to_column(p_table, "RATIO")

  p_table <- mutate(
    p_table,
    `Significance` = case_when(
      # A missing p-value stays missing.
      is.na(`P VALUE`) ~ NA_character_,
      `P VALUE` > 0.05 ~ "",
      `P VALUE` >= 0.01 ~ "\\*",
      `P VALUE` >= 0.001 ~ "**",
      TRUE ~ "***"
    )
  )

  as_tibble(p_table)
}
# For each row, run logic_pipe on the paired nested data frames and store the
# resulting table in the list column `result`.
nested_comb %>%
mutate(result = map2(.x = data.oratios ,.y =data.kfmaratios,logic_pipe))
Consider the apply family with mapply and by (object-oriented wrapper to tapply) that can subset your data by year and pass into a user-defined function. Note: unlist + Map can be replaced with mapply (the underlying function of Map, its wrapper). Below demonstrates with base R where transform replaces mutate and ifelse replaces if_else:
# Compare one year's slice of data against the same year in ORATIOS.
#
# Arguments:
#   df  Data frame for a single year (as passed in by `by()`); the year is
#       read from the first element of its YEAR column.
#
# Uses the globals ORATIOS and list_ratio (names of the ratio columns).
#
# Returns a data frame with the columns ratio, p_value and significance
# ("" above 0.05, "*" down to 0.01, "**" down to 0.001, "***" below that).
proc_df <- function(df) {
  current_year <- df$YEAR[1]
  oratios_year <- subset(ORATIOS, YEAR == current_year)

  # Wilcoxon p-value for each ratio column, ORATIOS first and `df` second.
  p_by_ratio <- mapply(
    function(x, y) wilcox.test(x, y)$p.value,
    oratios_year[list_ratio],
    df[list_ratio]
  )

  p_table <- data.frame(
    ratio = names(p_by_ratio),
    p_value = unname(p_by_ratio)
  )

  # Each nested test only decides elements that failed the previous one, so
  # the upper bound of every interval is already implied.
  transform(
    p_table,
    significance = ifelse(
      p_value > 0.05, "",
      ifelse(
        p_value >= 0.01, "*",
        ifelse(p_value >= 0.001, "**", "***")
      )
    )
  )
}
df_list <- by(KFMARATIOS, KFMARATIOS$YEAR, proc_df)
Output
df_list$`2008`
# ratio p_value significance
# 1 CURRENT RATIO 0.20349856
# 2 DEBT TO TOTAL ASSET RATIO 0.39154322
# 3 DEBT TO FARM ASSET RATIO 0.52264808
# 4 EQUITY TO ASSET RATIO 0.42276423
# 5 DEBT TO EQUITY RATIO 0.39162003
# 6 FARM OPERATING PROFIT MARGIN RATIO 0.11726414
# 7 ASSET TURNOVER RATIO 0.01957554 *
# 8 OPERATING EXPENSE RATIO 0.24893798
# 9 DEPRECIATION EXPENSE RATIO 0.02588258 *
# 10 INTEREST EXPENSE RATIO 0.10127823
# 11 NET FARM INCOME RATIO 0.06262773

R: Non-normal distribution with specification limits -> quartiles & Cp/Cpk

I am having a problem plotting the quartiles of a mixed distribution and, furthermore, calculating Cp & Cpk.
My data:
> dput(hist)
structure(list(index = c(1, 10, 11, 12, 128044, 128045, 128046,
128047, 128048, 128049, 128050, 128051, 128052, 128053, 128054,
128055, 128056, 128057, 128058, 128059, 128060, 128061, 128062,
128063, 128064, 128065, 128066, 128067, 128068, 128069, 128070,
128071, 128072, 128073, 128074, 128075, 128076, 128077, 128078,
128079, 128080, 128081, 128082, 13, 14, 15, 150780, 150781, 150782,
150783, 150784, 150785, 150786, 150787, 150788, 150789, 150790,
150791, 150792, 150793, 150794, 150795, 150796, 150797, 150798,
150799, 150800, 16, 163525, 163526, 163527, 163528, 163529, 163530,
163531, 163532, 163533, 163534, 163535, 163536, 163537, 163538,
163539, 163540, 163541, 163542, 163543, 163544, 163545, 163546,
163547, 163548, 163549, 163550, 163551, 163552, 17), Rundheit = c(0.24,
0.25, 0.23, 0.24, 0.23, 0.24, 0.22, 0.24, 0.21, 0.22, 0.23, 0.24,
0.22, 0.24, 0.27, 0.23, 0.26, 0.27, 0.35, 0.27, 0.27, 0.27, 0.27,
0.27, 0.28, 0.32, 0.31, 0.3, 0.29, 0.28, 0.28, 0.27, 0.28, 0.27,
0.28, 0.28, 0.29, 0.29, 0.28, 0.28, 0.27, 0.26, 0.27, 0.23, 0.26,
0.24, 0.17, 0.52, 0.18, 0.19, 0.17, 0.18, 0.18, 0.18, 0.18, 0.2,
0.17, 0.17, 0.18, 0.18, 0.18, 0.18, 0.18, 0.2, 0.19, 0.18, 0.18,
0.25, 0.23, 0.23, 0.22, 0.23, 0.23, 0.23, 0.22, 0.23, 0.2, 0.21,
0.21, 0.22, 0.23, 0.23, 0.23, 0.23, 0.22, 0.22, 0.23, 0.22, 0.22,
0.22, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.24)), .Names = c("index",
"Rundheit"), row.names = c(17L, 45L, 311125L, 622233L, 872553L,
872581L, 872609L, 872637L, 872665L, 872693L, 872749L, 872777L,
872805L, 872833L, 872861L, 872889L, 872917L, 872945L, 872973L,
873001L, 873057L, 873085L, 873113L, 873141L, 873169L, 873197L,
873225L, 873253L, 873281L, 873309L, 873365L, 873393L, 873421L,
873449L, 873477L, 873505L, 873533L, 873561L, 873589L, 873617L,
873673L, 873701L, 873729L, 933341L, 1244449L, 1555557L, 1579889L,
1579917L, 1579945L, 1579973L, 1580001L, 1580029L, 1580057L, 1580085L,
1580113L, 1580141L, 1580197L, 1580225L, 1580253L, 1580281L, 1580309L,
1580337L, 1580365L, 1580393L, 1580421L, 1580449L, 1580533L, 1866665L,
1976397L, 1976425L, 1976453L, 1976481L, 1976509L, 1976565L, 1976593L,
1976621L, 1976649L, 1976677L, 1976705L, 1976733L, 1976761L, 1976789L,
1976817L, 1976873L, 1976901L, 1976929L, 1976957L, 1976985L, 1977013L,
1977041L, 1977069L, 1977097L, 1977125L, 1977181L, 1977209L, 1977237L,
2177773L), na.action = structure(98:100, .Names = c("2412637",
"2412665", "2412721"), class = "omit"), class = "data.frame")
I plotted the data easily with ggplot and the density looks quite good; however, the quartiles (+/- 2s and +/- 3s) are not correct.
My plot:
vec <- quantile(hist$Rundheit, na.rm = TRUE)
ggplot(data=hist, aes(Rundheit)) +
geom_bar(aes( y=..count..), stat="bin",position="dodge", fill="gray40", colour="white") +
stat_density(color="red", geom="line", size=1, position="identity") +
geom_vline(xintercept=vec, linetype=2, colour="blue", size=1) + #Tolerance/Limits
geom_vline(aes(xintercept=0.55), size = 1, color="red") + #Tolerance/Limits
geom_vline(aes(xintercept=0), size = 1, color="red")
Furthermore I have tried to calculate Cp and Cpk using SixSigma package:
library(SixSigma)
cp<- ss.ca.cp(hist$Rundheit, 0,0.55)
cp
[1] 1.922963
cpk <- ss.ca.cpk(hist$Rundheit, 0,0.55)
cpk
[1] 1.658759
However, the values of Cp and Cpk calculated by SixSigma do not match the values which I obtained using another programme, namely
cp=2.35 and cpk=2.11
Just for information: I do not have much background in statistics.
Thanks for the tips!
How about something like this? Is this what you are after? I don't really know what cp, cpk, LSL and USL are, to be honest.
(I renamed hist to dat, as hist is a very commonly used function.)
# Histogram and density of Rundheit with vertical lines at the mean and at
# 3 and 5 standard deviations either side of it.
# `dat` is the question's data frame `hist` under a new name.
m <- mean(dat$Rundheit)
s <- sd(dat$Rundheit)
# One row per vertical line; `sigma` labels the line and its level order
# (mean, 3s, 5s) fixes the order used for the line types.
vec <- data.frame(val = c(m, m - 3*s, m + 3*s, m - 5*s, m + 5*s),
sigma = factor(c('mean', '3s', '3s', '5s', '5s'), c('mean', '3s', '5s')))
library(ggplot2)
ggplot(data=dat, aes(Rundheit)) +
# Binned counts drawn as bars.
geom_bar(aes( y=..count..), stat="bin",position="dodge", fill="gray40",
colour="white") +
# Density estimate drawn as a red line.
stat_density(color="red", geom="line", size=1, position="identity") +
# Blue vertical lines taken from `vec`, with the line type chosen by `sigma`.
geom_vline(data = vec, aes(xintercept = val, lty = sigma),
colour = "blue", size = 1)

Resources