Combine columns that have the same row values but are spread out - r

I want to combine these columns based on the value in "Date" so that there is only one row per unique date, with the corresponding age-group values combined into that row. This was a result of using spread() in tidyr. If you look, the values for Date are repeated.
dput(dataframe) reads ....
structure(list(Date = c("201740", "201740", "201740", "201740",
"201741", "201741", "201741", "201741", "201742", "201742", "201742",
"201742", "201743", "201743", "201743", "201743", "201743", "201743",
"201744", "201744", "201744", "201744", "201744", "201744", "201745",
"201745", "201745", "201745", "201745", "201745", "201746", "201746",
"201746", "201746", "201746", "201746", "201747", "201747", "201747",
"201747", "201747", "201747", "201748", "201748", "201748", "201748",
"201748", "201748", "201749", "201749", "201749", "201749", "201749",
"201749", "201750", "201750", "201750", "201750", "201750", "201750",
"201751", "201751", "201751", "201751", "201751", "201751", "201752",
"201752", "201752", "201752", "201752", "201752", "201801", "201801",
"201801", "201801", "201801", "201801", "201802", "201802", "201802",
"201802", "201802", "201802", "201803", "201803", "201803", "201803",
"201803", "201803", "201804", "201804", "201804", "201804", "201804",
"201804", "201805"), `0-4 yr` = c(NA, 0.1, NA, NA, NA, 0.2, NA,
NA, NA, 0.2, NA, NA, NA, NA, 0.3, NA, NA, NA, NA, NA, 0.6, NA,
NA, NA, NA, NA, 0.7, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA,
1.8, NA, NA, NA, NA, NA, 2.7, NA, NA, NA, NA, NA, 3.3, NA, NA,
NA, NA, NA, 5.2, NA, NA, NA, NA, NA, 7.9, NA, NA, NA, NA, NA,
13.7, NA, NA, NA, NA, NA, 18.3, NA, NA, NA, NA, NA, 23.3, NA,
NA, NA, NA, NA, 28.2, NA, NA, NA, NA, NA, 35.6, NA, NA, NA, 41.9
), `18-49 yr` = c(NA, 0.1, NA, NA, 0.1, NA, NA, NA, NA, 0.2,
NA, NA, NA, 0.2, NA, NA, NA, NA, NA, 0.4, NA, NA, NA, NA, NA,
0.5, NA, NA, NA, NA, NA, 0.7, NA, NA, NA, NA, NA, 1, NA, NA,
NA, NA, NA, 1.4, NA, NA, NA, NA, NA, 1.9, NA, NA, NA, NA, NA,
2.7, NA, NA, NA, NA, NA, 4.2, NA, NA, NA, NA, NA, 6.6, NA, NA,
NA, NA, NA, 9.3, NA, NA, NA, NA, NA, 12.5, NA, NA, NA, NA, NA,
15.2, NA, NA, NA, NA, NA, 17.7, NA, NA, NA, NA, NA), `5-17 yr` = c(0,
NA, NA, NA, 0.1, NA, NA, NA, 0.1, NA, NA, NA, 0.1, NA, NA, NA,
NA, NA, 0.2, NA, NA, NA, NA, NA, 0.3, NA, NA, NA, NA, NA, 0.5,
NA, NA, NA, NA, NA, 0.7, NA, NA, NA, NA, NA, 0.9, NA, NA, NA,
NA, NA, 1.2, NA, NA, NA, NA, NA, 1.7, NA, NA, NA, NA, NA, 2.5,
NA, NA, NA, NA, NA, 3.5, NA, NA, NA, NA, NA, 4.3, NA, NA, NA,
NA, NA, 5.9, NA, NA, NA, NA, NA, 7.3, NA, NA, NA, NA, NA, 9,
NA, NA, NA, NA, NA, NA), `50-64 yr` = c(NA, NA, 0.2, NA, NA,
NA, 0.3, NA, NA, NA, 0.5, NA, NA, NA, NA, NA, 0.8, NA, NA, NA,
NA, NA, 1.1, NA, NA, NA, NA, NA, 1.6, NA, NA, NA, NA, NA, 2.2,
NA, NA, NA, NA, NA, 3.1, NA, NA, NA, NA, 4.1, NA, NA, NA, NA,
NA, 5.4, NA, NA, NA, NA, NA, NA, 8.1, NA, NA, NA, NA, NA, 13.7,
NA, NA, NA, NA, 21.7, NA, NA, NA, NA, NA, NA, 32.6, NA, NA, NA,
NA, NA, 42.9, NA, NA, NA, NA, NA, 52, NA, NA, NA, NA, NA, 60.2,
NA, NA), `65+ yr` = c(NA, NA, NA, 0.5, NA, NA, NA, 1, NA, NA,
NA, 2.1, NA, NA, NA, NA, NA, 3, NA, NA, NA, NA, NA, 3.9, NA,
NA, NA, NA, NA, 5.1, NA, NA, NA, NA, NA, 6.5, NA, NA, NA, NA,
NA, 9.2, NA, NA, NA, NA, NA, 14.3, NA, NA, NA, NA, NA, 20.5,
NA, NA, NA, NA, NA, 30.2, NA, NA, NA, NA, NA, 50.2, NA, NA, NA,
NA, NA, 90.1, NA, NA, NA, NA, NA, 137.9, NA, NA, NA, NA, NA,
179.5, NA, NA, NA, NA, NA, 217.4, NA, NA, NA, NA, NA, 251.8,
NA)), .Names = c("Date", "0-4 yr", "18-49 yr", "5-17 yr", "50-64 yr",
"65+ yr"), class = "data.frame", row.names = c(NA, 97L))

You could try aggregation. This could have been done before your spread(), but doing it afterwards works as well:
library(tidyverse)
# Collapse the repeated Date rows into one row per Date by summing every other
# column with missing values removed. In the sample data each column appears to
# carry at most one non-NA value per Date, so the sum simply recovers that value.
# NOTE: a Date/column pair with no observations at all yields 0, not NA,
# because sum(..., na.rm = TRUE) of an empty set is 0.
dataframe %>%
  group_by(Date) %>%
  summarise_all(sum, na.rm = TRUE)
I've used sum() here because it's not clear how you want to summarise.
A more suitable way might be:
# Go back to long form (one row per Date / age group), discard the NA
# placeholders, then widen again so each Date occupies a single row.
dataframe %>%
  gather(key = "age_group", value = "value", -Date) %>%
  filter(!is.na(value)) %>%
  spread(key = age_group, value = value)
Here we gather the data back into what may have been your original (long) input; it is then filtered to drop the NA values and simply re-spread.

Related

Counting observations and adding them to data frame in R

I have the returns of 108 mutual funds from 1987 to 2019. I want to count the total number of observations (excluding NA) for each fund over its existence.
I have been able to get the kurtosis, skewness, etc using the following codes:
# Column-wise (per-fund) summary statistics, ignoring missing returns.
# kurtosis() and skewness() are not base R; presumably they come from an
# add-on package loaded elsewhere -- confirm.
kurt <- apply(funds, MARGIN = 2, FUN = kurtosis, na.rm = TRUE)
skew <- apply(funds, MARGIN = 2, FUN = skewness, na.rm = TRUE)
# The next three result names mask the functions max(), min() and sd()
# in the workspace.
max <- apply(funds, MARGIN = 2, FUN = max, na.rm = TRUE)
min <- apply(funds, MARGIN = 2, FUN = min, na.rm = TRUE)
sd <- apply(funds, MARGIN = 2, FUN = sd, na.rm = TRUE)
m <- apply(funds, MARGIN = 2, FUN = mean, na.rm = TRUE)
Then I tried to do the same for the number of observations, without success:
# This call fails with the error quoted below. The `count` found here is a
# data-frame verb (the group_by_ dispatch in the message suggests dplyr::count
# -- confirm), so it cannot be applied to a bare numeric column.
obs <- apply(funds, 2, count, na.rm = TRUE)
Getting this error:
Error in UseMethod("group_by_") :
no applicable method for 'group_by_' applied to an object of class "c('double', 'numeric')"
The first 10 lines of the data set (funds) are shown here. It is much longer, but this should be sufficiently illustrative. As you can see, there are a lot of NAs in the first lines. For most funds the number of observations here would be 0, whereas for the fund "DK.NORGE" the number of observations in the first 10 lines would be 10.
structure(c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.0090002245623988, 0.00232763847063611,
0.0666744669374286, 0.0541982646590207, 0.0357777115456177, 0.0112375620619904,
0.0517733147448458, 0.0553272554088993, 0.0964919466161833, -0.183504972082187,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, -0.0132758821474321, 0.0246370824973443,
0.0436835891381346, 0.0356472795497187, 0.000293052003410121,
-0.0158201720510295, 0.0677617514139583, 0.0710647033479483,
0.0996190340976313, -0.26700522906759, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 0.0609178826615828, 0.0330911715918167,
0.0246199591154059, 0.0387559218497211, -0.0219724959665873,
0.00576292730999128, 0.0607497869923317, 0.0968700634555142,
0.118662582078258, -0.149187455335955, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.00338009126246408,
0.0625741902662371, -0.0197095435684648, 0.0235653235653237,
-0.0205574774344905, 0.0211513478402079, 0.0440504114817319,
0.0713605727123872, 0.122338724009241, -0.193811951737024, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 0.0182819486204802, 0.052568368712947,
0.0223478709564888, 0.0430931528662419, 0.00418444259680784,
0.0149102804245731, 0.0891504229496138, 0.101929676995524, 0.0713342508037151,
-0.184479046400599, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), .Dim = c(10L, 108L), .Dimnames = list(
NULL, c("AI.NORS2", "AI.AKSJN", "AB.AKSPR", "AI.AKTIV", "GA.KAPIT",
"GA.GAMB", "BF.HUMAN", "AB.NOPEN", "VL.AKNOR", "AI.NORGS",
"AI.NORG", "BF.NORGE", "AI.NORGI", "AI.VEKST", "AC.NWECA",
"AC.NEQCB", "AC.NWECD", "AC.NWECI", "NR.NORGE", "BF.NORG",
"CA.AKSJE", "CL.AKNOR", "FF.AKFOR", "FF.NOIII", "FF.NOAI2",
"FF.NORGE", "FF.NORII", "FF.VEKST", "DF.NORGE", "DF.VEKST",
"DK.PBNOR", "DK.NORGE", "DK.NORII", "DI.RINV", "DK.NORG3",
"DK.NORIV", "DK.NOIVR", "DK.NSEL1", "DK.NSEL2", "DK.NSEL3",
"DI.RVKST", "DI.SMB", "EK.NORGE", "NF.PLUSS", "FT.GNRTR",
"FT.NOFOK", "FF.BARNE", "FK.AKTI2", "FK.SPAR", "FV.NORGE",
"FV.TRNDR", "GA.OPPKJ", "GF.AKSJE", "GF.INVES", "SU.AKTIV",
"SU.GLNO", "SU.NORGE", "HF.NORGE", "HB.HNORG", "HO.NORGE",
"KF.IPA", "KL.AKSJE", "KL.AKSNO", "IS.NORGE", "IS.UTBYT",
"IS.UTBYI", "NF.AKSJE", "KF.AVKAS", "KF.BARNE", "KF.KAP",
"KF.KAPIT", "KF.KAIII", "KF.NOPLS", "KF.AKPEN", "KF.SMB",
"KF.SMBII", "KF.VEKST", "OD.NORGE", "OD.NORGA", "OD.NORGB",
"OD.NORGD", "OD.NORII", "OR.FIN30", "PO.AKTIV", "FO.AKSJE",
"FO.INDX", "PV.VEKST", "NF.RFAKS", "NF.RFPLU", "AI.SKAFS",
"SE.NORGE", "SK.HORIS", "SK.SMB", "SR.NORGA", "SR.NORGB",
"SP.INNLA", "SP.AKSJS", "SP.NORGE", "SP.NORGA", "SP.STNOP",
"SP.NORGI", "SP.NOINS", "SP.OPTIM", "SP.VEKST", "SP.VERDI",
"SP.STVEN", "TF.NORGE", "OD.VÅRAK")))
Any feedback is appreciated. Thank you.
count is not the right function here. To count the number of non-NA values in each column, use is.na together with sum.
# Tally the non-missing entries in each column of funds.
obs <- apply(funds, MARGIN = 2, FUN = function(column) sum(!is.na(column)))
However, a better option is colSums, which accepts a complete data frame or matrix as input.
# !is.na(funds) is TRUE wherever a value is present; colSums() adds those
# TRUEs up column by column, giving the per-column count of non-NA values.
colSums(!is.na(funds))

Matrix Correlation in R

I am trying to create a correlation matrix with R but I am having problems. All the tutorials usually use very small datasets, however, I need to select 8 rows from a big dataset and one more variable that's average of two rows. I am not sure how to do the part where I select specific rows. Can someone help me out with that? I would really appreciate any help.
Someone asked me for a sample:
"NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.1577, NA, 0.2197, NA, 0.348,
NA, 0.086, NA, NA, NA, NA, NA, NA, NA, NA, 0.3768, NA, 0.2163,
NA, 0.336, NA, 0.329, NA, NA, NA, NA, NA, NA, NA, NA, 0.2881,
NA, 0.0632, NA, 0.235, NA, 0.167, NA, NA, NA, NA, NA, NA,
NA, NA, 0.2076, NA, 0.3705, NA, 0.164, NA, 0.255, NA, NA,
NA, NA, NA, NA, NA, NA, 0.1795, NA, 0.3649, NA, 0.246, NA,
0.628, NA, NA, NA, NA, NA, NA, NA, NA, 0.0227, NA, 0.3975,
NA, 0.176, NA, 0.13, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0.5, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
0.6333, NA, 0.3627, NA, 0.603, NA, 0.408, NA, NA, NA, NA,
NA, NA, NA, NA, 0.6667, NA, 0.8889, NA, 0.6, NA, 0.6, NA,
NA, NA, NA, NA, NA, NA, NA, 0.0545, NA, 0.2547, NA, 0.431,
NA, 0.126, NA, NA, NA, NA, NA, NA, NA, NA, 0.2388, NA, 0.5514,
NA, 0.32, NA, 0.424, NA, NA, NA, NA, NA, NA, NA, NA, 0.6667,
NA, 0.3867, NA, 0.313, NA, 0.75, NA, NA, NA, NA, NA, NA,
NA, NA, 0.752, NA, 0.482, NA, 0.349, NA, 0.24, NA, NA, NA,
NA, NA, NA, NA, NA, 0.5161, NA, 0.641, NA, 0.643, NA, 0.438,
NA, NA, NA, NA, NA, NA, NA, NA, 0.3492, NA, 0.3, NA, 0.391,
NA, 0.645, NA, NA, NA, NA, NA, NA, NA, NA, 0.3531, NA, 0.5755,
NA, 0.667, NA, 0.751, NA, NA, NA, NA, NA, NA, NA, NA, 0.2941,
NA, 0.5119, NA, 0.294, NA, 0.526, NA, NA, NA, NA, NA, NA,
NA, NA, 0.2941, NA, 0.1515, NA, 0.3, NA, 0.124, NA, NA, NA, "
Let's say you have a file with 110 rows and 84 columns (this makes your matrix size 9240).
To read your file (if your data is stored in a file):
# read.csv() already returns a data frame, so no data.frame() wrapper is
# needed; sep = "\t" makes it read tab-separated input.
data <- read.csv("file.txt", header = TRUE, sep = "\t")
Use header = TRUE if your file has a header row; otherwise use header = FALSE.
Now select any rows according to your need.
For 1 row only (with all columns):
your_df <- data[1,]
For rows 1 to 10 only (with all columns):
your_df <- data[1:10,]
For rows 1, 3 and 10 (with all columns):
your_df <- data[c(1,3,10),]
Similarly you can select any columns by entering values after comma in square bracket.
For 3rd row and 2nd column:
# Selecting a single row and a single column returns the cell's value itself
# rather than a data frame.
your_df <- data[3,2]
For rows from 1st to 10th and columns from 51st to 60th:
your_df <- data[1:10,51:60]

Boxplot/ Box & Whisker help in ggplot2 (R) Need to remove duplicates while also plotting on one plot

So I would like to create a plot of 6 different boxplots (so all in one graphic). I am however running into some trouble.
First, there are many NAs scattered randomly throughout the dataset, so I am unsure how to plot all the variables on one plot given this issue.
Secondly, ggplot requires a y variable.... I am fairly new to the tidyverse but am unsure how to create a y variable that will allow for each plot to be represented.
Here is the code I have thus far as well as some sample data, just getting some nonsense...:
library(readr)
library(ggplot2)
library(tidyr)
# Read the tab-delimited export. The sample data below shows the value columns
# arriving as character strings rather than numbers.
Box_Whiskers_Plot <- read_delim("C:/Users/johnt/Downloads/Box & Whiskers Plot.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
# No columns are selected or excluded, so every column is stacked: Concern
# receives the original column name and Value the cell contents.
box_tidy <- gather(data = Box_Whiskers_Plot,
key = Concern,
value = Value)
# Value is mapped to y exactly as gathered (no numeric conversion is applied).
ggplot(data = box_tidy, mapping = aes(x = Concern, y = Value)) +
geom_boxplot()
Sample data:
structure(list(`1 concern` = c(NA, NA, NA, NA, "4.7051072361071977E-2",
"0.19811079686050914", "0.15241809445883892", "9.3784616216209704E-2",
NA, NA, "0.12902642667986841", NA, NA, NA, "-2.7995766112836051E-3",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "0.16257072914439274",
NA, NA, NA, NA, NA, NA, NA, "-0.32189822523240785", NA, NA, "8.8779492146409344E-2",
NA, NA, NA, NA, "0.25876167614411516", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, "0.12140341771652063", NA,
NA, NA, NA, NA, NA, NA, "7.8315099203373872E-2", NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "-2.4887790087301243E-2", NA, NA, "0.17817816702345479",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "-0.45715764794257374",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "0.1727380710391988",
NA, NA, NA, NA, NA, "-", NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "0.11810158539718096", NA, NA, NA,
"0.27340288238873622", NA, NA, "0.31222498045287939", NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "1 concern", "8.3059150913641758E-2"
), `2 concerns` = c(NA, "0.30750786698302712", NA, NA, NA, NA,
NA, NA, "0.19491094633578943", "0.14347068243793348", NA, NA,
NA, NA, NA, "9.4003202704330935E-2", NA, NA, NA, NA, NA, NA,
"5.8682039323707746E-2", NA, NA, NA, NA, NA, NA, "0.38837474884084",
NA, NA, "9.9772158663856914E-2", NA, NA, NA, NA, "0.15369966808838376",
NA, "-9.7591933707396827E-2", "7.5799891559719335E-2", NA, "0.74069094176638783",
"0.18455079764897997", "0.35878241180217119", NA, NA, "9.7671065222774578E-2",
NA, "-1", "1.9762661406333537E-2", NA, NA, NA, NA, NA, NA, "0.12110279127050561",
"-8.8073972864920469E-2", NA, NA, "-5.3063552654085022E-2", "-0.19524178703281547",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "0.19324044960582598",
NA, NA, NA, NA, NA, "0.19558769095890249", "9.8480653359761305E-2",
"-2.7258845509566809E-2", NA, NA, "4.2377241471322602E-2", NA,
NA, NA, "-0.31089100169922018", NA, NA, "9.4259642624681561E-2",
NA, NA, NA, NA, NA, NA, NA, NA, "2.9465956237787916E-2", "0.36028868638565514",
"0.28696166852692623", "0.16026874768911181", NA, NA, NA, NA,
NA, NA, NA, "0.17495242646710829", NA, NA, NA, "8.0174590835183634E-2",
NA, NA, NA, NA, NA, "0.3741514609038552", NA, NA, NA, NA, NA,
NA, NA, NA, NA, "0.18203421025448296", NA, NA, NA, "0.18032116561517086",
NA, NA, "-0.24673024063961035", "8.3759133449436751E-2", NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "-0.12597421585178167",
NA, NA, NA, "0.19902495793991903", NA, NA, NA, NA, "-9.2408051470944635E-2",
NA, NA, NA, NA, NA, "1.3515493037121162E-2", NA, NA, "-2.2110562098653141E-2",
NA, NA, NA, NA, "3.5029098159769845E-2", NA, NA, NA, NA, NA,
NA, NA, "-0.38542680544884389", "0.2673937214255111", NA, NA,
NA, NA, NA, NA, "0.1514695655588354", NA, "-0.19290183848262321",
"0.19830470195985717", "0.25974088161209186", "0", "0.12635072134014091",
"4.3529572197642308E-2", NA, "-2.811733193779542E-2", "5.2999441490886978E-2",
"-1.5294438792050502E-2", "-0.1092036064257218", NA, NA, NA,
NA, NA, NA, NA, "-8.4682877918448418E-2", NA, NA, NA, NA, NA,
NA, "0.33060935555613358", "-0.26950721703104663", NA, NA, NA,
NA, NA, NA, NA, NA, NA, "2 concerns", "6.5143247152538983E-2"
), `3 concerns` = c(NA, NA, NA, "-6.2005384615384615E-2", NA,
NA, NA, NA, NA, NA, NA, NA, "0.16466373149154445", "0.14529429748819767",
NA, NA, "3.3101080910433733E-3", NA, NA, "-0.14716333286324612",
NA, NA, NA, "0.101855405108354", NA, NA, NA, "1.5624661137794593E-2",
"4.089776650666388E-2", NA, "-", "0.14868399697158718", NA, NA,
"8.4936940656134663E-2", "-7.3275278911856751E-5", NA, NA, "0.16209406140402915",
NA, NA, NA, NA, NA, NA, "7.5733790938149026E-2", "7.7802906849214093E-2",
NA, "0.29092905402715896", NA, NA, NA, "0.2433591777340911",
NA, NA, "0.16878584978409417", "0.23450765393402495", NA, NA,
NA, NA, NA, NA, "1.4972641033242029E-2", NA, NA, "0.15914858376902719",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "0.23167642917280462",
"-0.12014200114033269", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, "0.23296355648900113", "9.2737675544091028E-2",
"-1.135676252608786E-2", "-2.5231545331790839E-3", "-1.831276418414618E-2",
NA, "3.700270212564627E-2", NA, NA, NA, NA, NA, NA, NA, "0.12864133565206637",
NA, "0.2713309994071611", NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, "0.33081997450170131", NA, NA, NA, NA, NA, NA, "0.10916148370698719",
NA, NA, NA, NA, NA, NA, NA, NA, "-1", "-0.10648202245319915",
NA, NA, NA, NA, NA, NA, "7.6583001375218229E-2", "0.11923826063359644",
"0.1382325704168097", "4.411629139778972E-2", NA, NA, "-2.7571494462436785E-2",
"-8.1186210791162505E-2", "0.36815766123347382", "0.21997253864625099",
"9.5269593575127098E-2", NA, "0.40386694165317971", "0.1317061317077115",
"8.4533840305895946E-2", NA, NA, NA, "7.064976326243011E-2",
"8.2533081202996961E-2", NA, NA, NA, NA, "-6.5935523766861404E-2",
"0.15935278497831473", NA, "0.1159060020401923", NA, NA, "0.11817005685670501",
"6.1029901139001863E-2", "0.12692362698225845", "3.4415424790262605E-2",
NA, NA, "0.23155179134453707", "0.14332216947092591", "7.4795242229677816E-2",
"0", NA, NA, NA, NA, NA, NA, "0.14534924839754271", "0.27815354547396853",
"0.19493428600637031", "0.1283055485269069", NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "-2.3773331360783301E-2",
NA, "0.20660830748524073", NA, "0.23154616465669209", NA, "-0.80937062468163068",
NA, NA, "0.41853447897377194", NA, NA, NA, "9.4089917760579844E-2",
NA, NA, "6.3552512454774224E-3", "-0.43971479670164443", "0.15974143122420936",
"-0.16029537031373786", NA, "3 concerns", "7.198593957320798E-2"
), `4 concerns` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "6.4352691220779912E-2", NA, NA,
"0.21279729530946834", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, "-9.3690492677869663E-2", NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, "-7.1185121289991993E-2", NA, NA, "6.569732863463229E-2",
NA, NA, "9.7222332805540157E-2", NA, NA, NA, NA, NA, NA, NA,
"8.5074456366478923E-2", NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, "1.7663351832379881E-3", NA, NA, NA, "0.20859715043286409",
"-4.0588246304824382E-2", NA, NA, NA, NA, NA, NA, NA, NA, "3.1799587621662351E-2",
NA, "8.6166092731043253E-2", NA, NA, NA, NA, NA, NA, NA, "4.869038187032948E-2",
"0.18071545075957585", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "4.2986578596766911E-2",
NA, NA, NA, NA, "9.4277092317434086E-3", NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, "3.5496813684543493E-2", "-8.1501862554191895E-2",
NA, NA, "9.9940934380241986E-2", NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, "4 concerns", "5.1777127158001604E-2"), `5 concerns` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "8.9612836579635591E-2", "8.1063923186028175E-2",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "8.4668999169687842E-2", NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "6.0739595493825904E-2", NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, "9.0340384993987888E-2", NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "6.1693228854984072E-2", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
"-0.20631919750140182", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "5 concerns", "3.7399967253821095E-2"), `6 concerns` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, "0.32874505543754307", NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, "0.15408216010209475", NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "-4.8467807432570065E-2",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, "6 concerns", "0.14478646936902259"
)), row.names = c(NA, -238L), class = c("tbl_df", "tbl", "data.frame"
))
ggplot requires your data to be "tidy" (see here for details https://cran.r-project.org/web/packages/tidyr/vignettes/tidy-data.html).
In your case, just use the gather function from the tidyr package. As your numeric values are actually stored as character rather than numeric, you also need to convert them to numeric before plotting them. ggplot will drop NA values (with a warning), so you don't need to worry about them.
So the code is as follows:
library(tidyverse)
# Stack every column into concerns/value pairs, then turn the character values
# into numbers. Entries that are not numeric text (the sample contains "-" and
# repeated header labels such as "1 concern") become NA with a coercion warning.
Box_Whiskers_Plot <- mutate(
  gather(Box_Whiskers_Plot, key = "concerns", value = "value"),
  value = as.numeric(value)
)
# One box per original column.
ggplot(data = Box_Whiskers_Plot, mapping = aes(x = concerns, y = value)) +
  geom_boxplot()
Resulting in:

How to further format forest Plots in R, from the metafor package?

I'm quite new to R and have been struggling with properly formatting a forest plot I've created.
When I click the "zoom" option in R to open the graph in a new window, it looks as such:
Forest Plot Currently
My main goal is to get the forest plot as compact as possible, i.e. publication quality/style. I currently have wayyyy too much white space in my plot. I think it has something to do with me messing around with the par() function, and now have no clue how to revert to defaults.
# Load metafor for the effect-size, model-fitting and forest-plot functions
library(metafor)
# Load readxl to import the Excel sheet
library(readxl)
# Read the data sheet from the Excel file
ACDF<- read_excel("outpatient_ACDF_meta_analysis.xlsx")
# View the data sheet with View(ACDF)
# par(mar = ...) sets the plot margins, in lines of text, in the order
# c(bottom, left, top, right). The bottom margin here is 20 lines; R's default
# is c(5, 4, 4, 2) + 0.1.
# NOTE(review): the large bottom margin likely contributes to the blank space
# around the plot -- confirm by restoring the default.
par(mar=c(20,1,1,1))
# measure="OR" computes (log) odds ratios from the 2x2 counts ai/bi/ci/di.
# For risk ratios use measure="RR" instead.
returnop <- escalc(measure="OR", ai=op_return_OR, bi=op_no_return_OR, ci=ip_return_OR, di=ip_no_return_OR, data=ACDF)
# Fit a random-effects model (REML estimator); studies are labelled "Author, Year"
REmodel<-rma(yi=yi, vi=vi, data=returnop, slab=paste(Author, Year, sep=", "), method="REML")
# Draw the forest plot, showing the four count columns via ilab.
# ylim=c(-1, 27) reserves rows up to y = 27, where the headings below are placed.
forest(REmodel, xlim=c(-17, 6),
ilab=cbind(ACDF$op_return_OR, ACDF$op_no_return_OR, ACDF$ip_return_OR, ACDF$ip_no_return_OR),
ilab.xpos=c(-9.5,-8,-6,-4.5), cex=.75, ylim=c(-1, 27),
psize=1)
# Add column headings to the plot (at y = 26 and y = 27, the top of ylim)
text(c(-9.5,-8,-6,-4.5), 26, c("Return+", "Return-", "Return+", "Return-"))
text(c(-8.75,-5.25), 27, c("Outpatient", "Inpatient"))
text(-16, 26, "Study", pos=4)
text(6, 26, "Log Odds Ratio [95% CI]", pos=2)
I'm not 100% as to how to provide my data otherwise, but I used the dput function to provide as follows. Apologies for the N/As, still fleshing out the data for the future.
structure(list(Study = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), Author = c("Stieber", "Villavicencio",
"Lied", "Liu", "Garringer", "Joseffer", "Trahan", "Lied", "Sheperd",
"Talley", "Martin", "McGirt", "Adamson", "Fu", "Arshi", "Khanna",
"McClelland", "Purger", "McLellend2", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Year = c(2005, 2007,
2007, 2009, 2010, 2010, 2011, 2012, 2012, 2013, 2015, 2015, 2016,
2017, 2017, 2017, 2017, 2017, 2017, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), op_return_OR = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 1, 3, 2, 16, 257, 7, NA, 5, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), op_no_return_OR = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
596, 769, 992, 4581, 958, 1749, NA, 3120, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ip_return_OR = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 8, 9, 2, 257, 2034, 12, NA,
200, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), ip_no_return_OR = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 589, 641, 482, 16171, 8930, 1744, NA, 46312, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), op_death = c(NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, 1, NA,
1, 0, NA, 2, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), op_no_death = c(NA, NA, NA, 45, NA,
NA, NA, NA, NA, NA, 596, NA, 993, 4597, NA, 1754, NA, 3125, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), ip_death = c(NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, 0, NA,
0, 42, NA, 2, NA, 20, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), ip_no_death = c(NA, NA, NA, 64,
NA, NA, NA, NA, NA, NA, 597, NA, 484, 16386, NA, 1754, NA, 46492,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
2979.79797979798), op_thrombo = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0, NA, NA, 8, 20, 4, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), op_no_thrombo = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 597, NA, NA, 4589, 1195,
1752, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), ip_thrombo = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 2, NA, NA, 67, 150, 4, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ip_no_thrombo = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 595, NA, NA, 16361, 10814,
1752, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), op_stroke = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0, NA, NA, 2, 12, 0, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), op_no_stroke = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 597, NA, NA, 4595, 1203,
1756, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), ip_stroke = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 2, NA, NA, 14, 132, 0, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ip_no_stroke = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 595, NA, NA, 16414, 10832,
1756, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), op_dysphagia = c(NA, NA, NA, 0, NA, NA,
NA, NA, NA, NA, NA, NA, 11, NA, NA, NA, NA, 2, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), op_no_dysphagia = c(NA,
NA, NA, 45, NA, NA, NA, NA, NA, NA, NA, NA, 618, NA, NA, NA,
NA, 49, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), ip_dysphagia = c(NA, NA, NA, 1, NA, NA, NA, NA,
NA, NA, NA, NA, 1, NA, NA, NA, NA, 59, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ip_no_dysphagia = c(NA,
NA, NA, 63, NA, NA, NA, NA, NA, NA, NA, NA, 273, NA, NA, NA,
NA, 2917, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), op_hematoma = c(NA, NA, NA, 0, NA, NA, NA, NA,
NA, NA, NA, NA, 1, NA, NA, NA, 1, 4, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), op_no_hematoma = c(NA,
NA, NA, 45, NA, NA, NA, NA, NA, NA, NA, NA, 629, NA, NA, NA,
2015, 47, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), ip_hematoma = c(NA, NA, NA, 1, NA, NA, NA, NA,
NA, NA, NA, NA, 1, NA, NA, NA, 273, 65, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ip_no_hematoma = c(NA,
NA, NA, 63, NA, NA, NA, NA, NA, NA, NA, NA, 273, NA, NA, NA,
7791, 1713, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA)), .Names = c("Study", "Author", "Year", "op_return_OR",
"op_no_return_OR", "ip_return_OR", "ip_no_return_OR", "op_death",
"op_no_death", "ip_death", "ip_no_death", "op_thrombo", "op_no_thrombo",
"ip_thrombo", "ip_no_thrombo", "op_stroke", "op_no_stroke", "ip_stroke",
"ip_no_stroke", "op_dysphagia", "op_no_dysphagia", "ip_dysphagia",
"ip_no_dysphagia", "op_hematoma", "op_no_hematoma", "ip_hematoma",
"ip_no_hematoma"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-35L))
The par option looks ok to me. I changed the ylim option and modified the y location and size of some of the header text as below:
# Draw the forest plot for REmodel. The y-range is widened to c(-1, 10) so
# the header text placed at y = 8.5 and y = 9.5 below fits inside the plot.
forest(
  REmodel,
  xlim = c(-17, 6),
  ylim = c(-1, 10),
  ilab = cbind(
    ACDF$op_return_OR,
    ACDF$op_no_return_OR,
    ACDF$ip_return_OR,
    ACDF$ip_no_return_OR
  ),
  ilab.xpos = c(-9.5, -8, -6, -4.5),
  cex = 0.75,
  psize = 1
)

# Column headings: one label above each of the four ilab columns (same x
# positions as ilab.xpos), drawn slightly smaller than the plot text.
text(
  x = c(-9.5, -8, -6, -4.5),
  y = 8.5,
  labels = c("Return+", "Return-", "Return+", "Return-"),
  cex = 0.65
)

# Group headings, each centred over a pair of ilab columns.
text(x = c(-8.75, -5.25), y = 9.5, labels = c("Outpatient", "Inpatient"))

# Outer headings: pos = 4 draws to the right of x, pos = 2 to the left, so
# the labels sit flush against the xlim edges.
text(x = -17, y = 8.5, labels = "Study", pos = 4)
text(x = 6, y = 8.5, labels = "Log Odds Ratio [95% CI]", pos = 2)
This gives the following plot:

singularity error regressing lag in R

I am trying to run a forecasting model where I simply regress earnings on lagged earnings
$Y_{i,t+1}=a+bY_{i,t}$ (edit: mathematical formulas do not seem to render on SO?)
Y(i,t) = a + bY(i,t-1)
However, doing so gives me the following error:
Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
NA/NaN/Inf in 'x'
What surprises me is that I was not getting an error message when my model was larger (where I included other variables such as X(i,t)).
What can I do to solve this issue? I tried to copy my data with dput but it was too long.
How can I paste only the first 100 values of a vector? As in dput(A[1:100,])?
# Regress inc.plus1/csout on inc/csout. I() is needed so that "/" is treated
# as arithmetic division rather than as a formula operator.
# NOTE(review): lm()'s default NA handling drops NA/NaN rows but not Inf; the
# pasted A and B vectors each contain an Inf, which presumably triggers the
# "NA/NaN/Inf in 'x'" error -- TODO confirm (e.g. csout == 0 for some rows).
lm(I(inc.plus1/csout)~I(inc/csout), data=df)
where inc.plus1 is the lagged income (lag+1) and csout is the number of shares outstanding.
A represents the first 600 entries of my vector I(inc.plus1/csout):
dput(head(A,600))
structure(c(NA, NA, 1.446, 0.9995, 1.999, 2.902, 3.657, 3.96875,
4.10175, 4.0565, 3.44475, 5.7205, 61.93475, -3.85725, -4.5245390070922,
5.62880429175694, 2.47738918119605, 2.96300124018189, 2.73025552646406,
8.07212115316016, 10.5535326434138, 15.9591327947488, 1.96747660017018,
2.22629738160507, 3.58404764906088, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4.37888779527559,
-1.41959644816633, 1.0763348800732, 2.68114979918346, 2.80325842125764,
2.43790873353744, -2.15764063396585, 1.4799008091882, NA, NA,
NA, 1.37544255734004, 1.9209810662425, 2.80281184257786, 4.19364769870183,
4.92729231874391, 2.12704602596336, 0.595823284929961, 2.11401303299297,
2.72348504284468, 2.52902356618847, 2.32582482425984, 0.346862332597876,
1.82379803991995, 2.08507363127918, 5.08163579455077, 5.94934078367083,
7.23445185266149, 5.8378412444986, 4.63755947383151, -2.17174787920901,
1.40756281631882, -4.05886251253207, -0.0212408027324169, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, -0.479369880443708, -0.293425018839488, 0.0929350592321281,
0.123835254518112, 0.12706259774046, 0.116354410972905, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3.44976515683205,
4.48091561842034, 4.96571829617233, 1.65772899046248, 0.680583048407414,
1.98560374302681, 4.02555335612741, 4.81968688141083, 0.762689075630252,
1.88469387755102, 3.33593220338983, 0.309443507588533, 3.51697478991597,
2.86402247341315, 2.03178679830559, 5.51008520836107, 3.87780527915746,
2.40556264606919, 5.07700077828744, 4.83989720998532, 4.53742961245445,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1.98851777777778,
0.0187844444444444, 0.128071111111111, 0.699172682926829, 0.907074545454545,
0.795127607650579, 0.357956423686356, -0.463226859004222, 0.60042762366109,
0.855410812777128, 1.70155389507976, NA, NA, NA, 6.28810975609756,
5.65225290697674, 5.50109011627907, 5.46905424200278, 6.82666666666667,
9.63833333333333, 8.50433333333333, 7.02166666666667, 7.61833333333333,
8.26466666666667, 9.84033333333333, 9.239, 8.05, 6.783, 7.553,
8.29666724227249, 11.525749819631, 14.2648420619405, 11.2771348353078,
-14.1189900160153, -17.6900879135628, 14.6432951757972, 17.706462585034,
NA, NA, NA, 2.54016638089795, 2.28029063187611, -1.70405905251086,
-13.3982068926821, 1.08612445944267, 1.02272906761858, -4.90830977239342,
4.93192336244886, 2.71279872376048, 3.58730158730159, 2.67460317460317,
-14.7646639032994, -10.1970204908481, -2.43234807690238, 1.50090714167904,
0.777890139686844, 1.00461845428288, -0.577281452731578, 0.916700774289767,
0.693536467511796, 1.02936271521732, 0.885562723018554, 0.533956763933385,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 6.33737983599889, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 1.97118644067797, 2.74399323181049,
3.00780780780781, 3.58680659670165, 2.81889504170007, 2.50917809245523,
2.79962476547842, 2.3177358490566, 2.98221831785893, 3.53841135956795,
4.84714071243347, 4.42075856720104, 3.43539749078812, 2.10469553058082,
0.698847262247839, 1.15489967560666, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 16.48, 23.5307692307692, 26.2953846153846, 29.1553846153846,
34.8107692307692, 34.1, 31.7707692307692, 35.66, 42.6569230769231,
50.8294573643411, 60.6072371486256, 71.8920605209796, 73.1694942127795,
81.0144689246958, 101.277860326894, 84.9637442320369, 99.4780896110291,
NA, NA, NA, 3.21418291525857, 3.47403755478572, 2.82309202411355,
1.88046287028157, 2.54218477118243, 2.86466949503918, 2.7850953423992,
3.71485649429788, 3.57246064220529, 3.59200878943474, 3.39917935379717,
4.36734916933839, 3.47052845528455, 2.10217797766436, 2.50138966092273,
2.53216008085335, 5.6740159729317, 7.12959606279318, 3.0704500763493,
-9.36316494785294, 2.98453249387051, 0.15134546114962, -0.30269092229924,
NA, NA, NA, 2.5918904109589, 1.72054794520548, 3.37534246575342,
3.15616438356164, 2.44266666666667, 3.72987012987013, 4.66193853427896,
5.6548463356974, -6.70449172576832, 5.53191489361702, 7.55555555555556,
4.56737588652482, -0.699029126213592, 3.77435897435897, 5.6053580628542,
8.82009465467434, 9.75106700157242, 17.3575865849202, 6.79550870390565,
-10.0748330068806, 2.45326283957664, 12.7237136465324, 15.6632796497851,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 6.74737416450689, 3.73783734165596, 3.56847945896463,
4.01606499861509, 4.31513436031426, NA, NA, NA, 1.18216666666667,
1.15616666666667, 0.718833333333333, 0.846666666666667, 0.949833333333333,
0.855, 0.947, 1.362, 1.29283333333333, -0.4095, -1.71166666666667,
-6.73979933110368, -1.07073578595318, 0.439966555183946, 0.889597315436242,
1.29455445544554, 0.888846453280844, 2.07047744964451, 1.42014282853901,
-5.14692281267341, 2.76534467725315, 2.98839786207796, 2.61554688592088,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 9.3883876937312,
0.0920378175190796, 1.80967558338076, 4.26608220425823, 4.22229845626072,
8.08242880047986, -2.31627512828963, 7.1832993650083, 1.45599924522962,
-1.63071337035455, 2.12153047215424, -14.241522334182, -13.2118418381361,
NA, NA, NA, 115.623529411765, 210, 224.705882352941, 234.117647058824,
217.714285714286, 235.555555555556, 240, 244.878048780488, 305.853658536585,
316.818181818182, 346.363636363636, 419.724770642202, 467.43119266055,
561.467889908257, 683.48623853211, 792.201834862385, 953.902327704245,
1135.53943789665, 1172.64573991031, 854.803010181496, 1066.1084178052,
1080.66637439719, 1076.95690413369, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, -8.00416281221091, -10.8834412580944, 7.80989824236818,
-6.48704902867715, -27.7520814061055, 4.79648473635523, 8.26732673267327,
4.09595559080095, 4.86207928837189, -9.03804256745433, -3.14144062935344,
0.0326826252097871, -0.859022222222222, 2.15602936527451, NA,
NA, NA, NA, NA, NA, NA, Inf, 1.16978683393883, 1.51189430424169,
3.52039622666496, 3.18081603714028, 3.61737627328914, 3.10188513592364,
-7.09439807225699, -3.69752722803501, 0.707971643687819, 0.327086826731375,
1.23655832779557, 0.575014101013737, 0.473355975527496, -0.122513039981693,
-0.851168008718768, 0.840333176705788, 0.978143386038959, 0.767234041909876,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 3.9974572801517, 3.81505135065159, 3.59021071115013,
2.22077609062425, 1.36322005302483, 2.3571531896427, NA, NA,
NA, NA, NA, NA, NA, NA, 1.37809917355372, 5.96900826446281, 9.65909090909091,
8.60330578512397, 12.426, 21.7577639751553, 22.3953974895398,
12.6962809917355, 2.77066115702479, 5.46201232032854, 13.7374488657494,
4.1607203533809, 9.63248867259607, 12.3481116584565, -16.9937258042985,
9.91789166997749, 13.2756799577502, 7.46975276170437, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
0.564217391304348, -0.212196078431373, 0.0218077803203661, 1.11794583661086,
0.440747736316973, 0.363348739557059, 0.641371480161689, 1.61685900721797,
NA, NA, NA), class = "AsIs")
and B: I(inc/csout)
dput(head(B,600))
structure(c(NA, NA, 1.6175, 1.446, 0.9995, 1.999, 2.902, 3.657,
3.96875, 4.10175, 4.0565, 3.44475, 5.7205, 61.93475, -4.37702127659574,
-5.48471405481619, 5.62880429175694, 2.48174176657021, 2.85056176051437,
2.67146609592373, 7.95068353136579, 10.4269755112345, 15.9375059090479,
1.96710464127044, 2.22664817849489, 3.33193683532066, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2.46958661417323,
3.21177447300029, -1.28516110058166, 1.0933050087961, 2.73595610276559,
2.99856886650604, 2.66279506915451, -2.03313196852743, 1.54029613242514,
NA, NA, 1.00559289855816, 1.37544255734004, 1.9209810662425,
2.80281184257786, 4.19364769870183, 4.92729231874391, 2.12704602596336,
0.595823284929961, 2.11401303299297, 2.79331431759097, 2.4658012212017,
2.32582482425984, 0.346862332597876, 1.82379803991995, 2.08507363127918,
5.71682560206885, 5.63252814515248, 6.95230192325872, 5.36379165580937,
4.63755947383151, -2.23752373944775, 0.801826142613582, -4.05886251253207,
-0.0212111270744482, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.0609118506239777, -0.46619168322258,
-0.273575584586673, 0.0868731271713617, 0.121252977886906, 0.126940380912401,
0.116359932757648, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.295874906555075,
NA, NA, NA, NA, 2.71176375317173, 3.44976515683205, 4.48091561842034,
1.65520964549217, 1.65772899046248, 0.680583048407414, 1.98560374302681,
4.02555335612741, 4.50134453781513, 0.771768707482993, 1.87830508474576,
3.31905564924115, 0.308403361344538, 3.49909705036452, 2.85647576798639,
2.02752629476767, 5.49992524792771, 3.86932091296348, 2.40325224792595,
4.50212922173275, 2.72935574693607, 4.27727254986144, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.189564444444444,
-1.98851777777778, 0.0187844444444444, 0.112452682926829, 0.651501818181818,
0.916795915967128, 0.794619441852948, 0.35890755018202, -0.473067607413648,
0.627831250675921, 0.749854780188053, 1.53659424970853, NA, NA,
5.64291158536585, 5.99563953488372, 5.65225290697674, 5.26390820584145,
5.243, 6.82666666666667, 9.63833333333333, 8.50433333333333,
7.02166666666667, 7.61833333333333, 8.26466666666667, 9.84033333333333,
9.239, 8.05, 6.783, 7.82559143498532, 8.25334112069262, 11.5059848406901,
14.2409094021776, 11.2229529423791, -13.903429300047, -17.9612973562279,
14.619387755102, 17.706462585034, NA, NA, 2.478958140989, 2.18773270954903,
2.18862560772722, -1.69130819883693, -13.3982068926821, 1.07011419816462,
1.02272906761858, -4.90830977239342, 4.93192336244886, 2.72619047619048,
3.58730158730159, 2.49220723035908, -14.3167028199566, -10.2029468004578,
-2.79564571994062, 1.0112571815929, 0.781369908886687, 1.00884137370567,
-0.577493454139731, 0.929452560968677, 0.701576822662082, 1.03605050601517,
0.8831933500736, 0.536452684873194, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 0.492431541310724, 6.37958532695375, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 1.18508474576271, 1.9678510998308, 2.43498498498498,
3.00329835082459, 3.58538211954771, 2.81853600059939, 2.51332082551595,
2.81547169811321, 2.32570467867258, 2.98696865802967, 3.53706989369607,
4.81367137790562, 4.3791117807647, 3.42566215683504, 2.10174099052259,
0.698447811272854, 1.15344409599431, NA, NA, NA, NA, NA, NA,
NA, NA, 14.96, 16.48, 23.5307692307692, 26.2953846153846, 29.1553846153846,
34.8107692307692, 34.1, 31.7707692307692, 35.66, 42.9875968992248,
50.9162913495884, 60.8719388550928, 73.0775328999524, 75.8763564616902,
81.3488525672775, 101.094264996704, 84.6151321188249, 99.3965234503116,
NA, NA, 2.3167169493513, 2.94633564772966, 3.08802835253036,
2.72227364294659, 1.88046287028157, 2.54218477118243, 2.86466949503918,
2.7850953423992, 3.71485649429788, 3.8251978743918, 3.59200878943474,
3.45198192946663, 4.8729674796748, 3.45141240082874, 2.10217797766436,
2.71891375276011, 2.53216008085335, 5.6740159729317, 7.12959606279318,
3.25700635087106, -9.72848624269758, 2.98453249387051, 0.15134546114962,
-0.30269092229924, NA, NA, 2.53928767123288, 2.5918904109589,
1.72054794520548, 3.37534246575342, 3.072, 2.37922077922078,
3.39479905437352, 4.66193853427896, 5.6548463356974, -6.70449172576832,
5.53191489361702, 7.55555555555556, 4.68932038834951, -0.738461538461539,
3.79185986604843, 5.57274273187322, 8.79127611345953, 9.6938568354379,
17.2664485641585, 6.76008236653106, -10.0433576656954, 2.44686800894855,
12.7916783806579, 15.9669198174039, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3.89339789933161,
4.54057279236277, 3.04295028677116, 3.52709814421568, 3.94625587429463,
0.843121058173297, NA, NA, 1.16633333333333, 1.18216666666667,
1.15616666666667, 0.718833333333333, 0.846666666666667, 0.949833333333333,
0.855, 0.947, 1.362, 1.29283333333333, -0.4095, -1.71739130434783,
-6.73979933110368, -1.07073578595318, 0.441442953020134, 0.874917491749175,
1.15237157924115, 0.816345803596724, 2.04858840018688, 1.40567608276299,
-5.14637884112347, 2.72871855038456, 2.91316668996454, 2.59330191769775,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 11.880985426787,
9.24615559858754, 0.0919749573136027, 1.81008766936127, 4.28461978273299,
4.25916740682639, 8.24690457134787, -2.3184854446709, 7.19083897445574,
1.46494850253928, -1.64066137408225, 2.12416777231924, -14.2285966776933,
-13.1682814768967, NA, NA, 121.211764705882, 115.623529411765,
210, 224.705882352941, 227.428571428571, 211.666666666667, 217.435897435897,
228.292682926829, 244.878048780488, 285, 316.818181818182, 349.54128440367,
419.724770642202, 467.43119266055, 561.467889908257, 683.48623853211,
788.224554997718, 947.416137805984, 1123.31838565022, 1157.59185480301,
851.035698545615, 1060.4997807979, 1083.99296394019, 1079.33010136624,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5.07169287696577,
-8.00416281221091, -10.8834412580944, 7.80989824236818, -6.48704902867715,
-27.7520814061055, 2.01320132013201, 4.82440240172199, 3.09241756831886,
4.76328138092844, -8.83881012865689, -3.3861849659924, 0.0263111111111111,
-0.84864238294285, 2.13807997770656, NA, NA, NA, NA, NA, NA,
Inf, 1.13667965939339, 1.16978683393883, 1.51189430424169, 1.93641281650334,
3.18037779732132, 3.60374959261304, 2.95168386947919, -7.1346652146591,
-3.73879239931005, 0.618398531789007, 0.302060049537848, 1.15002820202747,
0.574163266612055, 0.472550297072245, -0.122848990949101, -0.853532127125251,
0.849206303333824, 0.831485168572279, 0.722226135762859, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 4.87792789880837, 4.00254595667558, 3.88103599648815,
3.94239575801398, 2.22077609062425, 1.36361444621245, 2.33424853766265,
NA, NA, NA, NA, NA, NA, NA, 2.1797520661157, 1.37809917355372,
5.96900826446281, 9.65909090909091, 8.328, 12.8633540372671,
21.9853556485356, 22.1177685950413, 12.6962809917355, 2.75359342915811,
4.94607660840461, 12.5518178729188, 4.10974995804665, 8.56844305120167,
11.0425844346549, -16.8586942126871, 9.88777396355955, 13.2233035244608,
7.5351552135845, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, -1.47473913043478, 0.508901960784314,
-0.0619107551487414, 0.0142787579128741, 1.07581896303132, 0.445689416361701,
0.34090023049512, 0.649568445989942, 1.70371965372768, NA, NA
), class = "AsIs")

Resources