Simplify plot (ggplot) with for-loop in R - r

I tried to read several calculated variables into different columns of a data frame, which is not possible because all columns must have the same length. So I created a list (Data_Overall) instead and built up the layers of my ggplot one at a time.
I was unable to employ a for-loop or the paste function here, so my code looks like the following:
Data_Overall
as.data.frame(Data_Overall[[1]])
# One geom_point layer per list element: x is the element's index (1..6),
# y is that element's character vector converted to numeric.
ggplot() +geom_point(data=data.frame(Data_Overall[[1]]),aes(1,as.numeric(Data_Overall[[1]])))+
geom_point(data=data.frame(Data_Overall[[2]]),aes(2,as.numeric(Data_Overall[[2]])))+
geom_point(data=data.frame(Data_Overall[[3]]),aes(3,as.numeric(Data_Overall[[3]])))+
geom_point(data=data.frame(Data_Overall[[4]]),aes(4,as.numeric(Data_Overall[[4]])))+
geom_point(data=data.frame(Data_Overall[[5]]),aes(5,as.numeric(Data_Overall[[5]])))+
geom_point(data=data.frame(Data_Overall[[6]]),aes(6,as.numeric(Data_Overall[[6]])))+
# One geom_boxplot layer per list element, drawn at the same x positions.
# NOTE(review): alpha=0.2 sits inside aes(); ggplot2 normally expects constant
# settings outside aes() - confirm this is intended.
geom_boxplot(data=data.frame(Data_Overall[[1]]),aes(1,as.numeric(Data_Overall[[1]]),alpha=0.2))+
geom_boxplot(data=data.frame(Data_Overall[[2]]),aes(2,as.numeric(Data_Overall[[2]]),alpha=0.2))+
geom_boxplot(data=data.frame(Data_Overall[[3]]),aes(3,as.numeric(Data_Overall[[3]]),alpha=0.2))+
geom_boxplot(data=data.frame(Data_Overall[[4]]),aes(4,as.numeric(Data_Overall[[4]]),alpha=0.2))+
geom_boxplot(data=data.frame(Data_Overall[[5]]),aes(5,as.numeric(Data_Overall[[5]]),alpha=0.2))+
geom_boxplot(data=data.frame(Data_Overall[[6]]),aes(6,as.numeric(Data_Overall[[6]]),alpha=0.2))
Data
Data_Overall<-list()
Data_Overall[[1]]<-c("90","80","90","90","80","70","70","100","100","50","99.9","70","50","80","30","50","50","90","90","50","60","85","50","10", "50", "30", "50", "30", "95", "50", "50", "50", "20", "50", "100", "60")
Data_Overall[[2]]<-c("80","100","70")
Data_Overall[[3]]<-c("100","50","99.9","70","50","80","30","50","50","90","90","50")
Data_Overall[[4]]<-c("80","100","70")
Data_Overall[[5]]<-c("100","50","99.9","70","50","80","50","50","90","90","30","50","50","90","90","50")
Data_Overall[[6]]<-c("50","88","70","76")

Okay, here is the list of values for Data_Overall; now it should be ready to run. The problem is essentially what Roman referred to: I had some loops that read data of different sizes, which did not fit into a matrix or data.frame - thus I chose the list().
# Build the list element by element. The vectors have different lengths,
# which is why a list is used rather than a matrix or data.frame.
# Values are stored as character strings and converted with as.numeric()
# when plotted.
Data_Overall <- list()
Data_Overall[[1]] <- c("90", "80", "90", "90", "80", "70", "70", "100", "100",
                       "50", "99.9", "70", "50", "80", "30", "50", "50", "90",
                       "90", "50", "60", "85", "50", "10", "50", "30", "50",
                       "30", "95", "50", "50", "50", "20", "50", "100", "60")
Data_Overall[[2]] <- c("80", "100", "70")
Data_Overall[[3]] <- c("100", "50", "99.9", "70", "50", "80", "30", "50", "50",
                       "90", "90", "50")
Data_Overall[[4]] <- c("80", "100", "70")
Data_Overall[[5]] <- c("100", "50", "99.9", "70", "50", "80", "50", "50", "90",
                       "90", "30", "50", "50", "90", "90", "50")
Data_Overall[[6]] <- c("50", "88", "70", "76")
# One geom_point layer per list element: x is the element's index (1..6),
# y is that element's character vector converted to numeric.
ggplot() +
geom_point(data=data.frame(Data_Overall[[1]]),aes(1,as.numeric(Data_Overall[[1]]))) +
geom_point(data=data.frame(Data_Overall[[2]]),aes(2,as.numeric(Data_Overall[[2]]))) +
geom_point(data=data.frame(Data_Overall[[3]]),aes(3,as.numeric(Data_Overall[[3]])))+
geom_point(data=data.frame(Data_Overall[[4]]),aes(4,as.numeric(Data_Overall[[4]])))+
geom_point(data=data.frame(Data_Overall[[5]]),aes(5,as.numeric(Data_Overall[[5]])))+
geom_point(data=data.frame(Data_Overall[[6]]),aes(6,as.numeric(Data_Overall[[6]])))+
# One geom_boxplot layer per list element, drawn at the same x positions.
# NOTE(review): alpha=0.2 sits inside aes(); ggplot2 normally expects constant
# settings outside aes() - confirm this is intended.
geom_boxplot(data=data.frame(Data_Overall[[1]]),aes(1,as.numeric(Data_Overall[[1]]),alpha=0.2))+
geom_boxplot(data=data.frame(Data_Overall[[2]]),aes(2,as.numeric(Data_Overall[[2]]),alpha=0.2))+
geom_boxplot(data=data.frame(Data_Overall[[3]]),aes(3,as.numeric(Data_Overall[[3]]),alpha=0.2))+
geom_boxplot(data=data.frame(Data_Overall[[4]]),aes(4,as.numeric(Data_Overall[[4]]),alpha=0.2))+
geom_boxplot(data=data.frame(Data_Overall[[5]]),aes(5,as.numeric(Data_Overall[[5]]),alpha=0.2))+
geom_boxplot(data=data.frame(Data_Overall[[6]]),aes(6,as.numeric(Data_Overall[[6]]),alpha=0.2))

The OP has supplied data in a list Data_Overall. Each list element is a vector of calculated numeric values of a particular variable. The vectors do have varying lengths.
As ggplot2 prefers data to be supplied in long format anyway, the list of vectors needs to be converted into a data.frame with two columns: Variable and Value. (This follows the suggestion in this comment).
Prepare data
library(data.table) # CRAN version 1.10.4 used here
# convert each list element into a data.table,
# combine resulting list of data.tables into one large data.table
# thereby creating an id column named Variable
DT <- rbindlist(lapply(Data_Overall, data.table), idcol = "Variable")
# rename the Value column
setnames(DT, "V1", "Value")
# convert Value from character to numeric
DT[, Value := as.numeric(Value)]
# turn Variable into factor to avoid continuous scale when plotting
DT[, Variable := factor(Variable)]
DT
# Variable Value
# 1: 1 90
# 2: 1 80
# 3: 1 90
# 4: 1 90
# 5: 1 80
# 6: 1 70
# ...
#68: 5 90
#69: 5 90
#70: 5 50
#71: 6 50
#72: 6 88
#73: 6 70
#74: 6 76
# Variable Value
Create plot
library(ggplot2)
ggplot(DT, aes(Variable, Value, group = Variable)) +
geom_boxplot() +
geom_point()
The boxplots are plotted in the first layer, and the points on top. So, there is no need to set alpha = 0.2.
Data
Data_Overall <- list(c("90", "80", "90", "90", "80", "70", "70", "100", "100",
"50", "99.9", "70", "50", "80", "30", "50", "50", "90", "90",
"50", "60", "85", "50", "10", "50", "30", "50", "30", "95", "50",
"50", "50", "20", "50", "100", "60"), c("80", "100", "70"), c("100",
"50", "99.9", "70", "50", "80", "30", "50", "50", "90", "90",
"50"), c("80", "100", "70"), c("100", "50", "99.9", "70", "50",
"80", "50", "50", "90", "90", "30", "50", "50", "90", "90", "50"
), c("50", "88", "70", "76"))

Related

Group periodic data in dataframe

I have a dataset comprised of periodic data. I want to group the data by period.
The full dataset is provided: LINK
Data for one period of the dataset is provided and plotted:
> dput(DATA[1:122,c(2,9)])
structure(list(Actuator.Force = c(-4853.5854, -4566.9771, -4198.7612,
-3774.5527, -3317.6958, -2847.5229, -2364.7585, -1880.9485, -1405.4272,
-930.289, -467.04822, -18.867363, 421.17499, 838.86719, 1239.9121,
1626.0669, 1990.6389, 2334.0852, 2655.344, 2962.0227, 3243.7817,
3506.2249, 3744.2622, 3959.8271, 4156.7061, 4324.9048, 4469.229,
4591.6689, 4687.4194, 4764.0801, 4814.6167, 4840.313, 4846.0181,
4826.3135, 4777.6553, 4696.0791, 4583.854, 4442.457, 4272.5254,
4076.7224, 3851.1211, 3603.1853, 3330.7456, 3038.3157, 2724.115,
2386.5476, 2032.5809, 1660.0547, 1268.0084, 859.16675, 432.4075,
-14.131592, -479.29309, -955.67108, -1444.614, -1937.2562, -2437.0085,
-2941.8914, -3450.9009, -3959.9597, -4468.9795, -4981.2549, -5492.6997,
-6002.334, -6510.5425, -7016.2432, -7517.8286, -8013.1348, -8500.4199,
-8974.8867, -9439.5479, -9890.5938, -10326.367, -10744.421, -11147.754,
-11534.83, -11902.651, -12248.997, -12577.919, -12885.458, -13172.309,
-13441.554, -13691.502, -13922.634, -14127.116, -14305.272, -14458.267,
-14582.934, -14685.274, -14758.539, -14806.058, -14830.719, -14836.625,
-14822.204, -14773.916, -14700.484, -14597.968, -14469.834, -14312.099,
-14126.422, -13915.136, -13676.505, -13412.388, -13120.703, -12807.961,
-12473.883, -12115.751, -11740.082, -11342.633, -10929.945, -10502.158,
-10062.869, -9611.8271, -9146.6006, -8673.3545, -8191.7417, -7700.769,
-7200.9346, -6695.8809, -6185.2378, -5670.8711, -5154.9995),
Rotation = c(-0.005985651, -0.00565783616666667, -0.00522075016666667,
-0.0046743925, -0.00406732866666667, -0.00343598223333333,
-0.00286534205, -0.00219757165, -0.00156622503333333, -0.000934878566666667,
-0.000267108158333333, 0.000303531998333333, 0.00084988955,
0.0013962471, 0.00193046351666667, 0.00242825596666667, 0.00288962463333333,
0.0033995583, 0.0038366445, 0.00424944783333333, 0.004637969,
0.0050507725, 0.005378587, 0.00565783616666667, 0.00594922716666667,
0.00620419383333333, 0.006410596, 0.0065684325, 0.00670198666666667,
0.00683554116666667, 0.0069205295, 0.00699337683333333, 0.0070055185,
0.006993377, 0.00696909483333333, 0.00688410516666667, 0.006774834,
0.00659271483333333, 0.006386313, 0.00613134633333333, 0.0058399555,
0.0055364235, 0.00518432633333333, 0.00483222916666667, 0.0044072845,
0.00403090483333333, 0.00353311216666667, 0.0029988961, 0.00251324506666667,
0.0020275938, 0.00144481233333333, 0.00086203085, 0.000303531998333333,
-0.000315673273333333, -0.000983443666666667, -0.00162693151666667,
-0.00233112578333333, -0.0029988961, -0.00366666666666667,
-0.00433443683333333, -0.00496578316666667, -0.00563355366666667,
-0.0062770415, -0.0069690945, -0.0076611475, -0.00836534183333333,
-0.00902097083333333, -0.00968874116666667, -0.0103443703333333,
-0.0109514346666667, -0.011594922, -0.012177704, -0.0127969093333333,
-0.0133918318333333, -0.0139746131666667, -0.0145209698333333,
-0.014982339, -0.0154437081666667, -0.0159050765, -0.0163178798333333,
-0.0167185421666667, -0.0170706398333333, -0.0174105943333333,
-0.0177505506666667, -0.018017659, -0.0182483433333333, -0.0184547455,
-0.0186490056666667, -0.0187704183333333, -0.01887969, -0.0189525386666667,
-0.018988962, -0.0190011033333333, -0.018988962, -0.0189768206666667,
-0.0189282553333333, -0.0188189838333333, -0.018673289, -0.018442604,
-0.0182240605, -0.017993377, -0.0176534206666667, -0.0173256068333333,
-0.0169492263333333, -0.0165485635, -0.0161357608333333,
-0.0156622516666667, -0.0152373058333333, -0.0147152313333333,
-0.0141931568333333, -0.0136103748333333, -0.0130275935,
-0.0123962468333333, -0.0118013235, -0.0112064015, -0.0105507718333333,
-0.00993156683333333, -0.0092637965, -0.008620309, -0.00791611466666667,
-0.00719977883333333, -0.0065441495)), row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57",
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68",
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100",
"101", "102", "103", "104", "105", "106", "107", "108", "109",
"110", "111", "112", "113", "114", "115", "116", "117", "118",
"119", "120", "121", "122"), class = "data.frame")
The next row of the data starts a new period and so on. I want to group the rows by the periods that exist in the data.
Although the code provided in this example can find the periods if fully completed data is provided (it worked for the first three periods), when applied to the entire dataset of periodic data, it resulted in NaNs.
ssp <- spectrum(DATA$Rotation, plot = FALSE, method = "ar", n.freq = 1e6)
period <- 1 / with(ssp, freq[spec == max(spec)])
DATA$Loop <- (seq_len(nrow(DATA)) %/% period) + 1
Here's an approach where I define a region where both the Rotation is increasing and Rotation is at least as high as the starting value. Then I count how many times we enter that region and that's the period number.
library(dplyr)
# Rotation value of the first row; used below as the threshold for the
# "right side" test.
Rot_start = DATA$Rotation[1]
rbind(DATA, DATA) %>% # to demonstrate two cycles
# Rot_pos: TRUE where Rotation is higher than in the previous row
# (the first row has no predecessor and is compared with the default -1).
mutate(Rot_pos = Rotation > lag(Rotation, default = -1),
# Rot_rightside: TRUE where Rotation is at least the starting value.
Rot_rightside = Rotation >= Rot_start,
# Rot_region: both conditions hold at once.
Rot_region = Rot_pos & Rot_rightside,
# new_period: TRUE only on the row where the region is entered, i.e.
# Rot_region is TRUE now but was FALSE (or absent) on the previous row.
new_period = Rot_region & !(lag(Rot_region, default = FALSE)),
# period: running count of region entries, which numbers the periods.
period = cumsum(new_period))
# more succinct, same workings
# mutate(Rot_region = Rotation > lag(Rotation, default = -1) & Rotation >= Rot_start,
# period = cumsum(Rot_region & !(lag(Rot_region, default = FALSE))))

Why are those 3 loops faster than 1 short lapply

While trying to optimize and benchmark a function, I was able to shrink 3 for loops into 1 short lapply call, but the function got slower.
I am trying to understand why that happens, since with the 3 loops I preallocate 3 lists of the same length and fill them in 3 separate loops, which seems unnecessary and inefficient.
## Data #################
Grid = structure(list(ID = 1:81, X = c(99.99922283, 299.99922281, 499.9992228,
699.99922279, 899.99922277, 1099.99922275, 1299.99922274, 1499.99922273,
1699.99922271, 99.99922293, 299.99922291, 499.99922291, 699.99922289,
899.99922287, 1099.99922286, 1299.99922284, 1499.99922283, 1699.99922282,
99.99922303, 299.99922302, 499.99922301, 699.999223, 899.99922298,
1099.99922296, 1299.99922295, 1499.99922294, 1699.99922292, 99.99922314,
299.99922312, 499.99922311, 699.9992231, 899.99922308, 1099.99922307,
1299.99922306, 1499.99922304, 1699.99922303, 99.99922324, 299.99922323,
499.99922322, 699.9992232, 899.99922319, 1099.99922317, 1299.99922316,
1499.99922315, 1699.99922313, 99.99922335, 299.99922333, 499.99922332,
699.99922331, 899.9992233, 1099.99922328, 1299.99922327, 1499.99922325,
1699.99922324, 99.99922345, 299.99922344, 499.99922342, 699.99922341,
899.9992234, 1099.99922338, 1299.99922337, 1499.99922335, 1699.99922334,
99.99922356, 299.99922354, 499.99922353, 699.99922352, 899.9992235,
1099.99922348, 1299.99922347, 1499.99922345, 1699.99922344, 99.99922367,
299.99922365, 499.99922364, 699.99922362, 899.99922361, 1099.99922359,
1299.99922358, 1499.99922356, 1699.99922355), Y = c(1699.9975638,
1699.99756369, 1699.99756357, 1699.99756347, 1699.99756336, 1699.99756325,
1699.99756314, 1699.99756303, 1699.99756292, 1499.99756399, 1499.99756388,
1499.99756377, 1499.99756366, 1499.99756355, 1499.99756344, 1499.99756333,
1499.99756322, 1499.99756311, 1299.99756418, 1299.99756408, 1299.99756396,
1299.99756386, 1299.99756375, 1299.99756363, 1299.99756353, 1299.99756342,
1299.99756331, 1099.99756438, 1099.99756427, 1099.99756416, 1099.99756405,
1099.99756394, 1099.99756384, 1099.99756372, 1099.99756361, 1099.99756351,
899.99756457, 899.99756446, 899.99756434, 899.99756424, 899.99756414,
899.99756403, 899.99756392, 899.99756381, 899.9975637, 699.99756477,
699.99756466, 699.99756454, 699.99756443, 699.99756433, 699.99756422,
699.99756411, 699.99756401, 699.99756389, 499.99756496, 499.99756485,
499.99756474, 499.99756463, 499.99756452, 499.99756441, 499.9975643,
499.9975642, 499.99756409, 299.99756516, 299.99756505, 299.99756494,
299.99756483, 299.99756472, 299.99756461, 299.9975645, 299.99756439,
299.99756428, 99.99756535, 99.99756524, 99.99756513, 99.99756502,
99.99756491, 99.9975648, 99.99756469, 99.99756458, 99.99756448
)), row.names = c("11", "12", "13", "14", "15", "16", "17", "18",
"19", "21", "22", "23", "24", "25", "26", "27", "28", "29", "31",
"32", "33", "34", "35", "36", "37", "38", "39", "41", "42", "43",
"44", "45", "46", "47", "48", "49", "51", "52", "53", "54", "55",
"56", "57", "58", "59", "61", "62", "63", "64", "65", "66", "67",
"68", "69", "71", "72", "73", "74", "75", "76", "77", "78", "79",
"81", "82", "83", "84", "85", "86", "87", "88", "89", "91", "92",
"93", "94", "95", "96", "97", "98", "99"), class = "data.frame")
mut2 = sapply(1:100, function(i) sample(c(0,1), size = nrow(Grid), replace = T))
## Functions #################
## Triple For loop
## For every column of `trimtonOut`, return the rows of `Grid` whose entry in
## that column equals 1. Deliberately written as three separate loops.
##
## trimtonOut: two-dimensional object (e.g. a matrix); each column is compared
##             against 1 and the matching row positions index `Grid`
## Grid:       data.frame that is subset by those row positions
## Returns a list with one subset of `Grid` per column of `trimtonOut`
## (an empty list when `trimtonOut` has no columns).
getRects <- function(trimtonOut, Grid){
  len1 <- ncol(trimtonOut)
  childli <- childnew <- rectidli <- vector("list", len1)
  # seq_len() gives an empty sequence for zero columns, whereas 1:len1 would
  # count down (1, 0) and index a column that does not exist.
  for (i in seq_len(len1)) {
    # Step 1: pull out each column.
    childli[[i]] <- trimtonOut[, i]
  }
  for (u in seq_len(len1)) {
    # Step 2: positions within the column that are equal to 1.
    rectidli[[u]] <- which(childli[[u]] == 1, arr.ind = TRUE)
  }
  for (z in seq_len(len1)) {
    # Step 3: the corresponding rows of Grid.
    childnew[[z]] <- Grid[rectidli[[z]], ]
  }
  return(childnew)
}
## Shortest Lapply
## Single-expression lapply variant: for every column of `trimtonOut`, return
## the rows of `Grid` whose entry in that column equals 1.
##
## trimtonOut: two-dimensional object (e.g. a matrix) compared against 1
## Grid:       data.frame that is subset by the matching row positions
## Returns a list with one subset of `Grid` per column (empty list for zero
## columns).
getRects1 <- function(trimtonOut, Grid){
  # seq_len() avoids the 1:0 countdown when there are no columns.
  lapply(seq_len(ncol(trimtonOut)), function(i) {
    Grid[which(trimtonOut[, i] == 1, arr.ind = TRUE), ]
  })
}
## Shorter Lapply
## Two-step lapply variant: for every column of `trimtonOut`, first find the
## positions equal to 1, then return those rows of `Grid`.
##
## trimtonOut: two-dimensional object (e.g. a matrix) compared against 1
## Grid:       data.frame that is subset by the matching row positions
## Returns a list with one subset of `Grid` per column (empty list for zero
## columns).
getRects2 <- function(trimtonOut, Grid){
  # seq_len() avoids the 1:0 countdown when there are no columns.
  lapply(seq_len(ncol(trimtonOut)), function(i) {
    tmp <- which(trimtonOut[, i] == 1, arr.ind = TRUE)
    Grid[tmp, ]
  })
}
## Longest Lapply
## Three-step lapply variant: for every column of `trimtonOut`, extract the
## column, find the positions equal to 1, then return those rows of `Grid`.
##
## trimtonOut: two-dimensional object (e.g. a matrix) compared against 1
## Grid:       data.frame that is subset by the matching row positions
## Returns a list with one subset of `Grid` per column (empty list for zero
## columns).
getRects3 <- function(trimtonOut, Grid){
  # seq_len() avoids the 1:0 countdown when there are no columns.
  lapply(seq_len(ncol(trimtonOut)), function(i) {
    tmp <- trimtonOut[, i]
    tmp1 <- which(tmp == 1, arr.ind = TRUE)
    Grid[tmp1, ]
  })
}
## Execute and Compare #################
getRectV <- getRects(mut2, Grid)
getRectV1 <- getRects1(mut2, Grid)
getRectV2 <- getRects2(mut2, Grid)
getRectV3 <- getRects3(mut2, Grid)
identical(getRectV,getRectV1)
identical(getRectV,getRectV2)
identical(getRectV,getRectV3)
## Benchmark #################
library(microbenchmark)
# mut2 = sapply(1:400, function(i) sample(c(0,1), size = nrow(Grid), replace = T))
mc = microbenchmark(
loop = getRects(mut2, Grid),
lap1 = getRects1(mut2, Grid),
lap2 = getRects2(mut2, Grid),
lap3 = getRects3(mut2, Grid)
)
mc
Are you sure that those time differences are that significant?
library(microbenchmark)
# mut2 = sapply(1:400, function(i) sample(c(0,1), size = nrow(Grid), replace = T))
mc = microbenchmark(
loop = getRects(mut2, Grid),
lap1 = getRects1(mut2, Grid),
lap2 = getRects2(mut2, Grid),
lap3 = getRects3(mut2, Grid)
)
mc
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> loop 2.651485 2.699166 3.195301 2.756171 3.136741 8.010173 100
#> lap1 2.755571 2.828128 3.098850 2.877806 3.012487 7.427598 100
#> lap2 2.737105 2.808924 3.118260 2.863221 2.939996 13.706736 100
#> lap3 2.719101 2.787040 3.191893 2.852963 3.004811 8.490867 100

Change color of a specific coordinate

I want to change the color of specific points, namely the same points that carry the annotations.
Any ideas?
p1 <- ggplot(HiBAP1517, aes(BPM, Yld)) +
geom_point(shape=16) +
geom_smooth(method=lm, se = F) +
theme(axis.title.x = element_text(color="black", size=14, face="bold"),
axis.title.y = element_text(color="black", size=14, face="bold"))
p2 <- p1 +
annotate(geom="text", x=1879, y=892.02, label="Rialto",
color="darkorange", size = 5, hjust=1, vjust=1.3, fontface =2)
p3 <- p2 +
annotate(geom="text", x=1654.75, y=834.2375, label="Savannah",
color="firebrick1", size = 5, hjust=1, vjust=1, fontface =2)
pfinal <- p3 +
labs(x = expression("AGDM"[PM]^{}*(gm^{-2})),
y = expression("GY"*(gm^{-2})))
This is my output, but I would like to change the color (instead of black) of those 2 coordinates:
Data sample:
Genotype,BPM,Yld
1,1767.793447,747.0708034
2,2074.815941,775.8880562
3,2197.933995,854.3810136
5,2085.627286,845.9306447
6,1908.97774,841.4318038
7,2120.24666,875.5534429
8,2226.617509,764.3849451
9,2035.68002,810.2658242
10,2153.727,861.7024631
11,1993.568134,782.5763292
12,2013.199982,822.6565187
13,2078.275912,837.2819632
14,2042.456487,802.6913977
16,1840.058841,767.6509829
17,2013.338146,801.2064103
18,2087.151352,822.1910199
19,1988.038384,859.573342
20,2083.092896,887.2783898
21,2072.905795,861.3044422
23,1849.744525,723.5014595
24,1785.04038,747.4940519
25,2078.402869,835.7669124
26,1698.390774,681.256732
27,2065.842661,852.3073467
28,2020.285009,811.6889063
29,2039.137248,821.7951099
30,1855.665106,781.0350726
31,1792.32475,744.9001931
32,1992.616447,860.7054072
33,2025.79755,834.1452611
34,2023.274784,835.4102703
35,1703.837196,682.9995098
36,1740.44177,713.3121368
37,1970.331012,816.5239645
38,1990.223669,838.9949534
39,2081.559891,822.5936391
40,1968.990856,852.1259441
41,2178.322511,920.80226
42,1887.572381,721.0746569
43,2103.964882,821.6521912
44,2097.040605,873.0062511
45,1864.779016,755.1746154
46,1935.743565,895.4951282
47,2191.797365,888.7284615
48,1968.150754,863.7490909
49,1858.735915,759.7144347
50,1933.34954,774.4202087
51,1680.540128,717.2402198
52,1748.214736,783.3395385
53,2183.694734,855.5897436
54,2142.662802,912.635349
55,1892.205584,776.5070164
56,2230.304238,887.8378102
57,2141.882287,903.7212821
58,1983.755009,815.5541958
59,1954.653032,743.0290819
60,1801.192428,718.5391635
61,1920.709571,808.6727692
62,1796.291216,699.0526007
63,2026.074655,909.3961954
64,1863.574774,729.9547929
65,1924.971832,770.2818388
66,2129.910527,794.0297343
67,2090.201938,809.6094569
68,1987.074651,731.8146606
69,2053.104282,839.4181954
70,1872.403668,787.2339391
71,1961.144455,824.335206
72,2135.414422,881.9237509
73,1857.780642,779.9428159
74,2058.696424,840.2234927
76,2169.489819,805.3868184
77,1891.844601,756.8752683
78,2099.708756,830.6765073
79,1976.981377,786.4878009
81,1932.909878,800.0033701
82,2101.603045,834.2990498
83,1867.872044,735.4201911
84,1870.947954,703.6186056
85,2135.962836,798.3315211
86,1859.497846,762.135947
87,1966.35974,776.6730353
88,2088.086246,808.0767316
89,1964.134743,851.5441764
90,2211.81001,866.3412008
91,1881.56405,805.7430148
92,1921.941058,725.2508829
93,1576.551861,606.5037422
95,2249.995426,882.4130493
96,2092.694714,778.8794369
97,2099.861152,840.9202391
98,1837.6733,760.0247786
99,1986.16533,796.1227279
100,1981.047087,747.7190033
Rialto,1879,892.02
Savannah,1654.75,834.2375
I assume you want to change the color of the points corresponding to the labels, since their precise coordinates along the x/y-axis aren't shown, and you'll have more work pinpointing them before the question about color comes up. If that's not the case, I'll delete my answer.
# define color corresponding to each genotype
HiBAP1517$color <- case_when(HiBAP1517$Genotype == "Savannah" ~ "firebrick1",
HiBAP1517$Genotype == "Rialto" ~ "darkorange",
TRUE ~ "black")
# plot
ggplot(HiBAP1517,
# specify color aesthetic here for both geom_point & geom_text to inherit
aes(x = BPM, y = Yld, color = color)) +
geom_point(shape = 16) +
geom_smooth(color = "#3366FF", # maintain default color for geom_smooth line
method = lm, se = F) +
# position labels based on their coordinates, rather than hard-code them via annotate()
geom_text(aes(label = ifelse(Genotype %in% c("Savannah", "Rialto"),
Genotype, "")),
hjust = 1, vjust = 1) +
# use defined colors directly
scale_color_identity() +
# other aesthetic parameters, irrelevant to the question at hand
labs(x = expression("AGDM"[PM]^{}*(gm^{-2})),
y = expression("GY"*(gm^{-2}))) +
theme_classic() +
theme(axis.title.x = element_text(color="black", size=14, face="bold"),
axis.title.y = element_text(color="black", size=14, face="bold"))
Data used:
> dput(HiBAP1517)
structure(list(Genotype = c("1", "2", "3", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "16", "17", "18", "19", "20",
"21", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32",
"33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43",
"44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54",
"55", "56", "57", "58", "59", "60", "61", "62", "63", "64", "65",
"66", "67", "68", "69", "70", "71", "72", "73", "74", "76", "77",
"78", "79", "81", "82", "83", "84", "85", "86", "87", "88", "89",
"90", "91", "92", "93", "95", "96", "97", "98", "99", "100",
"Rialto", "Savannah"), BPM = c(1767.793447, 2074.815941, 2197.933995,
2085.627286, 1908.97774, 2120.24666, 2226.617509, 2035.68002,
2153.727, 1993.568134, 2013.199982, 2078.275912, 2042.456487,
1840.058841, 2013.338146, 2087.151352, 1988.038384, 2083.092896,
2072.905795, 1849.744525, 1785.04038, 2078.402869, 1698.390774,
2065.842661, 2020.285009, 2039.137248, 1855.665106, 1792.32475,
1992.616447, 2025.79755, 2023.274784, 1703.837196, 1740.44177,
1970.331012, 1990.223669, 2081.559891, 1968.990856, 2178.322511,
1887.572381, 2103.964882, 2097.040605, 1864.779016, 1935.743565,
2191.797365, 1968.150754, 1858.735915, 1933.34954, 1680.540128,
1748.214736, 2183.694734, 2142.662802, 1892.205584, 2230.304238,
2141.882287, 1983.755009, 1954.653032, 1801.192428, 1920.709571,
1796.291216, 2026.074655, 1863.574774, 1924.971832, 2129.910527,
2090.201938, 1987.074651, 2053.104282, 1872.403668, 1961.144455,
2135.414422, 1857.780642, 2058.696424, 2169.489819, 1891.844601,
2099.708756, 1976.981377, 1932.909878, 2101.603045, 1867.872044,
1870.947954, 2135.962836, 1859.497846, 1966.35974, 2088.086246,
1964.134743, 2211.81001, 1881.56405, 1921.941058, 1576.551861,
2249.995426, 2092.694714, 2099.861152, 1837.6733, 1986.16533,
1981.047087, 1879, 1654.75), Yld = c(747.0708034, 775.8880562,
854.3810136, 845.9306447, 841.4318038, 875.5534429, 764.3849451,
810.2658242, 861.7024631, 782.5763292, 822.6565187, 837.2819632,
802.6913977, 767.6509829, 801.2064103, 822.1910199, 859.573342,
887.2783898, 861.3044422, 723.5014595, 747.4940519, 835.7669124,
681.256732, 852.3073467, 811.6889063, 821.7951099, 781.0350726,
744.9001931, 860.7054072, 834.1452611, 835.4102703, 682.9995098,
713.3121368, 816.5239645, 838.9949534, 822.5936391, 852.1259441,
920.80226, 721.0746569, 821.6521912, 873.0062511, 755.1746154,
895.4951282, 888.7284615, 863.7490909, 759.7144347, 774.4202087,
717.2402198, 783.3395385, 855.5897436, 912.635349, 776.5070164,
887.8378102, 903.7212821, 815.5541958, 743.0290819, 718.5391635,
808.6727692, 699.0526007, 909.3961954, 729.9547929, 770.2818388,
794.0297343, 809.6094569, 731.8146606, 839.4181954, 787.2339391,
824.335206, 881.9237509, 779.9428159, 840.2234927, 805.3868184,
756.8752683, 830.6765073, 786.4878009, 800.0033701, 834.2990498,
735.4201911, 703.6186056, 798.3315211, 762.135947, 776.6730353,
808.0767316, 851.5441764, 866.3412008, 805.7430148, 725.2508829,
606.5037422, 882.4130493, 778.8794369, 840.9202391, 760.0247786,
796.1227279, 747.7190033, 892.02, 834.2375)), class = "data.frame", row.names = c(NA,
-96L))

combine two data frames into one while keeping the original row numbers

I'm struggling with a very simple problem. I have two data frames to be combined into a single data frame while retaining their original row names. If you're interested to play with the two data frames:
> dput(cc)
structure(list(c = c(166.081273211195, 117.874627144804, 85.7050194973198,
122.960746859139, 144.149802403233, 90.8034500957001, 89.5265981283352,
77.8535718910714, 144.544005656701, 115.597165020403, 109.028391182666,
89.4045716355402, 77.7944830105746, 69.3378920684953, 79.9094499459695,
146.768077595585, 157.933946809176, 92.562729606313, 62.5081476457419,
90.1081848285295, 111.830482369239, 111.596975757741, 99.2311075024839,
145.204385556523, 99.0215341758211, 75.4302512245677, 92.0094563363458,
77.7314808830408)), .Names = "c", row.names = c("26", "27", "28",
"29", "35", "36", "37", "38", "39", "40", "46", "47", "48", "49",
"50", "51", "52", "56", "57", "58", "59", "60", "61", "62", "69",
"70", "71", "72"), class = "data.frame")
> dput(ccc)
structure(list(b = c(76.376257255471, 61.8314936138378, 62.769450181685,
73.6356164203567, 111.690756826382, 76.9294523843767, 61.3534699857719,
69.3647221333577, 83.9764878084258, 81.3800252294203, 69.5091780233591,
87.3595961209547, 78.5074999563006, 74.4479256924594, 81.5920316281566,
96.3417259554163, 75.4138056616399, 76.0553034201146, 95.1759950844736,
81.3252467041995, 86.306305649635, 70.5626459312969, 72.7797520793756,
119.49702877934, 123.268678343102, 88.0450051118928, 76.2139948860248,
98.1496728839206, 126.396927030103, 146.058540478643, 115.7341525964,
87.280600158726, 78.0274068331766, 122.817977752389, 142.491559175427,
152.895839114334, 94.4932174696818, 117.167042165763, 85.5340971715004,
101.480170738897, 117.759691799033, 128.998051359269, 98.3180491401911,
84.5915489017958, 87.4927520958843, 75.5366495973031, 118.088343275321,
121.375320935357, 94.7724147096235, 70.2266610201599, 123.158462686523,
76.87408931845, 94.4365460662552, 105.952134808703, 76.821070196668
)), .Names = "b", row.names = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "30", "31", "32",
"33", "34", "41", "42", "43", "44", "45", "53", "54", "55", "63",
"64", "65", "66", "67", "68", "73", "74", "75", "76", "77", "78",
"79", "80", "81", "82", "83"), class = "data.frame")
The easiest way to combine is
> c(cc$c, ccc$b)
[1] 166.08127 117.87463 85.70502 122.96075 144.14980 90.80345 89.52660 77.85357 144.54401
[10] 115.59717 109.02839 89.40457 77.79448 69.33789 79.90945 146.76808 157.93395 92.56273
[19] 62.50815 90.10818 111.83048 111.59698 99.23111 145.20439 99.02153 75.43025 92.00946
[28] 77.73148 76.37626 61.83149 62.76945 73.63562 111.69076 76.92945 61.35347 69.36472
[37] 83.97649 81.38003 69.50918 87.35960 78.50750 74.44793 81.59203 96.34173 75.41381
[46] 76.05530 95.17600 81.32525 86.30631 70.56265 72.77975 119.49703 123.26868 88.04501
[55] 76.21399 98.14967 126.39693 146.05854 115.73415 87.28060 78.02741 122.81798 142.49156
[64] 152.89584 94.49322 117.16704 85.53410 101.48017 117.75969 128.99805 98.31805 84.59155
[73] 87.49275 75.53665 118.08834 121.37532 94.77241 70.22666 123.15846 76.87409 94.43655
[82] 105.95213 76.82107
But doing this recreate the row numbers. Is there any simple function to combine while keeping the row names intact? Thanks!
These lines of code should solve the problem
# Collect the row names of both data frames as numbers so that they can be
# sorted numerically rather than alphabetically.
z0 <- as.numeric(c(rownames(cc), rownames(ccc)))
# Stack the two value columns and re-attach the original row names.
z <- data.frame(c(cc$c, ccc$b))
row.names(z) <- z0
# Order the rows by their original row number. drop = FALSE is required:
# without it, row-subsetting a one-column data.frame collapses to a plain
# vector and the row names that were just assigned are lost.
z[order(z0), , drop = FALSE]

Doing curve fitting in R for a power series

I am trying to estimate the rate of return of an investment scheme that applies dollar cost averaging, where the value grows as listed below (sorry that MathJax looks like Greek to me, so I didn't use it):
x: regular contribution, which is 1500 in my case
y: rate of return PLUS 100% (e.g 1.07 for a 7% return), which is the parameters that I want to estimate.
Time 1: xy
Time 2: (x + xy)*y
Time 3: (x + (x + xy)*y)*y
Time 4: (x + (x + (x + xy)*y)*y)*y
Time 5: (x + (x + (x + (x + xy)*y)*y)*y)*y
Time 6: (x + (x + (x + (x + (x + xy)*y)*y)*y)*y)*y
And the list goes on.
After simplifying the equation, if I calculate it correctly, it should be a power series:
xy(1 + y + y^2 + y^3 + y^4 + y^5) for Time 6, and in general xy(1 + y + ... + y^(n-1)) for Time n
I know that equation above can be used for the nls function in R, as suggested in http://www.walkingrandomly.com/?p=5254. But the problem is, the investment scheme has been traded for more than 6 times, the number of trades is variable and I prefer not to fix the number of trades in the formula.
I wonder if R can create a formula with variable length of the power series, something like a function?
update 01
Thanks for the comment @Roland, I have dput my dataframe as below:
structure(list(date = 1:62, value = c(1500, 3008.1048, 4279.09223337264,
5701.16001583254, 7545.25391699441, 8883.87795645887, 11192.7249445628,
13043.5267669473, 14396.3707754063, 16677.2027610312, 18474.8268536672,
20225.6882177597, 21889.6372952495, 24090.0451286292, 25305.8719822623,
26293.5164474925, 27470.5608573055, 26851.4637959011, 25610.4708389126,
29781.3033136099, 30244.449772352, 31757.1977515, 35216.3065708333,
38661.857424377, 40153.0021899712, 41453.1839013205, 39626.0241467687,
42464.6515262833, 44415.7606695956, 46456.2932413184, 49539.1291983018,
51223.0944673951, 53534.0828137635, 56511.2727118443, 60750.8112270199,
62420.4165280642, 64561.1738159384, 67269.7609015725, 69582.2433935286,
68461.4426685366, 72790.7668201147, 73029.1128824367, 77963.2040906503,
81782.8304828104, 84781.7088147301, 87010.8577769314, 85461.5060309602,
90165.7453255817, 91340.1347579196, 92918.1083054977, 96713.3387975151,
99841.2477244806, 101538.099862003, 104946.468993318, 103233.508326534,
106416.67466519, 109991.955526668, 110800.989092493, 112258.758666778,
118567.887527905, 120872.966926589, 127711.586247323), expected_value = c(1511.96121064336,
3035.97901230344, 4572.1495459301, 6120.56971911465, 7681.33721220313,
9254.55048445835, 10840.3087802712, 12438.7121354211, 14049.861383387,
15673.8581617081, 17310.8049183956, 18960.8049183956, 20623.9622501033,
22300.3818319294, 23990.1694189188, 25693.4316094218, 27410.2758518191,
29140.8104512998, 30885.1445766939, 32643.3882673588, 34415.6524401213,
36202.0488962746, 38002.6903286308, 39817.6903286308, 41647.1633935093,
43491.224933518, 45349.9912792063, 47223.5796887596, 49112.1083553966,
51015.6964148254, 52934.4639527589, 54868.5320124903, 56818.0226025291,
58783.0587042976, 60763.7642798896, 62760.2642798896, 64772.6846512559,
66801.1523452654, 68845.7953255225, 70906.7425760312, 72984.1241093319,
75078.0709747036, 77188.7152664305, 79316.190132135, 81460.6297811776,
83622.169493123, 85800.9456262742, 87997.0956262742, 90210.7580347771,
92442.0724981876, 94691.1797764704, 96958.2217520299, 99243.3414386608,
101546.68299057, 103868.391711469, 106208.614063744, 108567.497677691,
110945.191360831, 113341.845107297, 115757.610107297, 118192.63875665,
120647.084666402)), .Names = c("date", "value", "expected_value"
), row.names = c("63", "62", "61", "60", "59", "58", "57", "56",
"55", "54", "53", "52", "51", "50", "49", "48", "47", "46", "45",
"44", "43", "42", "41", "40", "39", "38", "37", "36", "35", "34",
"33", "32", "31", "30", "29", "28", "27", "26", "25", "24", "23",
"22", "21", "20", "19", "18", "17", "16", "15", "14", "13", "12",
"11", "10", "9", "8", "7", "6", "5", "4", "510", "410"), class = c("tbl_df",
"tbl", "data.frame"))

Resources