I have a time series of 2D observations that I'm trying to smooth to take out some of the observation variability. I've been applying loess(), but I just noticed that it doesn't seem to smooth as a function of time; it smooths across the entire pooled set of coordinates instead. Am I missing something? Is there a different function I should be using?
df<-structure(list(timestamp = structure(c(1586488380, 1586488440,
1586488560, 1586488620, 1586488680, 1586488740, 1586488800, 1586488860,
1586489520, 1586489580, 1586489700, 1586489820, 1586489880, 1586489940,
1586490000, 1586490060, 1586490120, 1586490180, 1586490240, 1586490300,
1586490360, 1586490420, 1586490480, 1586490540, 1586490600, 1586490660,
1586490720, 1586490780, 1586490840, 1586490900, 1586490960, 1586491020,
1586491200, 1586491260, 1586491320, 1586491380, 1586491440, 1586491500,
1586491560, 1586491620, 1586491680, 1586491740, 1586491800, 1586491860,
1586491920, 1586491980, 1586492040, 1586492100, 1586492160, 1586492220,
1586492280, 1586492340, 1586492400, 1586492460, 1586492520, 1586492580,
1586492640, 1586492700, 1586492760, 1586492820, 1586492880, 1586492940,
1586493000, 1586493060, 1586493120, 1586493180, 1586493240, 1586493300,
1586493360, 1586493420, 1586493480, 1586493540, 1586493600, 1586493660,
1586493720, 1586493780, 1586493840, 1586493900, 1586493960, 1586494020,
1586494200, 1586494260, 1586494320, 1586494380, 1586494440, 1586494500,
1586494560, 1586494620, 1586494680, 1586494740, 1586494800, 1586494860,
1586494920, 1586494980, 1586495040, 1586495100, 1586495160, 1586495220,
1586495280, 1586495340, 1586495400, 1586495460, 1586495520, 1586495580,
1586495640, 1586495700, 1586495760, 1586495820, 1586495880, 1586495940,
1586496000, 1586496060, 1586496120, 1586496180, 1586496240, 1586496300,
1586496360, 1586496420, 1586496480, 1586496540, 1586496600, 1586496660,
1586496720, 1586496780, 1586496840, 1586496900, 1586496960, 1586497020,
1586497080, 1586497140, 1586497200, 1586497260, 1586497320, 1586497380,
1586497440, 1586497500, 1586497560, 1586497620, 1586497680, 1586497740,
1586497800, 1586497860, 1586497920, 1586497980, 1586498040, 1586498100,
1586498160, 1586498220, 1586498280, 1586498340), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), easting = c(740.582355718548, 740.582355718548,
739.726374785548, 739.611045841548, 739.508690311548, 739.398269506548,
739.278804356548, 739.627760514548, 737.913640733548, 738.088450601548,
738.551491861548, 738.957133488548, 739.137345557548, 739.304664573548,
739.460440784548, 739.605842807548, 739.741887116548, 739.719077482548,
739.369420509548, 738.973489249548, 738.521335985548, 739.279305656548,
739.993757669548, 740.085239162548, 740.172262825548, 740.255157063548,
740.334219013548, 740.409718260548, 740.481900024548, 740.550987811548,
740.573883125548, 740.252267406548, 739.261723439548, 738.935233921548,
738.774921432548, 738.615895069548, 738.451107559548, 738.280235586548,
738.493740162548, 738.717501067548, 738.925752666548, 739.120074872548,
739.301840012548, 739.472245999548, 739.632343117548, 739.890965132548,
740.098495936548, 740.293354354548, 740.476683179548, 740.649491986548,
740.678160672548, 740.443560695548, 740.398855065548, 740.451032168548,
740.467918582548, 740.160041067548, 739.819912921548, 739.641686751548,
739.457020461548, 739.265544595548, 739.193281488548, 739.227252654548,
738.995761471548, 738.822890745548, 738.775446949548, 738.726816309548,
738.676941805548, 738.625762928548, 739.254178659548, 739.727445331548,
740.019566884548, 740.129316037548, 740.255273807548, 740.442527947548,
740.615257127548, 740.775140223548, 740.923607252548, 741.055065964548,
741.139279130548, 740.907234314548, 739.290829342548, 739.115359714548,
740.664354207548, 740.589899151548, 740.578913554548, 740.571708783548,
740.568311076548, 740.568740893548, 740.527920123548, 740.358565457548,
740.392277156548, 740.370981239548, 740.289917518548, 740.214089159548,
740.192635592548, 740.176850935548, 740.160315351548, 740.143068630548,
740.103174446548, 740.008327647548, 740.058271768548, 740.205384482548,
740.211048172548, 740.150159818548, 740.122028309548, 740.230164637548,
740.271076846548, 740.075087486548, 739.768752873548, 739.586722485548,
739.940259334548, 740.233576255548, 740.473614136548, 740.495703912548,
740.341935547548, 740.186820856548, 740.204435025548, 740.299218490548,
740.318343269548, 740.238895133548, 739.999671854548, 740.062183564548,
740.196345466548, 740.329697802548, 740.418193609548, 740.311257937548,
740.270203214548, 740.209679752548, 740.146590442548, 740.079785501548,
740.170176300548, 740.268945921548, 740.217498771548, 740.133923060548,
740.117921377548, 740.177771453548, 740.140658663548, 740.080204534548,
740.108449333548, 740.145621912548, 740.182429420548, 740.010376475548,
739.819150336548, 739.616854492548, 739.369690457548, 739.104183601548,
738.938020260548, 738.843359187548, 738.802644324548, 738.761524527548
), northing = c(2307.15134120986, 2307.15134120986, 2307.60836846986,
2307.72110371186, 2307.83015111886, 2307.94605545486, 2308.06963872386,
2307.94323643186, 2308.11539257586, 2307.98516105286, 2307.67209087786,
2307.39795736686, 2307.27544716286, 2307.16124100486, 2307.05447137086,
2306.95438746086, 2306.86033624586, 2306.85049579286, 2307.02449397686,
2307.22230535086, 2307.44905018086, 2306.99878407786, 2306.57790074586,
2306.51052483586, 2306.44607531386, 2306.38433869586, 2306.32512182186,
2306.26824947086, 2306.21356227886, 2306.16091500786, 2306.12077142386,
2306.17964098286, 2306.35042514386, 2306.42512419786, 2306.46180283886,
2306.49818722086, 2306.53588972286, 2306.57498431686, 2306.47587680786,
2306.37665531786, 2306.28431106486, 2306.19814347186, 2306.11754401386,
2306.04198150286, 2305.97099021786, 2305.88981694586, 2305.83737042086,
2305.78847203986, 2305.74280289286, 2305.70008126986, 2305.72436278986,
2305.79211047386, 2305.75001607586, 2305.66373079386, 2305.59454921786,
2305.66723079486, 2305.74826733386, 2305.75631243686, 2305.76412484986,
2305.77169383886, 2305.76318962486, 2305.74988901286, 2305.84052390686,
2305.91140100786, 2305.95786180686, 2306.00518488486, 2306.05340722986,
2306.10256811686, 2305.87083530186, 2305.67745118186, 2305.55808577486,
2305.51324056486, 2305.46177226186, 2305.38525730986, 2305.31467748186,
2305.24934676386, 2305.18868082786, 2305.13496475486, 2305.08628958686,
2305.07937405386, 2305.33244795286, 2305.26640414086, 2304.97847050686,
2304.88865807586, 2304.78121096686, 2304.67333694586, 2304.56517670086,
2304.45687199986, 2304.26299422086, 2303.81398617786, 2303.81772073686,
2303.82576399386, 2303.82937144386, 2303.85166918186, 2303.85679798586,
2303.85841996086, 2303.85934299186, 2303.85962745886, 2303.81418344986,
2303.71268455886, 2303.70396413486, 2303.71187848686, 2303.71241867586,
2303.72409686386, 2303.75626565486, 2303.81432053886, 2303.80604508386,
2303.57280991386, 2303.21896587386, 2303.06912105986, 2303.28351126486,
2303.45378473786, 2303.49375232686, 2303.50460433986, 2303.48302188886,
2303.48174695086, 2303.52692291386, 2303.61686633486, 2303.41139580686,
2303.32679365886, 2303.06984393086, 2303.15017783486, 2303.29293566086,
2303.44528532286, 2303.48203523086, 2302.89274879786, 2302.81440275486,
2302.73512963586, 2302.65410710886, 2302.56757728186, 2302.77320543086,
2303.00846800486, 2303.01932301786, 2302.97477011386, 2303.03742546386,
2303.13970331386, 2303.07628123386, 2302.97297111586, 2303.02123867886,
2303.08476293486, 2303.14766331986, 2303.17026860886, 2303.18249014186,
2303.19025912386, 2303.15218828686, 2303.11064943486, 2302.98849464786,
2302.87632040886, 2302.82807292386, 2302.77934558786)), row.names = 5905:6054, class = "data.frame")
# Local regression of northing on easting only (span 0.5); the timestamp
# column is not part of this formula.
df.fitted <- loess(formula = northing ~ easting, data = df, span = 0.5)
# Store the fitted northing values alongside the observations.
df[["northing.fitted"]] <- df.fitted[["fitted"]]
# Observed track drawn as a path, fitted northing drawn as points.
ggplot(data = df, mapping = aes(x = easting, y = northing)) +
  geom_path(colour = "orangered2") +
  geom_point(mapping = aes(y = northing.fitted))
So, instead of smoothing the "cluster", I'd like to use a rolling average smoothing each x/y pair as a function of time.
You need to regress both easting and northing as functions of time to get smoother x, y values:
# Smooth each coordinate separately as a function of time, then draw the
# smoothed (easting, northing) pairs in time order.

# loess() is given a numeric predictor, so the POSIXct timestamps are
# converted with as.numeric() first.
df$numtime <- as.numeric(df$timestamp)
df.fitted.northing <- loess(northing ~ numtime, span = 0.5, data = df)
df.fitted.easting <- loess(easting ~ numtime, span = 0.5, data = df)

# Evaluate both fits on a regular grid of 1000 time points spanning the
# observed range. `length.out` is written in full rather than relying on
# partial matching of `len`.
newdat <- data.frame(
  numtime = seq(min(df$numtime), max(df$numtime), length.out = 1000)
)
newdat$northing <- predict(df.fitted.northing, newdat)
newdat$easting <- predict(df.fitted.easting, newdat)

# Original observations (path + points) with the smoothed path drawn on top;
# the colour aesthetic is mapped to constant labels so both get a legend entry.
ggplot(df, aes(easting, northing)) +
  geom_path(aes(color = "original path"), alpha = 0.6, size = 0.5,
            arrow = arrow(length = unit(0.1, "inches"))) +
  geom_point(aes(color = "original path"), alpha = 0.6, size = 1) +
  geom_path(data = newdat, size = 1, aes(color = "smoothed"),
            arrow = arrow(length = unit(0.1, "inches"))) +
  coord_equal() +
  theme_light() +
  scale_color_manual(values = c("original path" = "orangered2",
                                "smoothed" = "deepskyblue4"), name = "")
I keep getting an error because the bar graphs use quarterly data, while the line is drawn from data for the entire year, so it will have many more points.
The only issue is with the geom_line function which I am new to using. The error is -->
Error in scale_fill_manual(values = c("green", "yellow")) + geom_line(aes(x = dts2, : non-numeric argument to binary operator
# Inputs: ticker symbol, its financial statements and its price history.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
t <- "DG"
fin <- getFinancials(t, auto.assign = FALSE)

# Period labels: names of the second dimension of the balance-sheet table.
dts <- labels(fin$BS$A)[[2]]
# "<first 7 characters of the label>::<label>" range strings, used below to
# subset the price series (presumably "YYYY-MM::YYYY-MM-DD" -- confirm).
dts2 <- paste0(substr(dts, 1, 7), "::", dts)
stockprices <- getSymbols(t, auto.assign = FALSE)

# Last value of column 6 of the price series inside each range.
price <- rep(0, NROW(dts))
for (i in seq_along(price)) {
  price[i] <- as.vector(last(stockprices[dts2[i], 6]))
}

# Two-year windows "<year - 2>::<year>" used for the beta/return estimates.
yr <- as.numeric(substr(dts, 1, 4))
pastyr <- yr - 2
betayr <- paste0(pastyr, "::", yr)

# Rows pulled from the balance sheet (BS), income statement (IS) and
# cash-flow (CF) tables.
os <- fin$BS$A["Total Common Shares Outstanding", ]
gw <- fin$BS$A["Goodwill, Net", ]
ta <- fin$BS$A["Total Assets", ]
td <- fin$BS$A["Total Debt", ]
ni <- fin$IS$A["Net Income", ]
btax <- fin$IS$A["Income Before Tax", ]
atax <- fin$IS$A["Income After Tax", ]
intpaid <- fin$CF$A["Cash Interest Paid, Supplemental", ]

# Missing goodwill / interest paid are treated as zero.
gw[is.na(gw)] <- 0
intpaid[is.na(intpaid)] <- 0

# (Total assets - goodwill) per share outstanding.
taa <- (ta - gw) / os

# Interest paid relative to total debt; stays 0 where total debt is 0.
Rd <- rep(0, NROW(dts))
for (i in seq_along(dts)) {
  if (td[i] != 0) {
    Rd[i] <- intpaid[i] / td[i]
  }
}

gspc <- getSymbols("^GSPC", auto.assign = FALSE)
gs5 <- getSymbols("GS5", src = "FRED", auto.assign = FALSE)

# Per-period mean yearly index return, mean GS5 value and beta of the stock's
# weekly returns against the index's weekly returns, each over its window.
marketRisk <- rep(0, NROW(dts))
riskFree <- rep(0, NROW(dts))
beta <- rep(0, NROW(dts))
for (i in seq_along(dts)) {
  marketRisk[i] <- mean(yearlyReturn(gspc[betayr[i]]))
  riskFree[i] <- mean(gs5[betayr[i]])
  gspc.weekly <- weeklyReturn(gspc[betayr[i]])
  stockprices.weekly <- weeklyReturn(stockprices[betayr[i]])
  beta[i] <- CAPM.beta(stockprices.weekly, gspc.weekly)
}

# riskFree is divided by 100 before being combined with the returns.
Re <- (riskFree / 100) + beta * (marketRisk - (riskFree / 100))
E <- os * price
V <- E + td
Tc <- (btax - atax) / btax
wacc <- (E / V) * Re + (td / V) * Rd * (1 - Tc)
margin <- (ni / wacc) / os - taa

# Long-format table for plotting: one row per period and category.
taadf <- data.frame(dts, val = taa, cat = "ta")
margindf <- data.frame(dts, val = margin, cat = "margin")
mdf <- rbind(margindf, taadf)
#linrng = paste(dts[NROW(dts)],"::",dts[1],sep="")
#dfdt = data.frame(stockprices[linrng,6])
#dfdt2 = data.frame(dt = labels(dfdt)[[1]],dfdt$AAPL.Adjusted,cat="taa")
#names(dfdt2)=c("dt,price,cat")

# The original indexed dts2 with the loop variable left over from the last
# loop (i.e. the final period); that index is now written explicitly.
pricedf <- data.frame(as.vector((stockprices[dts2[NROW(dts)], 6])))
# Stacked bars of `val` per period from `mdf`, intended to be overlaid with a
# price line.
# NOTE(review): the ggplot(...) + geom_bar(...) line below does not end with
# `+`, so that expression is complete at the end of the line. The
# scale_fill_manual(...) + geom_line(...) lines after it are then evaluated as
# a separate expression, which is the expression named in the quoted
# "non-numeric argument to binary operator" error.
ggplot(mdf, aes(x=dts,y=val,fill=cat)) + geom_bar(stat="identity",color="black")
scale_fill_manual(values = c("green","yellow")) +
geom_line(aes(x=dts2, y=stockprices), stat = "identity",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE)
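Note that the object stockprices is an 'xts' object, so you can't use it directly inside a ggplot mapping; convert it to a data frame first. I picked the first variable of the stockprices object to show the code, but you probably want another one. Also note that the quoted error itself arises because the `geom_bar(...)` line does not end with `+`, so R evaluates `scale_fill_manual(...) + geom_line(...)` as a separate expression.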
library(dplyr)
library(quantmod)
library(PerformanceAnalytics)
library(ggplot2)
# Turn the price series into a data frame, copy its row names into a `date`
# column, and keep only the rows whose date appears in `dts`.
stockprices_df <- stockprices %>%
  as.data.frame() %>%
  mutate(date = rownames(.)) %>%
  filter(date %in% dts)

# Each layer brings its own data: bars from `mdf`, line from the filtered
# prices. `group = 1` puts every point of the line layer in a single group.
ggplot() +
  geom_col(mapping = aes(x = dts, y = val, fill = cat), data = mdf) +
  geom_line(
    mapping = aes(x = date, y = DG.Open, group = 1),
    data = stockprices_df
  ) +
  scale_fill_manual(values = c("green", "yellow"))
[
So I use the following functions for plotting most of the data I have to plot. I created it thanks to different chunks of code that I have found online. So far I have never encountered any issue with it.
Here is the plotting function first.
library(ggplot2)
library(reshape2)
#' Plot a given mean with error bars
#'
#' Draws one grey bar per technique with a black dot at the mean and an error
#' bar spanning the confidence interval, flips the coordinates so the bars are
#' horizontal, prints the prepared table and prints the plot.
#'
#' @param resultTable The table with all the results to plot. It is converted
#'   with `as.data.frame()` and is read through the columns `mean_time`,
#'   `lowerBound_CI` and `upperBound_CI`.
#' @param techniques The names of the techniques in the form of a list/vector;
#'   used as the tick labels of the technique axis.
#' @param nbTechs The number of given techniques; must be positive (expected
#'   to equal the number of rows of `resultTable`).
#' @param ymin The minimum value for y
#' @param ymax The maximum value for y
#' @param xAxisLabel The label for the x (vertical) axis
#' @param yAxisLabel The label for the y (horizontal) axis
#' @return The ggplot object, invisibly.
#'
barChartTime <- function(resultTable, techniques, nbTechs = -1, ymin, ymax, xAxisLabel = "I am the X axis", yAxisLabel = "I am the Y Label"){
  if (nbTechs <= 0) {
    stop('Please give a positive number of Techniques, nbTechs')
  }

  tr <- as.data.frame(resultTable)

  # Distance from the mean to each end of the confidence interval.
  tr$CI2 <- tr$upperBound_CI - tr$mean_time
  tr$CI1 <- tr$mean_time - tr$lowerBound_CI

  # Technique ids 0 .. nbTechs - 1 (any existing `technique` column is
  # overwritten); the ids are also the axis breaks that get relabelled below.
  tr$technique <- factor(seq.int(0, nbTechs - 1, 1))
  breaks <- as.character(tr$technique)

  print(tr)

  g <- ggplot(tr, aes(x = technique, y = mean_time)) +
    geom_bar(stat = "identity", fill = I("#CCCCCC")) +
    geom_errorbar(aes(ymin = mean_time - CI1, ymax = mean_time + CI2),
                  width = 0, # Width of the error bars
                  size = 1.1) +
    #labs(title="Overall time per technique") +
    labs(x = xAxisLabel, y = yAxisLabel) +
    scale_y_continuous(limits = c(ymin, ymax)) +
    # breaks/labels are passed by name: the original passed them positionally
    # through `...`, which depends on the argument order of the scale
    # constructor.
    # NOTE(review): name = "" is kept from the original; it appears to take
    # precedence over the x label given to labs() above -- confirm whether
    # xAxisLabel should be used here instead.
    scale_x_discrete(name = "", breaks = breaks, labels = techniques) +
    coord_flip() +
    theme(
      panel.background = element_rect(fill = 'white', colour = 'white'),
      axis.title = element_text(size = rel(1.2), colour = "black"),
      axis.text = element_text(size = rel(1.2), colour = "black"),
      panel.grid.major = element_line(colour = "#DDDDDD"),
      panel.grid.major.y = element_blank(),
      panel.grid.minor.y = element_blank()
    ) +
    geom_point(size = 4, colour = "black") # dots

  print(g)
  invisible(g)
}
Now, here is (a simplified version of) the data that I am using (and that reproduces the error):
EucliP,AngularP,EucliR,AngularR,EucliSp,AngularSp,EucliSl,AngularSl
31.6536,30.9863,64.394,92.7838,223.478,117.555,44.7374,25.4852
12.3592,40.7639,70.2508,176.55,10.3927,145.909,143.025,126.667
14.572,8.98445,113.599,150.551,47.1545,54.3019,10.7038,47.7004
41.7957,20.9542,55.1732,67.1647,52.364,41.3655,62.7036,75.65
135.868,83.7135,14.0262,69.7183,44.987,35.9599,19.5183,66.0365
33.5359,17.2129,6.95909,47.518,224.561,91.4999,67.1279,31.4079
25.7285,33.6705,17.4725,58.45,43.1709,113.847,28.9496,20.0574
48.4742,127.588,75.0804,89.1176,31.4494,27.9548,38.4563,126.248
31.9831,80.0161,19.9592,145.891,55.2789,142.738,94.5126,136.099
17.4044,52.3866,49.9976,150.891,104.936,77.2849,232.23,35.6963
153.359,151.897,41.8876,46.3893,79.5218,75.2011,68.9786,91.8972
And here is the code that I am using:
# Read the measurements; the first line of the file holds the column names.
data <- read.table("*Path_to_file*.csv", header = TRUE, sep = ",")

# The *Log columns are currently plain copies of the raw columns.
data$EucliPLog <- (data$EucliP) #Before here I used to use a log transform that I tried to remove for some testing
data$EucliRLog <- (data$EucliR) #Same thing
data$EucliSpLog <- (data$EucliSp) #Same thing
data$EucliSlLog <- (data$EucliSl) #Same thing

# Confidence interval of the mean for each column; each t.test() is run once
# and both bounds are taken from its result.
ciP <- t.test(data$EucliPLog)$conf.int
ciR <- t.test(data$EucliRLog)$conf.int
ciSp <- t.test(data$EucliSpLog)$conf.int
ciSl <- t.test(data$EucliSlLog)$conf.int
a1 <- ciP[1]
a2 <- ciP[2]
b1 <- ciR[1]
b2 <- ciR[2]
c1 <- ciSp[1]
c2 <- ciSp[2]
d1 <- ciSl[1]
d2 <- ciSl[2]

# NOTE(review): exp() is the back-transform that pairs with the log version of
# the columns above. With the log removed it is applied to the raw values,
# which produces extremely large numbers, far above ymax = 90 used below.
analysisData <- list(
  ratio = c("Sl", "Sp", "R", "P"),
  pointEstimate = c(exp(mean(data$EucliSlLog)), exp(mean(data$EucliSpLog)),
                    exp(mean(data$EucliRLog)), exp(mean(data$EucliPLog))),
  ci.max = c(exp(d2), exp(c2), exp(b2), exp(a2)),
  ci.min = c(exp(d1), exp(c1), exp(b1), exp(a1))
)

# Columns are named at construction. The original labelled ci.max as the lower
# bound and ci.min as the upper bound, and the upper-bound name carried a
# trailing space; both are corrected here.
datatoprint <- data.frame(
  technique = factor(analysisData$ratio),
  mean_time = analysisData$pointEstimate,
  lowerBound_CI = analysisData$ci.min,
  upperBound_CI = analysisData$ci.max
)

barChartTime(datatoprint, analysisData$ratio, nbTechs = nrow(datatoprint),
             ymin = 0, ymax = 90, xAxisLabel = "", yAxisLabel = "Title")
So if I use the log() that I mention in the comments of the last piece of code, everything works fine and my plots are displayed. However, when I tried removing the log, I got the famous
Error in matrix(value, n, p) :
'data' must be of a vector type, was 'NULL'
I have tried looking for null values in my data, but there are none, and I do not know where to look next. I would love to get some help with that.
Thanks in advance
Edit: Here is the result of dput on datatoprint:
structure(list(technique = structure(c(3L, 4L, 2L, 1L), .Label = c("P",
"R", "Sl", "Sp"), class = "factor"), mean_time = c(1.04016257618464e+32,
1.64430609815788e+36, 7.5457775364611e+20, 3.85267453902928e+21
), lowerBound_CI = c(6.64977706609883e+50, 5.00358136618364e+57,
2.03872433045407e+30, 4.93863589006376e+35), `upperBound_CI ` = c(16270292584857.9,
540361462434140, 279286207454.44, 30055062.6409769)), .Names = c("technique",
"mean_time", "lowerBound_CI", "upperBound_CI "), row.names = c(NA,
-4L), class = "data.frame")
And the dput on analysisData:
structure(list(ratio = c("Sl", "Sp", "R", "P"), pointEstimate = c(1.04016257618464e+32,
1.64430609815788e+36, 7.5457775364611e+20, 3.85267453902928e+21
), ci.max = c(6.64977706609883e+50, 5.00358136618364e+57, 2.03872433045407e+30,
4.93863589006376e+35), ci.min = c(16270292584857.9, 540361462434140,
279286207454.44, 30055062.6409769)), .Names = c("ratio", "pointEstimate",
"ci.max", "ci.min"))
Without the log, nothing is displayed because the values are above 10^40, whereas with the log they are below the upper limit (90).
I don't get the error you get, though.