convert dataframe to time series for arima - r

I am having problems converting the following dataset to a ts object to be used with stats::arima.
I was able to convert it to an xts object, but arima does not seem to like it. Can someone guide me on
how to convert it to ts? I really need to use an ARIMA model here. Thanks.
library(ggfortify)
library(xts)
wt <- structure(list(SampleDate = structure(c(13687, 13694, 13701,
13708, 13715, 13722, 13729, 13736, 13743, 13750, 13757, 13764,
13771, 13778, 13785), class = "Date"), DOC = c(3, 10, 17, 24,
31, 38, 45, 52, 59, 66, 73, 80, 87, 94, 101), AvgWeight = c(1,
1.66666666666667, 2.06666666666667, 2.275, 3.83333333333333,
6.2, 7.4, 8.5, 10.25, 11.1, 13.625, 15.2, 16.375, 17.8, 21.5),
PondName = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "Pond01", class = "factor")), row.names = c(NA,
15L), class = "data.frame")
# Build an xts series from wt$AvgWeight, indexed by 15 dates spaced 7 days
# apart starting at 2007-06-23.
# NOTE(review): the index is regenerated with seq() rather than taken from
# wt$SampleDate -- presumably the same dates; confirm.
pond <- as.xts(wt$AvgWeight,order.by=seq(as.Date("2007-06-23"), by=7, len=15))
# Fit arima() with all of its default arguments (no `order` is supplied).
d.arima <- arima(pond)
#arima is not recognized.....probably because I need a ts and not a xts object here.....
# Plot the fitted model together with a 3-step-ahead prediction at level 0.95,
# then label the axes and switch to the black-and-white theme.
autoplot(d.arima, predict = predict(d.arima, n.ahead = 3,prediction.interval = TRUE,level=0.95),
ts.colour='dodgerblue',predict.colour='green',
predict.linetype='dashed',ts.size=1.5,conf.int.fill='azure3') + xlab('DOC') + ylab('AvgWeight-grs') +
theme_bw()
I get this weird plot...

Related

Plotly: How to convert ggplot object into plotly?

I have one data set with two columns. The first column is named Centile.threshold and the second is named Effective.tax.rates. The next step is to make a plot with the code below.
# Data and code
library("rJava")
library("xlsxjars")
library("xlsx")
require(tidyr)
require(plyr)
library("ggplot2")
library("plotly")
g4_data_ext<-data.frame(structure(list(Centile.threshold = c(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100), Effective.tax.rates = c(11.4875111183361,
8.07673874931679, 7.8639563682086, 6.84656368538612, 6.8383437884744,
5.83532681932012, 5.11955857753708, 4.68757220539562, 4.66492423437793,
4.63051650494602, 4.71570390296145, 4.4419481131279, 4.16374366969064,
4.4424750798236, 4.2633646392858, 4.10185821346884, 2.29819561722,
2.01895390855722, 0, 0, 2.80530318111453, 2.83367683951859, 3.03173307975026,
3.58758933227946, 3.05869003045607, 3.59225918463074, 3.01588015121759,
3.55943967998446, 3.6220565232692, 3.40885422585891, 3.51447401518606,
3.68902868712004, 3.77018314638409, 3.72783452684771, 3.80791342516448,
3.99449874248864, 4.08421307782513, 4.07517557211112, 4.19659018929637,
4.22887420998102, 4.25529382081159, 4.36205679154288, 4.40690982734329,
4.33909305037396, 4.45990415426276, 4.59436808108174, 4.6831546716255,
4.73811656768519, 4.75412915916737, 4.84778797281815, 4.94690785473091,
5.06784298188807, 5.1769208879221, 5.2520552039406, 5.33650672817794,
5.43499638671921, 5.52400199193912, 5.58169115527766, 5.70509046165446,
5.76549758450655, 5.86333222670147, 5.87097687497217, 5.98729128544292,
6.07397530734785, 6.15030076581313, 6.21615540600908, 6.28135059352123,
6.38292345287997, 6.43416757218245, 6.5863284138631, 6.63365437304645,
6.70316768627345, 6.7816891944299, 6.85128738244695, 6.94261253911407,
7.01673024329712, 7.11081973369591, 7.18077796481166, 7.26197149513331,
7.32607460317916, 7.39638728837014, 7.47062968448649, 7.55194205005014,
7.64318101794584, 7.73728594723894, 7.79092205170689, 7.88152530983832,
7.97428540786095, 8.09278589483141, 8.20373396784042, 8.27757060469128,
8.40889176349213, 8.50851684368756, 8.64124701008068, 8.72559960562268,
8.85276486059087, 9.06564270204267, 9.26861906650096, 9.43047799204161,
10.2298639144453), grp_id = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6,
6, 6, 6, 7, 8), grp_label = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 2L, 3L), .Label = c("<=50%",
"=99%", ">99%", "51%-60%", "61%-70%", "71%-80%", "81%-90%", "91%-98%"
), class = "factor")), row.names = c(NA, -100L), class = "data.frame"))
This is code for plotting a plot with ggplot. This code works well.
# Assign every centile to one of eight reporting groups in a single vectorised
# pass instead of a row-by-row if/else chain.
#
# cut() uses right-closed intervals, so the bins are (-Inf, 50], (50, 60],
# (60, 70], (70, 80], (80, 90], (90, 98], (98, 99] and (99, Inf). For
# whole-number centiles this gives exactly the groups the if/else chain
# produced. Fractional centiles (e.g. 50.5) now land in the bin that contains
# them rather than falling through to ">99%", and NA centiles yield NA instead
# of stopping with an error.
centile_breaks <- c(-Inf, 50, 60, 70, 80, 90, 98, 99, Inf)
centile_labels <- c(
  "<=50%", "51%-60%", "61%-70%", "71%-80%",
  "81%-90%", "91%-98%", "=99%", ">99%"
)

# labels = FALSE returns the integer bin number (1 to 8) for each row.
centile_group <- cut(
  g4_data$Centile.threshold,
  breaks = centile_breaks,
  labels = FALSE
)

# grp_id stays a plain numeric column; grp_label is a factor whose levels are
# the labels actually present, in factor()'s default sorted order.
g4_data_ext <- data.frame(
  g4_data,
  grp_id = as.numeric(centile_group),
  grp_label = factor(centile_labels[centile_group])
)
color_code<-factor(c(rep("dark turquoise", 6), "tomato", "orange red"))
means_g4<-data.frame(ddply(g4_data_ext,~grp_id+grp_label,summarise,mean=mean(Effective.tax.rates),sd=sd(Effective.tax.rates)), color_code)
ggplot(data = means_g4, aes(x=factor(grp_id),y=mean, label =mean ))+
scale_x_discrete("Group",breaks=c(1:8), labels=means_g4$grp_label)+
coord_cartesian(ylim = c(-3, 12)) +
geom_bar(stat="identity", fill=color_code)+
geom_text(aes(y = mean*1.1, label = round(mean,2)), position=position_dodge(0.9))+
theme(axis.text.x = element_text(angle = 0))
But my intention is to convert this plot with ggplotly(). I tried to convert it, but it does not work, so can anybody help me convert this plot into a plotly object?
When I try
ggplotly(ggplot(data = means_g4, aes(x=factor(grp_id),y=mean, label =mean ))+
scale_x_discrete("Group",breaks=c(1:8), labels=means_g4$grp_label)+
coord_cartesian(ylim = c(-3, 12)) +
geom_bar(stat="identity", fill=color_code)+
geom_text(aes(y = mean*1.1, label = round(mean,2)), position=position_dodge(0.9))+
theme(axis.text.x = element_text(angle = 0)))
I get the error
Error in nchar(axisObj$ticktext) : 'nchar()' requires a character vector
This error goes away if I leave out the scale_x_discrete term. The only thing there that looks like it should be a character vector is labels=means_g4$grp_label, and indeed means_g4$grp_label is a factor. So convert it, and the ggplotly() works:
ggplotly(ggplot(data = means_g4, aes(x=factor(grp_id),y=mean, label =mean ))+
scale_x_discrete("Group",breaks=c(1:8), labels=as.character(means_g4$grp_label))+
coord_cartesian(ylim = c(-3, 12)) +
geom_bar(stat="identity", fill=color_code)+
geom_text(aes(y = mean*1.1, label = round(mean,2)), position=position_dodge(0.9))+
theme(axis.text.x = element_text(angle = 0)))
This looks like a bug in ggplotly(), which should be doing that conversion automatically.
user2554330 figured out the hard part of your question. The rest is easy, and since I don't see your actual attempt at converting ggplot to plotly, here's how you can do that:
From a "standard" ggplot approach, just include ggplotly:
p <- ggplot(...)
p <- p + geom_point(...) # just an example...
fig <- ggplotly(p)
fig
Here's an example:
library(plotly)
library(tidyverse)
library(ggplot2)
# ggplot
p <- ggplot(data=iris, aes(x = Sepal.Length, y = Sepal.Width))
p <- p + geom_point(aes(color=Species, shape=Species)) +
xlab("Sepal Length") + ylab("Sepal Width") +
ggtitle("Sepal Length-Width")
# plotly
fig <- ggplotly(p)
fig
Plot:

ggplot2: How to change the width of boxplots according to another variable?

I created a boxplot showing the dispersal distance $dist of some species $spe, and I would like the width of the boxes to be proportional to the density of regeneration of these species. I used "varwidth" and weight aesthetic as shown below, but this is still not correct, as it is still proportional to the number of observations and not only to the density of regeneration...
(for the density, I calculated the proportion for each species, so it goes from 10 to 100. It is given in the column data_dist2$prop2)
p <- ggplot(data_dist2, aes(x = reorder(spe, prop2), y = dist)) +
coord_flip() +
geom_boxplot(varwidth = TRUE, alpha=0.3, aes(weight=data_dist2$prop2), fill='grey10')
Would you have any idea how to make the boxplot exactly proportional to my prop2 column?
Reproducible example:
structure(list(spe = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Abies concolor", "Picea abies", "Sequoia semp."
), class = "factor"), dist = c(0, 0, 3, 3, 4, 4, 25, 46, 59,
113, 113, 9, 12, 12, 12, 15, 22, 22, 22, 22, 35, 35, 36, 49,
85, 85, 90, 5, 5, 1, 1, 8, 13, 48, 48, 52, 52, 52, 65, 89), prop2 = c(92.17,
92.17, 92.17, 92.17, 92.17, 92.17, 92.17, 92.17, 92.17, 92.17,
92.17, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9,
10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100)), row.names = c(NA,
-40L), class = "data.frame")
Weight doesn't seem to be designed exactly for this, but you can hack it a bit. First note that the weight given to each group is the sum of the weights of the observations, so if you have a different number of observation for each species then you may need to change prop2 to the current value divided by the number of observations in the group. (I can't tell from your example if this applies)
Then note that the width is proportional to the square root of the weight, so change your code to reverse that with:
# With varwidth = TRUE the box width follows the square root of the group
# weight, so squaring prop2 cancels the square root and leaves the width
# proportional to prop2.
# prop2 is referenced by its bare column name: aes() evaluates it inside the
# plot data, whereas data_dist2$prop2 bypasses the layer data and can fall out
# of step with the rows ggplot2 is actually drawing.
p <- ggplot(data_dist2, aes(x = reorder(spe, prop2), y = dist)) +
  coord_flip() +
  geom_boxplot(
    aes(weight = prop2^2),
    varwidth = TRUE,
    alpha = 0.3,
    fill = "grey10"
  )
Miff beats me to it, but anyway here's my answer. As Miff said, you can weight the width by your prop2.
ggplot(data_dist2, aes(x = reorder(spe, prop2), y = dist)) +
geom_boxplot(aes(weight = prop2),
varwidth = TRUE,
fill='grey10', alpha=0.3) +
coord_flip()
But geom_boxplot() implicitly takes the sample size into account. So you need to divide that away in your weights. Here's how you can do it with data.table.
library(data.table)
setDT(data_dist2) # convert to data.table
data_dist2[, weight := prop2 / .N, by = spe] # Divide prop2 by sample size for each species
ggplot(data_dist2, aes(x = reorder(spe, prop2), y = dist)) +
geom_boxplot(aes(weight = weight), # note weight = weight, not weight = prop2
varwidth = TRUE,
fill='grey10', alpha=0.3) +
coord_flip()

left_join duplicates even after troubleshooting

Sample data:
full<-structure(list(Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("AKS",
"AOK", "BTX", "GTX", "HKS", "JKS", "LOK", "MKS", "MOK", "PKS",
"SKS", "VTX"), class = "factor"), CT_NT = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = c("CT", "NT"), class = "factor"), Depth = c(5L,
10L, 15L, 5L, 10L, 15L), Site = c(1L, 1L, 1L, 1L, 1L, 1L), PW = c(22.8,
21.5, 18.2, 22.5, 20.5, 19.2), BD = c(1.1, 1.2, 1.1, 1.3, 1.3,
1.5)), .Names = c("Location", "CT_NT", "Depth", "Site", "PW",
"BD"), row.names = c(NA, 6L), class = "data.frame")
osu<-structure(list(Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("AKS",
"AOK", "BTX", "GTX", "HKS", "JKS", "LOK", "MKS", "MOK", "PKS",
"SKS", "VTX"), class = "factor"), CT_NT = structure(c(1L, 1L,
1L, 2L, 2L, 2L), .Label = c("CT", "NT"), class = "factor"), Depth = c(5L,
10L, 15L, 5L, 10L, 15L), pH = c(5.1, 5.4, 5.9, 5.2, 5.9, 6.2),
N = c(50, 31, 22, 35, 17, 8), P = c(122, 55, 34, 107, 23,
17), K = c(1301, 1202, 1078, 1196, 1028, 948), OM = c(2.3,
1.8, 1.5, 2.1, 1.4, 1.2), NH4 = c(19.3, 14.5, 11.6, 12.3,
8.6, 8.4), Sand = c(22.5, 25, 25, 25, 22.5, 18.8), Silt = c(56.3,
52.5, 50, 51.3, 52.5, 51.3), Clay = c(21.3, 22.5, 25, 23.8,
25, 30)), .Names = c("Location", "CT_NT", "Depth", "pH",
"N", "P", "K", "OM", "NH4", "Sand", "Silt", "Clay"), row.names = c(NA,
6L), class = "data.frame")
I am trying to join two datasets using left_join in dplyr. To my astonishment, I'm getting duplicate rows that are somehow not being identified as such. After reading all the other answers I could get my hands on here that seemed to address "join" issues (at least I'm not the only one who has them...?), I have tried:
Checking the group types of the joining variables in the two datasets
to ensure they match
Checking that I don't have duplicates within f1 or f2
Checking that the categorical columns I'm using to join are, in fact,
the same length and have the same contents. They're EXACTLY the same,
all the way down to the order I put them in
Explicitly specifying to dplyr to use Location, CT_NT, and Depth to
join
Letting dplyr figure out the joining variables itself
Joining in both orders
Using inner_join--I ended up with f1 only
I've used left_join before and not had this issue, and it was with a very similar dataset (the pilot data to this full study, in fact). I thought I understood what left_join was doing, but now I'm wondering if I don't actually. I'm trying to get better with using dplyr, but unfortunately it's a lot of me bashing away at things until something works and I can figure out why it worked so I can reproduce it again later as needed.
Given my inexperience, I'm sure the answer is going to be frustratingly straightforward and simple, to the annoyance of everyone involved. Such is the life of learning to code, I guess. Thank you in advance for dealing with a rookie's doofy questions!
Here's my code:
f1<-full %>% #Build pilot_summary. Pipe pilot to...
group_by(Location,CT_NT,Depth,Site) %>% #group_by to work on CT or NT at each site
summarise_at(5:6,funs(mean)) %>% #calculate site means
ungroup(f1)
f1$Depth<-as.factor(f1$Depth)
f1$Site<-NULL
osu$Texture_Class<-NULL#Take out the texture class column
f2<- osu %>%
group_by(Location,CT_NT,Depth) %>% #group because otherwise R tries to crash on the next line of code...
arrange(Location,CT_NT,Depth) %>% #Put everything in order like f1, just in case
ungroup(f2)
f2$Depth<-as.factor(f2$Depth)
full_summary<-left_join(f1,f2)

Error in cor(data[, -1], use = "complete.obs") : 'x' must be numeric

I'm completely new to R - really have no clue what I'm doing to be honest. But I really need to run bivariate/multivariate regressions with this data following someone's advice and I'm stuck. Any help is greatly appreciated.
rm(list=ls())
setwd("C:/Users/Bogi/Documents/School/Honors Thesis/Voting and Economic Data")
data<-read.csv("BOGDAN_DATA1.csv")
head(data)
round(cor(data[,-1],use="complete.obs"),1)
Error in cor(data[, -1], use = "complete.obs") : 'x' must be numeric
dput
structure(list(REGION = structure(1:6, .Label = c("Altai Republic",
"Altai Territory", "Amur Region", "Arkhangelsk Region", "Astrakhan region",
"Belgorod region"), class = "factor"), PCT_CHANGE_VOTE = structure(c(2L,
3L, 5L, 4L, 6L, 1L), .Label = c("-13%", "-16%", "-17%", "-25%",
"-26%", "2%"), class = "factor"), PCT_CHANGE_GRP = structure(c(2L,
1L, 4L, 3L, 3L, 4L), .Label = c("10%", "17%", "19%", "27%"), class = "factor"),
PCT_CHANGE_INFLATION = structure(c(1L, 2L, 1L, 3L, 3L, 2L
), .Label = c("-2%", "-3%", "-4%"), class = "factor"), PCT_CHANGE_UNEMP = structure(c(5L,
4L, 1L, 2L, 6L, 3L), .Label = c("-13%", "-14%", "-17%", "-3%",
"5%", "7%"), class = "factor"), POVERTY = c(18.6, 22.6, 20.4,
14.4, 14.2, 8.6), POP_AGE1 = c(25.8, 16.9, 18.5, 17.1, 17.8,
15.2), POP_AGE2 = c(58.8, 59.6, 61.3, 60.4, 60.8, 60.3),
POP_AGE3 = c(15.4, 23.5, 20.2, 22.5, 21.4, 24.5), POP_URBAN = c(28.7,
55.2, 67, 76.2, 66.7, 66.4), POP_RURAL = c(71.3, 44.8, 33,
23.8, 33.3, 33.6), COMPUTER = c(46.4, 54.5, 66.1, 74, 65.1,
55.2), INTERNET = c(32.1, 41, 50.7, 66.5, 60, 50.7)), .Names = c("REGION",
"PCT_CHANGE_VOTE", "PCT_CHANGE_GRP", "PCT_CHANGE_INFLATION",
"PCT_CHANGE_UNEMP", "POVERTY", "POP_AGE1", "POP_AGE2", "POP_AGE3",
"POP_URBAN", "POP_RURAL", "COMPUTER", "INTERNET"), row.names = c(NA,
6L), class = "data.frame")
You could loop the columns 2:5 (lapply(data[2:5], ..)), remove the % in columns 2:5 (gsub('[%]',..)) and convert the columns to numeric. The output from gsub will be character class, convert it to numeric by as.numeric
# Columns 2 to 5 hold percentages stored as text such as "-16%". Remove the
# percent sign from each value and parse what is left as a number.
percent_to_number <- function(column) {
  without_sign <- gsub("[%]", "", column)
  as.numeric(without_sign)
}
data[2:5] <- lapply(data[2:5], percent_to_number)

# Correlate every column except the first, using only rows with no missing
# values, and round the matrix to one decimal place.
Cor1 <- round(cor(data[-1], use = "complete.obs"), digits = 1)
Or you could remove the % in those columns using awk on shell (assuming ,
as delimiter)
awk 'BEGIN {OFS=FS=","} function SUB(F) {sub(/\%/,"", $F)}{SUB(2);SUB(3);SUB(4);SUB(5)}1' Bogdan.csv > Bogdan2.csv
Read the file with read.csv and run the cor
dat1 <- read.csv('Bogdan2.csv')
Cor2 <- round(cor(dat1[-1], use='complete.obs'), 1)
identical(Cor1, Cor2)
#[1] TRUE

How can I convert labels of a guide into a custom format?

I have this code to build my plot
here is merged1.data
structure(list(time = c(1391525413022, 1391525438998, 1391525440903,
1391525446466, 1391525451770, 1391525456050, 1391525472635, 1391525489437,
1391525502903, 1391525512793, 1391525524136, 1391525549132, 1391525562656,
1391525570842, 1391525578937, 1391525603513, 1391525858760, 1391525988388,
1391525991792, 1391526065759), time_print = structure(c(1391525413,
1391525438, 1391525440, 1391525446, 1391525451, 1391525456, 1391525472,
1391525489, 1391525502, 1391525512, 1391525524, 1391525549, 1391525562,
1391525570, 1391525578, 1391525603, 1391525858, 1391525988, 1391525991,
1391526065), class = c("POSIXct", "POSIXt"), tzone = ""), elapsed = c(2016,
1830, 2494, 2717, 2837, 1093, 1216, 2536, 2656, 2416, 2093, 2684,
1878, 2808, 2294, 1179, 1291, 1166, 1244, 1039), threads = c(7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), version = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "025.00", class = "factor")), .Names = c("time",
"time_print", "elapsed", "threads", "version"), row.names = c(NA,
-20L), class = "data.frame")
here is merged2.data
structure(list(time = c(1391525413022, 1391525438998, 1391525440903,
1391525446466, 1391525451770, 1391525456050, 1391525472635, 1391525489437,
1391525502903, 1391525512793, 1391525524136, 1391525549132, 1391525562656,
1391525570842, 1391525578937, 1391525603513, 1391525858760, 1391525988388,
1391525991792, 1391526065759), time_print = structure(c(1391525413,
1391525438, 1391525440, 1391525446, 1391525451, 1391525456, 1391525472,
1391525489, 1391525502, 1391525512, 1391525524, 1391525549, 1391525562,
1391525570, 1391525578, 1391525603, 1391525858, 1391525988, 1391525991,
1391526065), class = c("POSIXct", "POSIXt"), tzone = ""), elapsed = c(2016,
1830, 2494, 2717, 2837, 1093, 1216, 2536, 2656, 2416, 2093, 2684,
1878, 2808, 2294, 1179, 1291, 1166, 1244, 1039), threads = c(6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6), version = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "025.02", class = "factor")), .Names = c("time",
"time_print", "elapsed", "threads", "version"), row.names = c(NA,
-20L), class = "data.frame")
and the plot function
# Scatter of elapsed time against thread count for both data sets. Point size
# follows elapsed, shape follows version and colour follows the raw numeric
# timestamp. The size legend is suppressed.
# `show.legend` replaces `show_guide`, which ggplot2 deprecated in 2.0.0.
combined_plot <- ggplot(merged1.data, aes(x = threads, y = elapsed)) +
  geom_point(
    aes(size = elapsed, shape = version, colour = time),
    show.legend = TRUE
  ) +
  scale_size(guide = "none") +
  scale_color_gradient(high = "black", low = "green", name = "Time") +
  geom_point(
    data = merged2.data,
    aes(size = elapsed, shape = version, colour = time),
    show.legend = TRUE
  ) +
  scale_shape_manual(values = c(15, 17), name = "Versions")
Here I'm plotting response time against the number of threads currently running. The colour gradient is supposed to act as a third dimension on the 2D screen, indicating where we started capturing data and where we finished.
time is a timestamp like 1.391525e+12 (as it shows in the console), and when I compute the gradient colour from these values, the legend looks ugly.
So what I want is to compute the gradient colour from this list of numeric values but show them converted (for instance, into a human-readable date/time) as labels on my gradient legend. At the moment it looks like the picture below:
Define a formatter function as shown in the Cookbook:
# Turn millisecond timestamps into "dd.mm. HH:MM:SS" labels.
#
# x: numeric vector of milliseconds counted from 1970-01-01.
# Returns a character vector the same length as x. The clock time is rendered
# by format() in the session's time zone.
datetime_formatter <- function(x) {
  seconds_since_epoch <- x / 1000
  format(
    as.POSIXct(seconds_since_epoch, origin = "1970-01-01"),
    "%d.%m. %H:%M:%S"
  )
}
# Same scatter as in the question, but the colour legend now prints readable
# date-times: the colour scale's `labels` argument receives datetime_formatter,
# which converts the millisecond timestamps at the legend breaks.
# Two arguments are spelled out in their current form:
#   * `labels` in full instead of the partially matched `label`;
#   * `show.legend` instead of `show_guide`, deprecated in ggplot2 2.0.0.
combined_plot <- ggplot(merged1.data, aes(x = threads, y = elapsed)) +
  geom_point(
    aes(size = elapsed, shape = version, colour = time),
    show.legend = TRUE
  ) +
  scale_size(guide = "none") +
  scale_color_gradient(
    high = "black",
    low = "green",
    name = "Time",
    labels = datetime_formatter
  ) +
  geom_point(
    data = merged2.data,
    aes(size = elapsed, shape = version, colour = time),
    show.legend = TRUE
  ) +
  scale_shape_manual(values = c(15, 17), name = "Versions")
print(combined_plot)
Edit:
Here is what I believe is a better way to present this data (assuming your real data is less redundant than what you show here).
DF <- rbind(merged1.data, merged2.data)
combined_plot <- ggplot(DF,
aes(x=time_print, y=elapsed, colour=factor(threads),
shape=version)) +
geom_point(size=5)
print(combined_plot)
However, it's difficult to recommend alternatives without having all the facts.

Resources