multiple line on same plot for timeseries

multiple line on same plot for timeseries - r

Below table has 366 days of data:
od
month dayofmonth total ad aont
1 1 1 27 9 18
2 1 2 31 24 7
3 1 3 30 25 5
4 1 4 29 15 14
5 1 5 27 1 26
6 1 6 30 18 12
7 1 7 31 8 23
8 1 8 30 9 21
9 1 9 25 23 2
10 1 10 31 15 16
11 1 11 27 17 7
12 1 12 27 3 24
13 1 13 26 11 15
14 1 14 28 12
library(zoo)
# library() stops with an error when xts is not installed; require() would
# only return FALSE and let the script fail later at the first xts() call.
library(xts)
# One Date per calendar day between the start `f` and the end `t`.
# NOTE(review): `f` and `t` are not defined in this snippet - presumably
# strings laid out like "2012 - 01 - 01"; confirm against the caller.
Dates <- seq(as.Date(f, "%Y - %m - %d"), as.Date(t, "%Y - %m - %d"), by = "day")
# Wrap three measurement columns of `od` in xts series indexed by those dates.
total <- xts(od$total, order.by = Dates)
dont <- xts(od$ad, order.by = Dates)
adont <- xts(od$aont, order.by = Dates)
I used the zoo package; now I want to draw multiple lines on the same plot.

Using plot.type="single" in the plot call can help with this.
#open libraries
library(xts)
library(zoo)
# Two independent random series of 100 observations each.
a <- rnorm(100)
b <- rnorm(100)
# Weekly dates to index them by. Named `dates` rather than `c` so the
# variable does not mask base::c(), which is called again below.
dates <- seq(as.Date("2000/1/1"), by = "week", length.out = 100)
# Bind the series into a two-column matrix (columns "a" and "b") and turn it
# into a zoo object ordered by the dates.
series <- zoo(cbind(a, b), order.by = dates)
# plot.type = "single" draws every column in one panel instead of stacking
# one panel per series; `col` assigns one colour per column.
plot(series,
     plot.type = "single",
     col = c("red", "blue"))

Create the zoo object and then plot:
library(ggplot2)
library(zoo)
z <- zoo(od[3:5], as.Date(paste(2012, od$month, od$dayofmonth, sep = "-")))
autoplot(z, facet = NULL)

Related

Having a subset of a data set based on a specific condition

I have a data set in which the values of "age" are given in different units (days, months, years). I want to convert the rows whose values are in days or months to years. How can I do this in R?
If there is no letter after the number, then the unit is years.
If there is a ‘D’ after the number, then the unit is days (e.g. 10D means 10 days)
If there is an ‘M’ after the number, then the unit is months (e.g. 5M means 5 months).
Age <- c("33","32","44","54M","67M","34D","33D","44","77","88M","49 D","55D","11M")
ID <- c(1,2,3,4,5,6,7,8,9,10,11,12,13)
Data <- data.frame(ID,Age)
> Data
ID Age
1 1 33
2 2 32
3 3 44
4 4 54M
5 5 67M
6 6 34D
7 7 33D
8 8 44
9 9 77
10 10 88M
11 11 49 D
12 12 55D
13 13 11M

Here's a quick way in base R:
# Tag each age with its unit: an "M" anywhere marks months, otherwise a "D"
# marks days, and a bare number is in years. "month" is assigned last so it
# wins over "day", matching the M-before-D precedence.
Data$units <- rep("year", nrow(Data))
Data$units[grepl("D", Data$Age)] <- "day"
Data$units[grepl("M", Data$Age)] <- "month"
# Drop every non-digit character and parse what remains as the magnitude.
Data$value <- as.numeric(gsub("[^0-9]", "", Data$Age))
# Divide each magnitude by the number of its units in one year.
Data$result <- Data$value /
  unname(c(year = 1, month = 12, day = 365.25)[Data$units])
Data
# ID Age units value result
# 1 1 33 year 33 33.00000000
# 2 2 32 year 32 32.00000000
# 3 3 44 year 44 44.00000000
# 4 4 54M month 54 4.50000000
# 5 5 67M month 67 5.58333333
# 6 6 34D day 34 0.09308693
# 7 7 33D day 33 0.09034908
# 8 8 44 year 44 44.00000000
# 9 9 77 year 77 77.00000000
# 10 10 88M month 88 7.33333333
# 11 11 49 D day 49 0.13415469
# 12 12 55D day 55 0.15058179
# 13 13 11M month 11 0.91666667

And here's another option using tidyverse tools:
library(dplyr)
library(stringr)
Data %>%
mutate(Unit = str_extract(string = Age,pattern = "[DM]"),
Unit = if_else(is.na(Unit),'Y',Unit),
Age = as.numeric(gsub(pattern = "[MD]","",Age))) %>%
mutate(AgeYears = Age / c('Y' = 1,'M' = 12,'D' = 365)[Unit])
ID Age Unit AgeYears
1 1 33 Y 33.00000000
2 2 32 Y 32.00000000
3 3 44 Y 44.00000000
4 4 54 M 4.50000000
5 5 67 M 5.58333333
6 6 34 D 0.09315068
7 7 33 D 0.09041096
8 8 44 Y 44.00000000
9 9 77 Y 77.00000000
10 10 88 M 7.33333333
11 11 49 D 0.13424658
12 12 55 D 0.15068493
13 13 11 M 0.91666667

#baseR
# Ages recorded as "<n>" (years), "<n>M" (months) or "<n>D" (days, optionally
# with a space before the unit letter); the result is every age in years.
Age <- c("33", "32", "44", "54M", "67M", "34D", "33D", "44", "77", "88M",
         "49 D", "55D", "11M")
# Classify each entry by its trailing unit letter; no letter means years.
is_month <- grepl("M$", Age)
is_day <- grepl("D$", Age)
# Strip optional whitespace plus the unit letter, keeping only the number.
# The unit letters are listed explicitly: "\\D" is the regex class for any
# non-digit, not a literal "D".
AgeNum <- as.numeric(sub("\\s*[MD]$", "", Age))
# Stay numeric throughout: writing the quotients back into the character
# vector and re-parsing them would truncate them to 15 significant digits.
Age <- AgeNum / ifelse(is_month, 12, ifelse(is_day, 365, 1))
result:
> Age
[1] 33.00000000 32.00000000 44.00000000 4.50000000 5.58333333 0.09315068 0.09041096 44.00000000
[9] 77.00000000 7.33333333 0.13424658 0.15068493 0.91666667
>

Additionally, a further solution using data.table:
> library(data.table)
> dt <- data.table(ID, Age)
> dt[, Unit := ifelse(grepl("D$", Age), "D", ifelse(grepl("M$", Age), "M", "Y"))][
, Age := as.integer(gsub("M|D", "", Age))]
> dt[, Age_in_years := ifelse(Unit == "Y", Age,
ifelse(Unit == "M", Age / 12, Age / 365.25))][]
ID Age Unit Age_in_years
1: 1 33 Y 33.00000000
2: 2 32 Y 32.00000000
3: 3 44 Y 44.00000000
4: 4 54 M 4.50000000
5: 5 67 M 5.58333333
6: 6 34 D 0.09308693
7: 7 33 D 0.09034908
8: 8 44 Y 44.00000000
9: 9 77 Y 77.00000000
10: 10 88 M 7.33333333
11: 11 49 D 0.13415469
12: 12 55 D 0.15058179
13: 13 11 M 0.91666667

Assigning Values based on row value

I have a large vector (a column of a data frame) containing the integers 1 to 30. I want to replace the numbers 1 to 5 with 1, 6 to 10 with 5, 11 to 15 with 9, and so on.
> x3 <- sample(1:30, 100, rep=TRUE)
> x3
[1] 13 24 16 30 10 6 15 10 3 17 18 22 11 13 29 7 25 28 17 27 1 5 6 20 15 15 8 10 13 26 27 24 3 24 5 7 10 6 28 27 1 4 22 25 14 13 2 10 4 29 23 24 30 24 29 11 2 28 23 1 1 2
[63] 3 23 13 26 21 22 11 4 8 26 17 11 20 23 6 14 24 5 15 21 11 13 6 14 20 11 22 9 6 29 4 30 20 30 4 24 23 29
As I mentioned, this is a column in a data frame, and I want to create a new column from it using the mapping above. If I do it as follows, I have to repeat the assignment 30 times.
myFrame$NewColumn[myFrame$oldColumn==1] <- 1
myFrame$NewColumn[myFrame$oldColumn==2] <- 1
myFrame$NewColumn[myFrame$oldColumn==3] <- 1
...
What's a better way to do this?

We can do this with cut (suppose what you mean by '...' is 10, 11, 12):
# Bin the 1..30 values into six left-closed groups of five
# ([1,6), [6,11), ..., [26,30]) and label the groups 1, 5, 9, 13, 17, 21.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
x4 <- cut(x3,
          breaks = c(seq(1, 30, 5), 30),
          right = FALSE,          # intervals are closed on the left ...
          include.lowest = TRUE,  # ... and the last one also includes 30
          labels = 4 * (0:5) + 1) # replacement value for each bin
# cut() returns a factor; go through character so we recover the label
# values rather than the internal level codes.
x4 <- as.numeric(as.character(x4))

Did you try:
# Values 1 to 5 map to 1.
myFrame$NewColumn[myFrame$oldColumn > 0 & myFrame$oldColumn < 6] <- 1
# Values 6 to 10 map to 5, as the question asks (not 1 again).
myFrame$NewColumn[myFrame$oldColumn > 5 & myFrame$oldColumn < 11] <- 5
...
Or even better:
# Truncate (value - 1) / 5 to get the zero-based group index
# (1..5 -> 0, 6..10 -> 1, ...), then map group k to 4 * k + 1 (1, 5, 9, ...).
# The original line had one closing parenthesis too many and did not parse.
myFrame$NewColumn <- as.integer((myFrame$oldColumn - 1) / 5) * 4 + 1

string split in r at specific position [duplicate]

I have a dataframe where the date and time are mixed like this:
ID <- c(1,2,3,4)
DDMMYY <-c(100310,110310,120310,130310)
HHMM <- c(2205,1045,1110,2250)
df <- data.frame(ID,DDMMYY,HHMM)
df
ID DDMMYY HHMM
1 100310 2205
2 110310 1045
3 120310 1110
4 130310 2250
I want to split the date and time so that DD, MM, YY, HH and MM fall into separate columns like this:
ID DD MM YY HH MM
1 10 3 10 22 5
2 11 3 10 10 45
3 12 3 10 11 10
4 13 3 10 22 50
Any idea? Thanks

One option would be to use extract from tidyr
library(tidyr)
extract(extract(df, DDMMYY, c("DD","MM", "YY"), "(..)(..)(..)",
convert=TRUE), HHMM, c("HH", "MM"), "(..)(..)", convert=TRUE)
# ID DD MM YY HH MM
#1 1 10 3 10 22 5
#2 2 11 3 10 10 45
#3 3 12 3 10 11 10
#4 4 13 3 10 22 50
Or you could use strsplit from base R
df[,c("DD", "MM", "YY", "HH", "MM")] <- do.call(data.frame,lapply(df[,-1],
function(x) do.call(rbind,lapply(strsplit(as.character(x),
'(?<=..)(?=..)', perl=TRUE), as.numeric))))
df[,-(2:3)]
# ID DD MM YY HH MM.1
#1 1 10 3 10 22 5
#2 2 11 3 10 10 45
#3 3 12 3 10 11 10
#4 4 13 3 10 22 50

For fun, here are three more alternatives:
Convert your data to actual dates and use format
within(df, {
var <- paste(DDMMYY, HHMM)
var <- strptime(var, format = "%d%m%y %H%M")
mm <- format(var, "%M")
hh <- format(var, "%H")
YY <- format(var, "%y")
MM <- format(var, "%m")
DD <- format(var, "%d")
rm(var, DDMMYY, HHMM)
})
# ID DD MM YY hh mm
# 1 1 10 03 10 22 05
# 2 2 11 03 10 10 45
# 3 3 12 03 10 11 10
# 4 4 13 03 10 22 50
Use read.fwf (and a bunch of other nested stuff)
cbind(df[1],
setNames(do.call(
cbind, lapply(df[-1], function(x) {
read.fwf(textConnection(as.character(x)),
widths = rep(2, nchar(x[1])/2))
})),
c("DD", "MM", "YY", "hh", "mm")))
# ID DD MM YY hh mm
# 1 1 10 3 10 22 5
# 2 2 11 3 10 10 45
# 3 3 12 3 10 11 10
# 4 4 13 3 10 22 50
Use separate from "tidyr"
library(dplyr)
library(tidyr)
df %>%
separate(DDMMYY, into = c("DD", "MM", "YY"), sep = c(2, 4)) %>%
separate(HHMM, into = c("hh", "mm"), sep = 2)
# ID DD MM YY hh mm
# 1 1 10 03 10 22 05
# 2 2 11 03 10 10 45
# 3 3 12 03 10 11 10
# 4 4 13 03 10 22 50

reshaping columns in a data frame

Say I have the following data frame:
ID<-c(1,1,1,1,1,2,2,2,2,2,3,3,3,3,3, 4,4,4,4,4,5,5,5,5,5)
Score<- sample(1:20, 25, replace=TRUE)
days<-rep(c("Mon", "Tue", "Wed", "Thu", "Fri"), times=5)
t<-cbind(ID, Score, days)
I would like to reshape it so that the new columns are ID and the actual weekday names, (meaning 6 columns) and the Score values are distributed according to their ID and day name. Something like this:
I found that reshape package might do. Tried (melt and cast) but it did not produce the result I wanted, but something like in this post: Melt data for one column

A base R solution that uses the built-in reshape command.
set.seed(12345)
t <- data.frame(id = c(1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5),
score = sample(x = 1:20,size = 25,replace = TRUE),
days = rep(x = c("Mon","Tue","Wed","Thu","Fri"),times = 5))
t.wide <- reshape(data = t,
v.names = "score",
timevar = "days",
idvar = "id",
direction = "wide")
names(t.wide) <- gsub(pattern = "score.",replacement = "",x = names(t.wide),fixed = TRUE)
t.wide
id Mon Tue Wed Thu Fri
1 1 15 18 16 18 10
6 2 4 7 11 15 20
11 3 1 4 15 1 8
16 4 10 8 9 4 20
21 5 10 7 20 15 13

You can use reshape2 to do this, but you need a data.frame to do that. Using cbind produces a matrix. (And converts all your numerical variables to characters in this case, as matrices can only hold one data type).
I've changed your code to produce a dataframe, which is already in long format (one row per observation).
set.seed(123)
ID<-c(1,1,1,1,1,2,2,2,2,2,3,3,3,3,3, 4,4,4,4,4,5,5,5,5,5)
Score<- sample(1:20, 25, replace=TRUE)
days<-rep(c("Mon", "Tue", "Wed", "Thu", "Fri"), times=5)
dat<-data.frame(ID, Score, days)
Changing it to wide using reshape2 is then quite straightforward:
library(reshape2)
res <- dcast(ID~days,value.var="Score",data=dat)
> res
ID Fri Mon Thu Tue Wed
1 1 16 3 2 12 6
2 2 19 13 12 7 19
3 3 19 19 17 8 15
4 4 15 3 8 1 20
5 5 3 11 18 8 15

You could also use unstack if your data are complete (same number of days per id).
Here's an example (using the data from TARehman's answer):
unstack(t, score ~ days)
# Fri Mon Thu Tue Wed
# 1 10 15 18 18 16
# 2 20 4 15 7 11
# 3 8 1 1 4 15
# 4 20 10 4 8 9
# 5 13 10 15 7 20
Here's the clean-up for the column ordering, and for adding in the ID column:
cbind(ID = unique(t$id), unstack(t, score ~ days)[c("Mon", "Tue", "Wed", "Thu", "Fri")])
## ID Mon Tue Wed Thu Fri
## 1 1 15 18 16 18 10
## 2 2 4 7 11 15 20
## 3 3 1 4 15 1 8
## 4 4 10 8 9 4 20
## 5 5 10 7 20 15 13

Rather than reshape I'd move to the newer tidyr package and also make use of dplyr like so:
library(dplyr)
library(tidyr)
# `t` is the character matrix built with cbind() in the question, so every
# column arrives as text (or as a factor on R < 4.0).
tdf <- as.data.frame(t) %>%
  # Go through character first: as.numeric() on a factor would return the
  # internal level codes instead of the scores.
  mutate(Score = as.numeric(as.character(Score))) %>%
  # One column per weekday, in order of first appearance; ID/day
  # combinations with no score are left as NA.
  pivot_wider(names_from = days, values_from = Score)
glimpse(tdf)
HTH

Just another option using splitstackshape
library(splitstackshape)
data = data.frame(t)
out = setnames(cSplit(setDT(data)[, .(x = toString(Score)), by = ID],
'x', ','), c('ID', unique(days)))
#> out
# ID Mon Tue Wed Thu Fri
#1: 1 8 14 11 5 10
#2: 2 16 1 4 14 8
#3: 3 8 18 19 13 3
#4: 4 16 9 19 16 6
#5: 5 7 2 1 2 13

Within both the dplyr & tidyr package, use spread to achieve the following:
library(dplyr)
library(tidyr)
# as_tibble() replaces the deprecated tbl_df().
t <- as_tibble(as.data.frame(t))
# Spread the Score values into one column per weekday. The third positional
# argument of spread() is `fill`, so the stray `ID` that used to be passed
# there is dropped; ID stays as the row identifier because it is neither the
# key nor the value column.
t %>% spread(days, Score)
and you get the following output:
ID Fri Mon Thu Tue Wed
(fctr) (fctr) (fctr) (fctr) (fctr) (fctr)
1 1 10 10 18 17 10
2 2 18 11 14 3 16
3 3 11 13 9 15 17
4 4 13 13 16 17 11
5 5 7 14 9 15 20

R iterate over a data frame to add a new column with sequential values

Here is my data frame "data.frame"
X Y
1 10 12
2 20 22
3 30 32
Below what I want.
1) add a new column named "New_col"
2) each cell of a given id is a sequence from X-value to Y-value (step of 1).
X Y New_col
1 10 12 10
11
12
2 20 22 20
21
22
3 30 32 30
31
32
Then fill the empty cells
X Y New_col
1 10 12 10
1 10 12 11
1 10 12 12
2 20 22 20
2 20 22 21
2 20 22 22
3 30 32 30
3 30 32 31
3 30 32 32
I tried the following:
New_col<-seq(from = data.frame$X, to = data.frame$Y, by = 1)
The problem is that this code computes the sequence only for the first row.
Then I tried a loop:
for (i in 1: length(data.frame$X))
{
New_col <-seq(from = data.frame$X, to = data.frame$Y, by = 1)
}
This is the error I got:
Error in seq.default(from = data.frame$X, to = data.frame$Y, by = 1) :
'from' must be of length 1
Thank you for your help.

You can use apply:
# For each row of dat, build a small data frame that repeats X and Y
# alongside the sequence from X to Y, then combine the pieces with rbind().
do.call(rbind, apply(dat, 1, function(x)
# x[1] and x[2] are the first and second values of the current row.
data.frame(X = x[1], Y = x[2], New_col = seq(x[1], x[2]))))
where dat is the name of your data frame. You can ignore the warnings.
X Y New_col
1.1 10 12 10
1.2 10 12 11
1.3 10 12 12
2.1 20 22 20
2.2 20 22 21
2.3 20 22 22
3.1 30 32 30
3.2 30 32 31
3.3 30 32 32

This is a good use case for the data.table package (which you would have to install first):
dat = read.table(text=" X Y
1 10 12
2 20 22
3 30 32")
library(data.table)
dt = as.data.table(dat)
Once you've got your data table set up, by makes this operation easy:
# Group by each (X, Y) pair and, within the group, expand the pair into the
# sequence from X to Y, stored in the New_col column.
dt2 = dt[, list(New_col=seq(X, Y)), by=c("X", "Y")]
# X Y New_col
# 1: 10 12 10
# 2: 10 12 11
# 3: 10 12 12
# 4: 20 22 20
# 5: 20 22 21
# 6: 20 22 22
# 7: 30 32 30
# 8: 30 32 31
# 9: 30 32 32
(The only disclaimer is that this will not work if there are duplicate (X, Y) pairs in your original data frame).

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

multiple line on same plot for timeseries - r

Create the zoo object and then plot: library(ggplot2) library(zoo) z <- zoo(od[3:5], as.Date(paste(2012, od$month, od$dayofmonth, sep = "-"))) autoplot(z, facet = NULL)

Related

Having a subset of a data set based on a specific condition

Assigning Values based on row value

string split in r at specific position [duplicate]

reshaping columns in a data frame

R iterate over a data frame to add a new column with sequential values

Categories

Resources