I have a big timeseries dataset which looks like the table below. T0, T1, T2,... (goes on till T70) are the timestamps and over 400 batches (A,B,C,...). There are multiple features in the data (Description Column in the sample data) which I'm interested in plotting. My first attempt was to separate the dataset for each description so that I get one row per batch in each subset ranging from T0 to T70.
My aim is to convert this dataframe into a timeseries object and check for seasonality for Good and bad batches (for each description). Can someone help with any easy fixes in R? Thanks!
Update:
My subset of the data for one Description looks like this:
In order to melt the data, I used:
mdf <- melt(df,id.vars = c('Batch',colnames(df[, c(2:70)])))
and it didn't work. I want to get just three variables out of it:
Batch - Time - Value.
Any help would be appreciated!
EDIT:dput(head(df,20)) gave the following output. I have truncated the output till T20 instead of T70.
structure(list(Batch = c("A", "B", "C",
"D", "E", "F", "G", "H",
"I", "J", "K", "L", "M",
"N", "O", "P", "Q", "R",
"S", "T"),
T0 = c(5, 6,
4, 2, 6, 3, 4, 6, 4, 1, 6, 5, 4, 5, 6, 5, 6, 5,
5, 6), T1 = c(6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 5, 6, 6), T2 = c(6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6), T3 = c(20,
19, 19, 19, 19, 18, 20, 20, 20, 20, 20, 20, 20, 19,
18, 19, 20, 20, 20, 19), T4 = c(21, 21, 21, 21, 20,
20, 21, 21, 21, 21, 22, 21, 22, 21, 21, 21, 22, 21,
22, 20), T5 = c(22, 22, 22, 22, 22, 21, 21, 22, 21,
22, 23, 22, 23, 22, 22, 23, 23, 23, 23, 22), T6 = c(23,
23, 24, 23, 23, 23, 23, 23, 23, 24, 24, 23, 23, 24,
23, 24, 24, 24, 24, 23), T7 = c(25, 25, 25, 24, 24,
24, 24, 25, 25, 25, 24, 25, 24, 25, 25, 26, 25, 25,
25, 25), T8 = c(26, 26, 25, 26, 25, 26, 26, 26, 26,
26, 25, 26, 26, 26, 26, 26, 25, 26, 25, 26), T9 = c(20,
23, 19, 21, 22, 27, 24, 26, 24, 25, 21, 23, 21, 22,
28, 22, 20, 24, 19, 27), T10 = c(16, 18, 14, 15, 15,
23, 19, 20, 19, 20, 15, 16, 15, 17, 23, 16, 15, 18,
15, 23), T11 = c(15, 16, 15, 15, 16, 17, 15, 14, 15,
15, 15, 14, 15, 15, 17, 15, 15, 15, 15, 17), T12 = c(15,
16, 15, 15, 16, 14, 17, 15, 15, 15, 15, 15, 15, 16,
15, 15, 15, 16, 15, 15), T13 = c(15, 16, 15, 15, 16,
15, 15, 15, 15, 15, 15, 15, 15, 16, 15, 15, 15, 16,
14, 15), T14 = c(16, 16, 15, 16, 16, 15, 16, 15, 16,
15, 15, 15, 15, 16, 16, 15, 16, 16, 15, 16), T15 = c(16,
16, 16, 16, 17, 15, 16, 15, 16, 15, 16, 15, 16, 16,
16, 16, 16, 16, 15, 16), T16 = c(16, 17, 16, 16, 17,
15, 17, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
15, 16), T17 = c(17, 19, 17, 18, 20, 15, 18, 15, 16,
16, 18, 16, 18, 19, 19, 17, 19, 17, 17, 17), T18 = c(24,
26, 27, 26, 28, 22, 25, 20, 25, 20, 26, 25, 27, 26,
25, 25, 28, 25, 27, 24), T19 = c(36, 37, 36, 38, 36,
38, 37, 31, 36, 26, 36, 37, 36, 36, 37, 36, 37, 35,
35, 35), T20 = c(38, 39, 37, 38, 38, 43, 39, 41, 39,
40, 38, 39, 38, 39, 43, 38, 37, 39, 37, 42)), row.names = c(NA,
20L), class = "data.frame")
Since you did not include reproducible data, I will use some dummy data. For future questions, please run dput() on your data and paste the output into your question. Your issue can be solved by melting your data: with the function melt() from reshape2 you choose which variables act as ids, and every remaining variable is stacked into rows, with a key variable recording which column each value came from. Below I apply that method and build some plots related to what you want:
library(reshape2)
library(ggplot2)
# Dummy data: three batches (A, B, C) recorded under two descriptions
# ("In" / "Out"), with one column per time point (T0..T2).
df <- data.frame(
  Batch = rep(c("A", "B", "C"), 2),
  Type = c("Good", "Bad", "Good", "Good", "Bad", "Good"),
  Description = rep(c("In", "Out"), each = 3),
  T0 = c(1, 2, 1, 4, 3, 2),
  T1 = c(2, 3, 4, 1, 3, 4),
  T2 = c(3, 5, 3, 5, 5, 6),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
# Melt to long format: the id columns are kept as they are and every remaining
# column (T0, T1, T2) is stacked into the `variable` / `value` pair.
mdf <- melt(df, id.vars = c("Batch", "Type", "Description"))
# Bar plot with Description on the x-axis, filled by time point
ggplot(mdf, aes(x = Description, y = value, fill = variable)) +
  geom_bar(stat = "identity")
Using Description on x-axis you will get this:
Also you can wrap by some variable to get different plots like this using facet_wrap():
# Bars per batch, filled by time point, with one panel per Description
ggplot(data = mdf, mapping = aes(x = Batch, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  facet_wrap(. ~ Description)
With the melted data mdf you can play and obtain other plots you want.
Update: With the data provided, here is a possible solution to your issue:
library(tidyverse)
#Data
dff <- structure(list(Batch = c("A", "B", "C", "D", "E", "F", "G", "H",
"I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T"),
T0 = c(5, 6, 4, 2, 6, 3, 4, 6, 4, 1, 6, 5, 4, 5, 6, 5, 6,
5, 5, 6), T1 = c(6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 5, 6, 6), T2 = c(6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 5, 6, 6, 6, 6, 6), T3 = c(20, 19, 19, 19, 19, 18,
20, 20, 20, 20, 20, 20, 20, 19, 18, 19, 20, 20, 20, 19),
T4 = c(21, 21, 21, 21, 20, 20, 21, 21, 21, 21, 22, 21, 22,
21, 21, 21, 22, 21, 22, 20), T5 = c(22, 22, 22, 22, 22, 21,
21, 22, 21, 22, 23, 22, 23, 22, 22, 23, 23, 23, 23, 22),
T6 = c(23, 23, 24, 23, 23, 23, 23, 23, 23, 24, 24, 23, 23,
24, 23, 24, 24, 24, 24, 23), T7 = c(25, 25, 25, 24, 24, 24,
24, 25, 25, 25, 24, 25, 24, 25, 25, 26, 25, 25, 25, 25),
T8 = c(26, 26, 25, 26, 25, 26, 26, 26, 26, 26, 25, 26, 26,
26, 26, 26, 25, 26, 25, 26), T9 = c(20, 23, 19, 21, 22, 27,
24, 26, 24, 25, 21, 23, 21, 22, 28, 22, 20, 24, 19, 27),
T10 = c(16, 18, 14, 15, 15, 23, 19, 20, 19, 20, 15, 16, 15,
17, 23, 16, 15, 18, 15, 23), T11 = c(15, 16, 15, 15, 16,
17, 15, 14, 15, 15, 15, 14, 15, 15, 17, 15, 15, 15, 15, 17
), T12 = c(15, 16, 15, 15, 16, 14, 17, 15, 15, 15, 15, 15,
15, 16, 15, 15, 15, 16, 15, 15), T13 = c(15, 16, 15, 15,
16, 15, 15, 15, 15, 15, 15, 15, 15, 16, 15, 15, 15, 16, 14,
15), T14 = c(16, 16, 15, 16, 16, 15, 16, 15, 16, 15, 15,
15, 15, 16, 16, 15, 16, 16, 15, 16), T15 = c(16, 16, 16,
16, 17, 15, 16, 15, 16, 15, 16, 15, 16, 16, 16, 16, 16, 16,
15, 16), T16 = c(16, 17, 16, 16, 17, 15, 17, 15, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 15, 16), T17 = c(17, 19,
17, 18, 20, 15, 18, 15, 16, 16, 18, 16, 18, 19, 19, 17, 19,
17, 17, 17), T18 = c(24, 26, 27, 26, 28, 22, 25, 20, 25,
20, 26, 25, 27, 26, 25, 25, 28, 25, 27, 24), T19 = c(36,
37, 36, 38, 36, 38, 37, 31, 36, 26, 36, 37, 36, 36, 37, 36,
37, 35, 35, 35), T20 = c(38, 39, 37, 38, 38, 43, 39, 41,
39, 40, 38, 39, 38, 39, 43, 38, 37, 39, 37, 42)), row.names = c(NA,
-20L), class = "data.frame")
Next the code:
# Reshape to long format: every column except Batch is stacked into the
# default `name` (original column label) / `value` (measurement) pair.
Melted <- pivot_longer(data = dff, cols = -Batch)
# Fix the level order to order of first appearance; default factor() levels
# are sorted as strings, which would place T10 before T2.
Melted[["name"]] <- factor(
  Melted[["name"]],
  levels = unique(Melted[["name"]])
)
# One line per time point, drawn across the batches
ggplot(
  data = Melted,
  mapping = aes(x = Batch, y = value, color = name, group = name)
) +
  geom_line()
I am trying to subset my dataset as follows
df[df$Age > 19,]
I am seeing the following error: Error: x and labels must be same type
I am not sure I understand this, any suggestions are much appreciated.
=================
dput(df$Age)
c(20, 11, 10, 15, 6, 23, 45, 30, 18, 11, 15, 20, 7, 18, 19, 30,
40, 16, 14, 33, 12, 22, 12, 5, NA, 18, 30, 26, 25, 27, 12, 27,
13, 15, 32, 19, NA, 18, 13, 30, 10, 16, 47, 24, 64, 21, 9, 30,
12, 33, 16, 20, 14, 10, 19, 18, 20, 18, 10, 15, 55, 18, 50, 14,
35, 18, 21, 17, 14, 9, 25, 17, 10, 16, 12, 30, 38, 10, 27, 20,
27, 16, 30, 11, 5, 20, 30, 12, 24, 11, 7, 26, 48, 25, 20, 18,
27, 18, 28, 15, 17, 46, 30, 20, 20, 14, 35, 31, 10, 26, 13, NA,
15, 3, 30, 33, 15, 43, 19, 40, 8, 16, 8, 3, 37, 40, 58, 18, 12,
19, 14, 24, 34, 30, 23, 28, 47, 29, 21, 35, 23, 47, 11, 30, 16,
25, 30, 30, 8, 18, 20, 12, 8, 18, 30, 6, 54, 60, 18, 27, 42,
6, 42, 13, 21, 15, 17, 10, 33, 15, 16, 36, 16, 52, 4, 30, 28,
30, 14, 13, 14, NA, 15, 20, 20, 24, 27, 23, 10, 13, 22, 30, 45,
10, 23, 14, 27, 19, 12, 25, 10, 10, 14, 16, 16, 19, 18, 12, 65,
18, 35, 20, 31, NA, 21, 40, 8, 13, 25, 8, 13, 15, 19, 25, 10,
9, 24, 8, 25, 30, 38, 35, 20, 12, 15, 25, 27, 39, 8, 10, NA,
12, 50, 16, 14, 22, 12, 20, 44, 13, 8, 43, 48, 13, 21, 20, 42,
11, 20, 35, 53, 22, 17, 5, NA, 14, 10, 21, 33, 21, 69, 24, 15,
12, 8, 28, 11, 32, 25, 26, 21, 36, 12, 24, 20, 23, 14, 30, 50,
26, NA, 30, 22, 44, 22, 14, 30, 28, 10, 16, 32, 35, 40, 16, 40,
33, 23, 25, 10, 17, 10, 14, 22, 14, 25, 20, 39, 24, 52, 16, 34,
26, 23, 11, 12, 70, 59, 12, 38, 22, 13, 40, 57, 30, 7, 21, 20,
30, 12, 13, 5, 19, 35, 56, 17, 40, 48, 19, 8, 30, 21, 5, 40,
16, 22, 20, 17, 16, 30, 18, 13, 17, NA, 40, 9, 24, 26, 20, 22,
17, 44, 45, 18, 26, 50, 10, 21, 15, NA, 20, 12, 16, 54, 15, 16,
33, 22, 26, 60, 35, 11, 30, 16, 48, 16, 16, 16, 10, 14, 15, 23,
17, 18, NA, 49, 12, 7, 18, 24, 17, 14, 30, 13, 6, 51, 36, 16,
10, 43, 34, 15, 12, 15, 15, 17, 40, 58, 15, 33, 16, 48, 25, 15,
16, 5, NA, 40, 34, 10, 30, 30, 30, 15, 15, 12, 5, 10, 20, 18,
20, 16, 20, 26, 12, 14, 14, 20, 12, 30, 30, 29, 22, 19, 26, 11,
23, 40, 30, 16, 50, 20, 25, 29, 40, 44, 20, 40, 8, 16, 15, 38,
11, 27, 63, 16, NA, 47, 65, 21, 29, 30, 16, 21, 25, 16, 23, 5,
17, 22, 12, 14, 27, NA, 16, 9, 33, 11, 15, 34, 41, 30, 33, 15,
25, 40, 25, 12, 12, 17, 14)
I'm trying to make a surface graph, but I can't get it to work. The resulting graphic is not pretty; I have looked through several forums for how to do it, without success.
library(scatterplot3d)
# Design grid: five x levels (10..30) crossed with three y levels (7, 21, 35),
# six points per (x, y) combination, i.e. 90 points in total.
x1 <- rep(10, 6)
x2 <- rep(15, 6)
x3 <- rep(20, 6)
x4 <- rep(25, 6)
x5 <- rep(30, 6)
# Repeat the 30-element x pattern once per y level so that x has the same
# length as y (90). The z formula below would recycle a 30-element x in
# exactly this way, but the plotting call needs x, y and z to have equal
# lengths.
x <- rep(c(x1, x2, x3, x4, x5), times = 3)
y1 <- rep(7, 30)
y2 <- rep(21, 30)
y3 <- rep(35, 30)
y <- c(y1, y2, y3)
z = 1781.166805 + 52.445903*y + 203.454647*x -1.570445*x*y -4.119635*(x**2)
# Plot the 90 (x, y, z) points as a 3D point cloud
scatterplot3d(x, y, z)
I'd highly appreciate it if you could help me!
First off, for future postings, please use the code tags to properly format your code. Secondly, your formula for z is not valid R syntax.
Lastly, I'd strongly recommend spending some time taking the tour and learning how to ask good questions.
scatterplot3d allows you to plot a three dimensional point cloud, not a surface. Based on the (poorly formatted) data you provide, this works just fine:
library(scatterplot3d);
# Second-order polynomial in x and y, evaluated element-wise at every point
z <- 1781.166805 +
  52.445903 * y +
  203.454647 * x -
  1.570445 * x * y -
  4.119635 * x^2
# Draw the (x, y, z) points as a 3D point cloud
scatterplot3d(x, y, z)
Update
If you want to have a 3d surface (mesh) plot, you can use e.g. plotly:
# Use library() so a missing plotly installation stops here with a clear
# error; require() would only warn and return FALSE, and the failure would
# surface later at the plot_ly() call.
library(plotly)
# Render x, y, z as an interactive 3D mesh
plot_ly(x = ~x, y = ~y, z = ~z, type = "mesh3d")
which produces an interactive (rotatable, zoomable) plot, a screenshot of which looks like this:
Sample data
# x cycles through 10, 15, 20, 25, 30 (six points each), once per y level;
# y holds each of its three levels (7, 21, 35) for 30 consecutive points.
x <- rep(rep(c(10, 15, 20, 25, 30), each = 6), times = 3)
y <- rep(c(7, 21, 35), each = 30)