R Converting Data Frame from Long to Wide - r

I'm trying to convert a data frame from long to wide format, but I'm running into the same issue - I get NA's. I think there might be something wrong with the aggregate function I'm using:
library(reshape2)
library(plyr)
# Example data in long format: three samples, each measured for the same five
# components. Repeated values are generated with rep() instead of typed out.
ID_NUMERIC <- rep(c(5525, 8523, 4569), each = 5)
SAMPLE_NAME <- rep(c("HX44", "RT5", "OP1"), each = 5)
# All date strings go to as.Date() as ONE character vector, together with an
# explicit month/day/year format. Passing each string as its own argument
# makes as.Date() treat only the first one as data.
DATE <- as.Date(
  rep(c("1/1/2014", "1/15/2014", "1/3/2014"), each = 5),
  format = "%m/%d/%Y"
)
ANALYSIS <- rep("P8", 15)
COMPONENT_NAME <- rep(c("Density", "Gravity", "C6", "C7", "C8"), times = 3)
RESULT <- rep(c(0.8593, 33.13, 2.1, 2.3, 2.2), times = 3)
NAME <- rep(c("HX", "RT", "OP"), each = 5)
first <- data.frame(
  ID_NUMERIC, SAMPLE_NAME, DATE, COMPONENT_NAME, ANALYSIS, RESULT, NAME
)
# Add a running row number (id) within each COMPONENT_NAME group.
second <- ddply(first, .(COMPONENT_NAME), function(x) {
  x$id <- seq_len(nrow(x))
  x
})
# Reshape to wide: one column per COMPONENT_NAME, filled from RESULT, with one
# row for every combination of the identifier columns on the left-hand side.
last <- dcast(
  second,
  NAME + SAMPLE_NAME + DATE + ID_NUMERIC + ANALYSIS + id ~ COMPONENT_NAME,
  value.var = "RESULT"
)

You could put the data into a matrix and work with it from there?
# The seven variables of the example, 15 values each.
ID_NUMERIC <- rep(c(5525, 8523, 4569), each = 5)
SAMPLE_NAME <- rep(c("HX44", "RT5", "OP1"), each = 5)
COMPONENT_NAME <- rep(c("Density", "Gravity", "C6", "C7", "C8"), times = 3)
NAME <- rep(c("HX", "RT", "OP"), each = 5)
ANALYSIS <- rep("P8", 15)
DATE <- rep(c("1/1/2014", "1/15/2014", "1/3/2014"), each = 5)
RESULT <- rep(c(0.8593, 33.13, 2.1, 2.3, 2.2), times = 3)
# Lay the variables out as the rows of a 7 x 15 matrix. c() on mixed numeric
# and character input coerces everything to character, so this is a character
# matrix.
data <- matrix(
  c(ID_NUMERIC, SAMPLE_NAME, DATE, ANALYSIS, COMPONENT_NAME, RESULT, NAME),
  nrow = 7,
  byrow = TRUE
)
# Row labels follow the order used in c() above.
rownames(data) <- c(
  "ID_NUMERIC", "SAMPLE_NAME", "DATE", "ANALYSIS", "COMPONENT_NAME",
  "RESULT", "NAME"
)
# The data viewer is only opened in an interactive session.
if (interactive()) {
  View(data)
}
data.frame(data)

Related

How to apply `lapply` function to a set of time series data set

I have time-series data. The data contains four univariate time-series columns over several years. I would like to fit ARIMA model to each univariate time-series data for each year of the first 4 years.
I tried this:
library(ggplot2)
library(forecast)
library(tseries)
library(zoo)
library(dplyr)
library(data.table)
# Load the built-in series and keep a copy under a shorter name.
data("EuStockMarkets")
dat <- EuStockMarkets
# Split the series by the integer part of its time index and turn every piece
# back into a ts object.
res <- lapply(
  split(as.zoo(EuStockMarkets), as.integer(time(EuStockMarkets))),
  as.ts
)
# Take pieces 5 to 8 and re-wrap each one with ts(); names are dropped so the
# result is a plain unnamed list.
datNew <- lapply(unname(res[5:8]), ts)
# Log-transform each piece, then take first differences.
dat.log <- lapply(datNew, log)
dat.diff <- lapply(dat.log, diff)
Logreturns <- dat.diff
The following code does not work properly, as it gives me only 4 values. However, I expected it to give me 16 values (4 years, 4 univariate time series in each year):
# One auto.arima() fit per list element k, using column k of element k.
Arima.model <- lapply(
  seq_len(4),
  function(k) auto.arima(Logreturns[[k]][, k])
)
I think what you need is double lapply, one to iterate over each list and another to iterate over each column of the list.
# Outer lapply walks the list elements, inner lapply walks the columns of each
# element; unlist(recursive = FALSE) removes one level of nesting so every
# fitted model is a top-level element of `result`.
result <- unlist(
  lapply(seq_along(Logreturns), function(yr) {
    series <- Logreturns[[yr]]
    lapply(seq_len(ncol(series)), function(col) auto.arima(series[, col]))
  }),
  recursive = FALSE
)

na.omit function is not removing rows containing NA

Hi there, I have been searching the internet to find out what is wrong: the na.omit() function is not removing the rows with NA. Could you please help me?
library(TTR)
library(quantmod)
library(doParallel) #this library is for parallel core processing
# Download window and ticker.
StartDate <- "2010-01-01"
EndDate <- "2020-03-20"
myStock <- c("AMZN")
getSymbols(myStock, src = "yahoo", from = StartDate, to = EndDate)
# Closing prices only, stripped of the time index.
gdat <- coredata(AMZN$AMZN.Close)
# All columns as a data frame, with the time index kept as a `date` column.
Data <- data.frame(date = index(AMZN), coredata(AMZN))
# RSI over 22 and 44 periods; each result is wrapped in data.frame() before
# being stored as a column.
Data$rsi22 <- data.frame(RSI(Cl(Data), n = 22))
Data$rsi44 <- data.frame(RSI(Cl(Data), n = 44))
colnames(Data)
# Attempt to drop the rows that contain NA.
DatanoNA <- na.omit(Data)
I think you're looking for the complete.cases() function. na.omit() is for removing NA values in a vector, not for removing rows containing NA values from a data frame.
Also, your data frame construction is a little wonky (see below for more explanation). Try this:
# Build the data frame in one call so the RSI results are stored directly as
# columns.
Data <- data.frame(
  date = index(AMZN),
  coredata(AMZN),
  rsi22 = RSI(Cl(Data), n = 22),
  rsi44 = RSI(Cl(Data), n = 44)
)
# Row count before, and the number of rows without any NA.
nrow(Data)
sum(complete.cases(Data))
Normally every column of a data frame is a vector. The results of RSI() are stored as a vector. When you say
# The RSI() result is wrapped in data.frame() before being assigned as a column.
Data$rsi22 <- data.frame(RSI(Cl(Data), n=22))
what you're doing is wrapping the results in a data frame and then embedding it in another data frame (Data), which is something you can legally do in R but which is unusual and confuses a lot of the standard data-processing functions.
You could try complete.cases
# Keep only the rows of Data for which complete.cases() returns TRUE.
DatanoNA <- Data[complete.cases(Data),]

How to change data type of column in Data frame to Date from Char

I'm messing with some columns in R using RStudio and have tried to change the data type of one of the columns from Char to Date.
I have used a few options and the one that came the closest was
# Coerce Date to character, then parse it with as.Date() without an explicit format.
data$Date <- as.Date(as.character(data$Date))
Though even this doesn't seem to work as it changes the values of the column to some weird values
i.e. from
To something like
I can't quite figure out why the transformation isn't working.
Here is my code up until that point
# load the tidyverse library
library("tidyverse")
setwd("C:/Users/ibrahim.cetinkaya/OneDrive - NTT/Desktop/data")
##################### Part A #####################
# data files (you need to specify the paths of the CSV files, e.g. relative
# or absolute)
files <- c(
  "data/201808.csv",
  "data/201809.csv",
  "data/201810.csv",
  "data/201811.csv",
  "data/201812.csv",
  "data/201901.csv",
  "data/201902.csv",
  "data/201903.csv",
  "data/201904.csv",
  "data/201905.csv",
  "data/201906.csv",
  "data/201908.csv"
)
# Concatenate into one data frame. Every file is read once (skipping its first
# 7 lines) and all pieces are combined in a single rbind() call instead of
# growing `data` inside a loop.
data <- do.call(rbind, lapply(files, function(path) read_csv(path, skip = 7)))
# View to verify
view(data)
# Part 2
# Remove variables which have no data at all (all the values are NA)
# Remove variables that don't have adequate data (70% of the records are NA)
# NOTE(review): the two comments above describe dropping *variables* (columns)
# at a 70% threshold, but the line below drops *rows* in which more than 90%
# of the values are NA. Confirm which is intended; the column version would be
# data[colMeans(is.na(data)) < 0.7].
data <- data[rowMeans(is.na(data)) <= 0.9, ]
view(data)
# Change the column names to have no spaces between the words
names(data) <- gsub(" ", "_", names(data))
view(data)
# Convert Date to date type
#df2 <- data %>% mutate_at(vars(data), as.Date, format="%m-%d-%Y")
#data %>% mutate(data$Date==as.Date(Date, format = "%m.%d.%Y"))
data$Date <- as.Date(as.character(data$Date))
#^^^ This doesn't seem to be working properly ^^^
# Checking if it worked: class() reports "Date" after a successful conversion,
# whereas typeof() would only show the underlying storage type.
class(data$Date)
view(data)
Any suggestions would be appreciated.
I want to be able to change the data type and then extract the month and use it for grouping some of the other data in my frame.
Use
# Parse the existing Date column (note the capital D) as month/day/year.
data$Date <- as.Date(data$Date, format = "%m/%d/%Y")
and then to extract month
# "%m" formats each date as its month number, returned as a character string.
data$Month <- format(data$Date, "%m")
We can also use lubridate
# Parse the existing Date column (capital D) as month-day-year.
data$Date <- lubridate::mdy(data$Date)
and use month to extract the month.
# Month number taken from the Date column; the function is namespace-qualified
# so it works without attaching lubridate.
data$month <- lubridate::month(data$Date)
and with anytime
# Let anytime::anydate() parse the Date column; no format string is supplied.
data$Date <- anytime::anydate(data$Date)

Data in a dataset are shifted irregularly in R

I am trying to simulate an experiment in a mathematical model. Resulting dataset contains data from the experiment - output data (out_exp) which are a result of input data (inp_exp) - and data from the simulation of the experiment- output data (out_sim) which are a result of input data (inp_sim).
When I merge all the data into a data frame, an irregular shift between the simulated and experimental datasets can be seen. In order to compare and evaluate out_exp and out_sim, both inp_exp and inp_sim have to start from the same date. This means I need to shift the simulation data (inp_sim and out_sim) together so that the input data line up (i.e. inp_exp[i] == inp_sim[i]). The problem is that the shift between the input data is not regular (see data below).
Does anyone have an idea how to do it? Thank you in advance.
Original data:
# Experiment and simulation series as recorded (28 daily values each).
inp_exp <- c(
  0, 0, 5, 1, 2, 3, 4, 0, 0, 0, 1, 4, 8, 1,
  2, 0, 0, 0, 0, 1, 5, 8, 9, 9, 1, 0, 0, 0
)
inp_sim <- c(
  0, 0, 0, 5, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0,
  1, 4, 8, 1, 2, 0, 1, 5, 8, 9, 9, 1, 0, 0
)
out_exp <- c(
  0, 0, 0, 1, 4, 5, 1, 0, 0, 0, 0, 1, 2, 4,
  1, 0, 0, 0, 0, 0, 2, 4, 5, 8, 2, 0, 0, 0
)
out_sim <- c(
  0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0,
  0, 1, 2, 3, 1, 0, 0, 0, 1, 5, 6, 4, 1, 0
)
# One calendar day per observation, starting on the given date.
D <- seq(
  from = as.Date("2018/10/2"),
  by = "day",
  length.out = length(inp_exp)
)
df <- data.frame(
  D = D,
  inp_exp = inp_exp,
  inp_sim = inp_sim,
  out_exp = out_exp,
  out_sim = out_sim
)
df
Expected result:
# Target layout: the simulation series shifted so that inp_sim lines up with
# inp_exp (28 daily values each).
inp_exp <- c(
  0, 0, 5, 1, 2, 3, 4, 0, 0, 0, 1, 4, 8, 1,
  2, 0, 0, 0, 0, 1, 5, 8, 9, 9, 1, 0, 0, 0
)
inp_sim <- c(
  0, 0, 5, 1, 2, 3, 4, 0, 0, 0, 1, 4, 8, 1,
  2, 0, 0, 0, 0, 1, 5, 8, 9, 9, 1, 0, 0, 0
)
out_exp <- c(
  0, 0, 0, 1, 4, 5, 1, 0, 0, 0, 0, 1, 2, 4,
  1, 0, 0, 0, 0, 0, 2, 4, 5, 8, 2, 0, 0, 0
)
out_sim <- c(
  0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 1, 2, 3,
  1, 0, 0, 0, 0, 0, 0, 1, 5, 6, 4, 1, 0, 0
)
# One calendar day per observation, starting on the given date.
D <- seq(
  from = as.Date("2018/10/2"),
  by = "day",
  length.out = length(inp_exp)
)
df <- data.frame(
  D = D,
  inp_exp = inp_exp,
  inp_sim = inp_sim,
  out_exp = out_exp,
  out_sim = out_sim
)
df

R move named column to the end of a data frame

I'm trying to move a column to the end of a data frame and I'm struggling
# Positions of the columns whose names match the pattern stored in `output`.
output_index <- grep(output, names(df))
# Both pieces are extracted with matrix-style [ , j] indexing (note the leading
# commas) and then glued back together with cbind().
df <- cbind(df[,-output_index], df[,output_index])
This orders the data properly; however, it converts the data to a matrix, which doesn't work. How can I do this without losing the column names, while keeping the data as a data frame?
Didn't need the , in front of the index:
# Positions of the columns whose names match the pattern stored in `output`.
output_index <- grep(output, names(df))
# Reorder by position with list-style `[`: the result is always a data frame
# with its column names intact, and df is left unchanged when nothing matches
# (a negative index built from an empty match would select no columns at all).
df <- df[c(setdiff(seq_along(df), output_index), output_index)]
# Small reproducible example: move the `output` column to the end.
df <- data.frame(id = 1:10, output = rnorm(10, 1, 1), input = rnorm(10, 1, 1))
output_index <- grep("output", names(df))
# List-style `[` with a reordered position vector keeps a data frame and the
# original column names; df[, output_index] would drop the single column to a
# bare vector and lose its name.
res.df <- df[c(setdiff(seq_along(df), output_index), output_index)]

Resources