R Converting Data Frame from Long to Wide - r
I'm trying to convert a data frame from long to wide format, but I'm running into the same issue - I get NA's. I think there might be something wrong with the aggregate function I'm using:
library(reshape2)
library(plyr)
# Build the long-format sample table: three samples, each measured for the
# same five components, giving 15 rows (one per sample/component pair).
ID_NUMERIC <- rep(c(5525, 8523, 4569), each = 5)
SAMPLE_NAME <- rep(c("HX44", "RT5", "OP1"), each = 5)
# The date strings must be supplied as ONE character vector together with an
# explicit month/day/year format. If they are passed to as.Date() as separate
# arguments, the second string is taken as `format` and the result is a
# single NA that data.frame() then recycles over every row.
DATE <- as.Date(
  rep(c("1/1/2014", "1/15/2014", "1/3/2014"), each = 5),
  format = "%m/%d/%Y"
)
ANALYSIS <- rep("P8", 15)
COMPONENT_NAME <- rep(c("Density", "Gravity", "C6", "C7", "C8"), times = 3)
RESULT <- rep(c(0.8593, 33.13, 2.1, 2.3, 2.2), times = 3)
NAME <- rep(c("HX", "RT", "OP"), each = 5)
# Column order is kept as: id, sample, date, component, analysis, result, name.
first <- data.frame(
  ID_NUMERIC, SAMPLE_NAME, DATE, COMPONENT_NAME, ANALYSIS, RESULT, NAME
)
# Number the rows within each COMPONENT_NAME group (1, 2, ... per component)
# and store that counter in a new `id` column.
second <- ddply(first, .(COMPONENT_NAME), function(x) {
  # seq_len() stays empty for a zero-row group, whereas 1:nrow(x) would
  # yield c(1, 0).
  x$id <- seq_len(nrow(x))
  x
})
# Cast long -> wide: the identifying columns (plus `id`) define the rows,
# each COMPONENT_NAME becomes a column, and cells are filled from RESULT.
last <- dcast(
  second,
  NAME + SAMPLE_NAME + DATE + ID_NUMERIC + ANALYSIS + id ~ COMPONENT_NAME,
  value.var = "RESULT"
)
You could put the data into a matrix and work with it from there?
# Alternative approach: hold the seven variables in a matrix, one variable
# per row and one observation per column (15 observations).
ID_NUMERIC <- rep(c(5525, 8523, 4569), each = 5)
SAMPLE_NAME <- rep(c("HX44", "RT5", "OP1"), each = 5)
COMPONENT_NAME <- rep(c("Density", "Gravity", "C6", "C7", "C8"), times = 3)
NAME <- rep(c("HX", "RT", "OP"), each = 5)
ANALYSIS <- rep("P8", 15)
DATE <- rep(c("1/1/2014", "1/15/2014", "1/3/2014"), each = 5)
RESULT <- rep(c(0.8593, 33.13, 2.1, 2.3, 2.2), times = 3)
# c() on mixed numeric/character vectors coerces everything to character, so
# every cell of the matrix is a string. With byrow = TRUE and nrow = 7 each
# 15-element variable fills exactly one row.
data <- matrix(
  c(ID_NUMERIC, SAMPLE_NAME, DATE, ANALYSIS, COMPONENT_NAME, RESULT, NAME),
  nrow = 7, byrow = TRUE
)
# Row labels follow the order in which the vectors were concatenated above.
rownames(data) <- c(
  "ID_NUMERIC", "SAMPLE_NAME", "DATE", "ANALYSIS", "COMPONENT_NAME",
  "RESULT", "NAME"
)
# View() opens a GUI data viewer, so only call it in an interactive session.
if (interactive()) {
  View(data)
}
data.frame(data)
Related
How to apply `lapply` function to a set of time series data set
I have time-series data. The data contains four univariate time-series columns over several years. I would like to fit an ARIMA model to each univariate time series for each of the first 4 years. I tried this: library(ggplot2) library(forecast) library(tseries) library(zoo) library(dplyr) library(data.table) data("EuStockMarkets") dat <- EuStockMarkets res <- lapply(split(as.zoo(EuStockMarkets), as.integer(time(EuStockMarkets))), as.ts) datNew <- lapply(5:8, function(i) ts(res[[i]])) dat.log <- lapply(1:4, function(i) log(datNew[[i]])) dat.diff <- lapply(1:4, function(i)diff(dat.log[[i]])) Logreturns <- dat.diff The following code does not work properly, as it gives me only 4 values. However, I expect it to give me 16 values (4 years, 4 univariate time series in each year): Arima.model <- lapply(1:4, function(i)auto.arima(Logreturns[[i]][,i]))
I think what you need is double lapply, one to iterate over each list and another to iterate over each column of the list. result <- unlist(lapply(seq_along(Logreturns), function(i) lapply(seq_len(ncol(Logreturns[[i]])), function(j) auto.arima(Logreturns[[i]][,j]))), recursive = FALSE)
na.omit function is not removing rows containing NA
Hi there I am looking on the internet what is wrong and the na.omit() function is not removing the rows with NA. Could you please help me? library(TTR) library(quantmod) library(doParallel) #this library is for parallel core processing StartDate = "2010-01-01" EndDate = "2020-03-20" myStock <- c("AMZN") getSymbols(myStock, src="yahoo", from=StartDate, to=EndDate) gdat <-coredata(AMZN$AMZN.Close) # Create a 2-d array of all the data. Or... Data <- data.frame(date=index(AMZN), coredata(AMZN)) # Create a data frame with the data and (optionally) maintain the date as an index Data$rsi22 <- data.frame(RSI(Cl(Data), n=22)) Data$rsi44 <- data.frame(RSI(Cl(Data), n=44)) colnames(Data) DatanoNA <- na.omit(Data) #remove rows with NAs
I think you're looking for the complete.cases() function. na.omit() is for removing NA values in a vector, not for removing rows containing NA values from a data frame. Also, your data frame construction is a little wonky (see below for more explanation). Try this: Data <- data.frame(date=index(AMZN), coredata(AMZN), rsi22=RSI(Cl(Data), n=22), rsi44=RSI(Cl(Data), n=44)) nrow(Data) nrow(Data[complete.cases(Data),]) Normally every column of a data frame is a vector. The results of RSI() are stored as a vector. When you say Data$rsi22 <- data.frame(RSI(Cl(Data), n=22)) what you're doing is wrapping the results in a data frame and then embedding it in another data frame (Data), which is something you can legally do in R but which is unusual and confuses a lot of the standard data-processing functions.
You could try complete.cases DatanoNA <- Data[complete.cases(Data),]
How to change data type of column in Data frame to Date from Char
I'm messing with some columns in R using RStudio and have tried to change the data type of one of the columns from Char to Date. I have used a few options and the one that came the closest was data$Date <- as.Date(as.character(data$Date)) Though even this doesn't seem to work as it changes the values of the column to some weird values i.e. from To something like I can't quite figure out why the transformation isn't working. Here is my code up until that point # load the tidyverse library library("tidyverse") setwd("C:/Users/ibrahim.cetinkaya/OneDrive - NTT/Desktop/data") ##################### Part A ##################### # data files (you need to specify the paths of the CSV files (e.g. relativeor absolute) ) files <- c("data/201808.csv", "data/201809.csv", "data/201810.csv", "data/201811.csv", "data/201812.csv", "data/201901.csv", "data/201902.csv", "data/201903.csv", "data/201904.csv", "data/201905.csv", "data/201906.csv", "data/201908.csv" ) #Concatenate into one data frame. data <- data.frame() for (i in 1:length(files)){ temp <- read_csv(files[i], skip = 7) data <- rbind(data, temp) } #View to verify view(data) #Part 2 #Remove vairables which have no data at all (All the data are na's) #Remove variables that doesn't have adequate data (70% of the number of records are NA's) data <- data[rowMeans(is.na(data))<=0.9,] view(data) #Change the column names to have no spaces between the words names(data) <- gsub(" ", "_", names(data)) view(data) #Convert Date to date type #df2 <- data %>% mutate_at(vars(data), as.Date, format="%m-%d-%Y") #data %>% mutate(data$Date==as.Date(Date, format = "%m.%d.%Y")) data$Date <- as.Date(as.character(data$Date)) #^^^ This doesn't seem to be working properly ^^^ #Checking if it worked typeof(data$Date) view(data) Any suggestions would be appreciated. I want to be able to change the data type and then extract the month and use it for grouping some of the other data in my frame.
Use data$Date <- as.Date(data$Date, "%m/%d/%Y") and then, to extract the month, data$Month <- format(data$Date, "%m"). We can also use lubridate: data$Date <- lubridate::mdy(data$Date), and use month() to extract the month: data$Month <- lubridate::month(data$Date). And with anytime: data$Date <- anytime::anydate(data$Date)
Data in a dataset are shifted irregularly in R
I am trying to simulate an experiment in a mathematical model. Resulting dataset contains data from the experiment - output data (out_exp) which are a result of input data (inp_exp) - and data from the simulation of the experiment- output data (out_sim) which are a result of input data (inp_sim). When I merge all data to a dataframe, an irregular shift among simulated and experiment datasets can be found. In order to be able to compare and evaluate the out_exp and out_sim, both inp_exp and inp_sim has to start from the same date. It means I need to shift data from simulation (inp_sim and out_sim) together according to input data (ie. inp_exp[i] == inp_sim[i]). A problem is that the shift between input data is not regular (see data below). Does any one have an idea how to do it? Thank you in advance. Original data: inp_exp <- c(0,0,5,1,2,3,4,0,0,0,1,4,8,1,2,0,0,0,0,1,5,8,9,9,1,0,0,0) inp_sim <- c(0,0,0,5,1,2,3,4,0,0,0,0,0,0,1,4,8,1,2,0,1,5,8,9,9,1,0,0) out_exp <- c(0,0,0,1,4,5,1,0,0,0,0,1,2,4,1,0,0,0,0,0,2,4,5,8,2,0,0,0) out_sim <- c(0,0,0,0,0,1,2,1,0,0,0,0,0,0,0,1,2,3,1,0,0,0,1,5,6,4,1,0) D <- seq(as.Date("2018/10/2"), by = "day", length.out = length(inp_exp)) df <- data.frame(D, inp_exp, inp_sim, out_exp, out_sim) df Expected result: inp_exp <- c(0,0,5,1,2,3,4,0,0,0,1,4,8,1,2,0,0,0,0,1,5,8,9,9,1,0,0,0) inp_sim <- c(0,0,5,1,2,3,4,0,0,0,1,4,8,1,2,0,0,0,0,1,5,8,9,9,1,0,0,0) out_exp <- c(0,0,0,1,4,5,1,0,0,0,0,1,2,4,1,0,0,0,0,0,2,4,5,8,2,0,0,0) out_sim <- c(0,0,0,0,1,2,1,0,0,0,0,1,2,3,1,0,0,0,0,0,0,1,5,6,4,1,0,0) D <- seq(as.Date("2018/10/2"), by = "day", length.out = length(inp_exp)) df <- data.frame(D, inp_exp, inp_sim, out_exp, out_sim) df
R move named column to the end of a data frame
I'm trying to move a column to the end of a data frame and I'm struggling: output_index <- grep(output, names(df)) df <- cbind(df[,-output_index], df[,output_index]) This orders the data properly; however, it converts the data to a matrix, which doesn't work. How can I do this without losing the column names, while keeping the data as a data frame?
Didn't need the , in front of the index: output_index <- grep(output, names(df)) df <- cbind(df[-output_index], df[output_index])
# Example data frame with the column to be moved ("output") in the middle.
df <- data.frame(id = 1:10, output = rnorm(10, 1, 1), input = rnorm(10, 1, 1))
# Position(s) of the column(s) whose name matches "output".
output_index <- grep("output", names(df))
# List-style indexing (no comma) always returns a data frame, so the moved
# column keeps its name. df[, output_index] on a single column would drop to
# a bare vector and cbind() would label it with the deparsed expression.
res.df <- cbind(df[-output_index], df[output_index])