I have two character vectors of equal length; where position one in vector.x matches position one in vector.y and so on. The elements refer to column names in a data frame (wide format). I would like to somehow loop through these vectors to produce xy scatter graphs for each pair in the vector, preferably in a faceted plot. Here is a (hopefully) reproducible example. To be clear, with this example, I would end up with 10 scatter graphs.
vector.x <- c("Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Bathycoccus", "Brockmanniella", "Brockmanniella", "Caecitellus_paraparvulus", "Caecitellus_paraparvulus")
vector.y <- c("Aliiroseovarius", "Neptuniibacter", "Pseudofulvibacter", "Thalassobius", "unclassified_Porticoccus", "Tenacibaculum", "Pseudomonas", "unclassified_GpIIa", "Marinobacter", "Thalassobius")
structure(list(Aliiroseovarius = c(0, 0, 0, 0.00487132352941176,
0.0108639420589757), Marinobacter = c(0, 0.00219023779724656,
0, 0.00137867647058824, 0.00310398344542162), Neptuniibacter = c(0.00945829750644884,
0.00959532749269921, 0.0171310629514964, 0.2796875, 0.345835488877393
), Pseudofulvibacter = c(0, 0, 0, 0.00284926470588235, 0.00362131401965856
), Pseudomonas = c(0.00466773123694878, 0.00782227784730914,
0.0282765737874097, 0.00707720588235294, 0.00400931195033627),
Tenacibaculum = c(0, 0, 0, 0.00505514705882353, 0.00362131401965856
), Thalassobius = c(0, 0.00166875260742595, 0, 0.0633272058823529,
0.147697878944646), unclassified_GpIIa = c(0, 0.000730079265748853,
0, 0.003125, 0.00103466114847387), unclassified_Porticoccus = c(0,
0, 0, 0.00119485294117647, 0.00569063631660631), Aplanochytrium = c(0,
0, 0, 0.000700770847932726, 0.0315839846865529), Bathycoccus = c(0.000388802488335925,
0, 0, 0.0227750525578136, 0.00526399744775881), Brockmanniella = c(0,
0.00383141762452107, 0, 0.000875963559915907, 0), Caecitellus_paraparvulus = c(0,
0, 0, 0.000875963559915907, 0.00797575370872547)), row.names = c("B11",
"B13", "B22", "DI5", "FF6"), class = "data.frame")
As Rui Barradas shows, it's possible to get a very nice plot from ggplot and gridExta. If you wanted to stick to base R, here's how you'd do that (assuming your data set is called df1):
# set plot sizes
par(mfcol = c(floor(sqrt(length(vector.x))), ceiling(sqrt(length(vector.x)))))
# loop through plots
for (i in 1:length(vector.x)) {
plot(df1[[vector.x[i]]], df1[[vector.y[i]]], xlab = vector.x[i], ylab = vector.y[i])
}
# reset plot size
par(mfcol = c(1,1))
This is a bit long and convoluted but it works.
library(tidyverse)
library(gridExtra)
df_list <- apply(data.frame(vector.x, vector.y), 1, function(x){
DF <- df1[which(names(df1) %in% x)]
i <- which(names(DF) %in% vector.x)
if(i == 2) DF[2:1] else DF
})
gg_list <- lapply(df_list, function(DF){
ggplot(DF, aes(x = get(names(DF)[1]), y = get(names(DF)[2]))) +
geom_point() +
xlab(label = names(DF)[1]) +
ylab(label = names(DF)[2])
})
g <- do.call(grid.arrange, gg_list)
g
Not too elegant, but should get you going:
vector.x <- c("Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Bathycoccus", "Brockmanniella", "Brockmanniella", "Caecitellus_paraparvulus", "Caecitellus_paraparvulus")
vector.y <- c("Aliiroseovarius", "Neptuniibacter", "Pseudofulvibacter", "Thalassobius", "unclassified_Porticoccus", "Tenacibaculum", "Pseudomonas", "unclassified_GpIIa", "Marinobacter", "Thalassobius")
df1 = structure(
list(Aliiroseovarius = c(0, 0, 0, 0.00487132352941176, 0.0108639420589757),
Marinobacter = c(0, 0.00219023779724656, 0, 0.00137867647058824, 0.00310398344542162),
Neptuniibacter = c(0.00945829750644884, 0.00959532749269921, 0.0171310629514964, 0.2796875, 0.345835488877393),
Pseudofulvibacter = c(0, 0, 0, 0.00284926470588235, 0.00362131401965856),
Pseudomonas = c(0.00466773123694878, 0.00782227784730914, 0.0282765737874097, 0.00707720588235294, 0.00400931195033627),
Tenacibaculum = c(0, 0, 0, 0.00505514705882353, 0.00362131401965856),
Thalassobius = c(0, 0.00166875260742595, 0, 0.0633272058823529, 0.147697878944646),
unclassified_GpIIa = c(0, 0.000730079265748853, 0, 0.003125, 0.00103466114847387),
unclassified_Porticoccus = c(0, 0, 0, 0.00119485294117647, 0.00569063631660631),
Aplanochytrium = c(0, 0, 0, 0.000700770847932726, 0.0315839846865529),
Bathycoccus = c(0.000388802488335925, 0, 0, 0.0227750525578136, 0.00526399744775881),
Brockmanniella = c(0, 0.00383141762452107, 0, 0.000875963559915907, 0),
Caecitellus_paraparvulus = c(0, 0, 0, 0.000875963559915907, 0.00797575370872547)),
row.names = c("B11", "B13", "B22", "DI5", "FF6"),
class = "data.frame"
)
df2 = NULL
for(i in 1:10) {
df.tmp = data.frame(
plot = paste0(vector.x[i], ":", vector.y[i]),
x = df1[[vector.x[i]]],
y = df1[[vector.y[i]]]
)
if(is.null(df2)) df2=df.tmp else df2 = rbind(df2, df.tmp)
}
ggplot(data=df2, aes(x, y)) +
geom_point() +
facet_grid(cols = vars(plot))
Related
I would like to display only the positive octant of a unit sphere. So far, using the rgl package in R, I could show the entire sphere. Is it possible to "truncate" it? I am open to any other package that does the trick.
# Fake data
norm_vec <- function(x) sqrt(sum(x ^ 2))
data <- data.frame(T3 = runif(100), T6 = runif(100), P4 = runif(100))
norms <- apply(data, 1, norm_vec)
data <- data / norms
cluster <- sample(1:6, 100, replace = T)
#' Initialize a rgl device
#'
#' #param new.device a logical value. If TRUE, creates a new device
#' #param bg the background color of the device
#' #param width the width of the device
rgl_init <- function(new.device = FALSE, bg = "white", width = 640) {
if( new.device | rgl.cur() == 0 ) {
rgl.open()
par3d(windowRect = 50 + c( 0, 0, width, width ) )
rgl.bg(color = bg )
}
rgl.clear(type = c("shapes", "bboxdeco"))
rgl.viewpoint(theta = 30, phi = 0, zoom = 0.90)
}
#' Get colors for the different levels of a factor variable
#'
#' #param groups a factor variable containing the groups of observations
#' #param colors a vector containing the names of the default colors to be used
get_colors <- function(groups, group.col = palette()){
groups <- as.factor(groups)
ngrps <- length(levels(groups))
if(ngrps > length(group.col))
group.col <- rep(group.col, ngrps)
color <- group.col[as.numeric(groups)]
names(color) <- as.vector(groups)
return(color)
}
# Setting colors according to the cluster column
my_cols <- get_colors(cluster, c("#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"))
# Ploting sphere
rgl_init()
par3d(cex = 1.35)
plot3d(x = data[, "T3"], y = data[, "P4"], z = data[, "T6"],
type = "s", r = .04,
col = my_cols,
xlab = 'T3', ylab = 'P4', zlab = 'T6')
rgl.spheres(0, 0, 0, radius = 0.995, col = 'lightgray', alpha = 0.6, back = 'lines')
arc3d(c(1, 0, 0), c(0, 1, 0), c(0, 0, 0), radius = 1, lwd = 7.5, col = "black")
arc3d(c(1, 0, 0), c(0, 0, 1), c(0, 0, 0), radius = 1, lwd = 7.5, col = "black")
arc3d(c(0, 0, 1), c(0, 1, 0), c(0, 0, 0), radius = 1, lwd = 7.5, col = "black")
bbox3d(col = c("black", "black"),
xat = c(0, 0.5, 1), yat = c(0, 0.5, 1), zat = c(0, 0.5, 1),
polygon_offset = 1)
aspect3d(1, 1, 1)
You can use cliplanes3d() to do that. You should also avoid using any of the rgl.* functions; use the *3d alternatives instead unless you really know what you're doing. It's almost never a good idea to mix the two types.
For example:
# Fake data
norm_vec <- function(x) sqrt(sum(x ^ 2))
data <- data.frame(T3 = runif(100), T6 = runif(100), P4 = runif(100))
norms <- apply(data, 1, norm_vec)
data <- data / norms
cluster <- sample(1:6, 100, replace = T)
#' Initialize a rgl device
#'
#' #param new.device a logical value. If TRUE, creates a new device
#' #param bg the background color of the device
#' #param width the width of the device
rgl_init <- function(new.device = FALSE, bg = "white", width = 640) {
if( new.device || rgl.cur() == 0 ) {
open3d(windowRect = 50 + c( 0, 0, width, width ) )
bg3d(color = bg )
}
clear3d(type = c("shapes", "bboxdeco"))
view3d(theta = 30, phi = 0, zoom = 0.90)
}
#' Get colors for the different levels of a factor variable
#'
#' #param groups a factor variable containing the groups of observations
#' #param colors a vector containing the names of the default colors to be used
get_colors <- function(groups, group.col = palette()){
groups <- as.factor(groups)
ngrps <- length(levels(groups))
if(ngrps > length(group.col))
group.col <- rep(group.col, ngrps)
color <- group.col[as.numeric(groups)]
names(color) <- as.vector(groups)
return(color)
}
# Setting colors according to the cluster column
my_cols <- get_colors(cluster, c("#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"))
# Ploting sphere
rgl_init()
par3d(cex = 1.35)
plot3d(x = data[, "T3"], y = data[, "P4"], z = data[, "T6"],
type = "s", r = .04,
col = my_cols,
xlab = 'T3', ylab = 'P4', zlab = 'T6')
spheres3d(0, 0, 0, radius = 0.995, col = 'lightgray', alpha = 0.6, back = 'lines')
arc3d(c(1, 0, 0), c(0, 1, 0), c(0, 0, 0), radius = 1, lwd = 7.5, col = "black")
arc3d(c(1, 0, 0), c(0, 0, 1), c(0, 0, 0), radius = 1, lwd = 7.5, col = "black")
arc3d(c(0, 0, 1), c(0, 1, 0), c(0, 0, 0), radius = 1, lwd = 7.5, col = "black")
bbox3d(col = c("black", "black"),
xat = c(0, 0.5, 1), yat = c(0, 0.5, 1), zat = c(0, 0.5, 1),
polygon_offset = 1)
aspect3d(1, 1, 1)
clipplanes3d(c(1,0,0), c(0,1,0), c(0,0,1), d=0)
This produces
I want to plot a correlation plot between two datasets. y axis is frequency of genes and x axis is correlation value or R2 value.
gvds_counts = read.delim("GVDS_normalized_counts_new.txt", header = TRUE, sep="\t",row.names = 1)
gvds_elastic = (read.delim("GVDS_PrediXcan_Test_2021_new.txt", header = TRUE, sep="\t",row.names = 1))
gvds_elastic= gvds_elastic[,-1]
gvds_elastic1=t(gvds_elastic)
This is how my two datasets look like:
dput(gvds_elastic1[1:5,1:5])
structure(c(0.117057915232398, 0, 0.16739607781207, -0.00582814885591799,
-0.00522232324665859, 0.196331738115648, 0, -0.00712521269403638,
0, -0.00522232324665859, 0.196331738115648, -0.021754984214482,
-0.00356260634701819, -0.0210845683319449, -0.00522232324665859,
0.177439691941073, -0.021754984214482, 0, -0.0210845683319449,
-0.00522232324665859, 0.158547645766499, -0.0435099684289639,
0, -0.00582814885591799, -0.00522232324665859), .Dim = c(5L,
5L), .Dimnames = list(c("ENSG00000107959.15", "ENSG00000057608.16",
"ENSG00000281186.1", "ENSG00000151632.17", "ENSG00000134452.19"
), c("GTEX-1117F", "GTEX-111CU", "GTEX-111FC", "GTEX-111VG",
"GTEX-111YS")))
dput(gvds_counts[1:5,1:5])
structure(list(GTEX.1117F.3226.SM.5N9CT = c(1.07050611836238,
319.010823271988, 0, 0, 0), GTEX.111FC.3126.SM.5GZZ2 = c(0, 137.627503211918,
0.81921132864237, 1.63842265728474, 0), GTEX.1128S.2726.SM.5H12C = c(0.93125973749023,
98.7135321739644, 0, 0.93125973749023, 0.93125973749023), GTEX.117XS.3026.SM.5N9CA = c(0,
140.966661860149, 0, 0.766123162283421, 0.766123162283421), GTEX.1192X.3126.SM.5N9BY = c(0.937426181007344,
139.676500970094, 0, 0.937426181007344, 0)), row.names = c("ENSG00000223972.5",
"ENSG00000227232.5", "ENSG00000278267.1", "ENSG00000243485.5",
"ENSG00000237613.2"), class = "data.frame")
I want to first rename the ID in gvds_counts similar to gvds_elastic1 which is to edit it from
GTEX.1128S.2726.SM.5H12C to GTEX-1128S
and then plot a correlation between these two datasets with x axis as R2 value and y axis as frequency of genes.
I would like to plot an environmental variable on a ggplot2 version of a DCA plot.
I have some code where I extract species and data scores from vegan and then plot them up in ggplot2. I am having trouble trying to work out how I can get my environmental variable SWLI to plot as an arrow - something like this RDA's plots with ggvegan: How can I change text position for arrows text? (or see PCA example here https://www.rpubs.com/an-bui/vegan-cheat-sheet)
Can anybody help?
#DCA Plot
library(plyr)
library(vegan)
library(ggplot2)
library(cluster)
library(ggfortify)
library(factoextra)
#read in csv and remove variables you don't want to go through analysis
regforamcountsall<-read_csv("regionalforamcountsallnocalcs.csv")
swli<-read_csv("DCAenv.csv")
rownames(regforamcountsall)<-regforamcountsall$Sample
regforamcountsall$Sample = NULL
regforamcountsall$Site=NULL
regforamcountsall$SWLI=NULL
#check csv
regforamcountsall
#run ordination
ord<-decorana(regforamcountsall)
#get species scores
summary(ord)
#get DCA values of environmental variable
ord.fit <- envfit(ord ~ SWLI, data=swli, perm=999)
ord.fit
plot(ord, dis="site")
plot(ord.fit)
#use this summary code to get species scores for DCA1 and DCA2
#put species scores values in from ord plot summary stats
species.scores<-read.csv("speciescores.csv")
species.scores$species <- row.names(species.scores)
#Using the scores function from vegan to extract the sample scores and convert to a data.frame
data.scores <- as.data.frame(scores(ord))
# create a column of groupings/clusters, from the rownames of data.scores
data.scores$endgroup <- as.factor(pam(regforamcountsall, 3)$clustering)
#getting the convex hull of each unique point set
find_hull <- function(df) df[chull(data.scores$DCA1, data.scores$DCA2), ]
hulls <- NULL
for(i in 1:length(unique(data.scores$endgroup))){
endgroup_coords <- data.scores[data.scores$endgroup == i,]
hull_coords <- data.frame(
endgroup_coords[chull(endgroup_coords[endgroup_coords$endgroup == i,]$DCA1,
endgroup_coords[endgroup_coords$endgroup == i,]$DCA2),])
hulls <- rbind(hulls,hull_coords)
}
data.scores$numbers <- 1:length(data.scores$endgroup)
regforamcountsall<-read_csv("regionalforamcountsallnocalcs.csv")
rownames(regforamcountsall)<-regforamcountsall$Sample
data.scores$Site<-regforamcountsall$Site
data.scores$SWLI<-regforamcountsall$SWLI
data.scores
#DCA with species
data.scores$Site <- as.character(data.scores$Site)
library(scico)
dca <- ggplot() +
# add the point markers
geom_point(data=data.scores,aes(x=DCA1,y=DCA2,colour=SWLI,pch=Site),size=4) + geom_point(data=species.scores,aes(x=DCA1,y=DCA2),size=3,pch=3,alpha=0.8,colour="grey22") +
# add the hulls and labels - numbers position labels
geom_polygon(data = hulls,aes(x=DCA1,y=DCA2,fill=endgroup), alpha = 0.25) +
#geom_text(data=data.scores,aes(x=DCA1-0.03,y=DCA2,colour=endgroup, label = numbers))+
geom_text(data=species.scores,aes(x=DCA1+0.1,y=DCA2+0.1, label = species))+
#look this up
geom_segment(data=ord.fit,aes(x = 0, y = 0, xend=DCA1,yend=DCA2), arrow = arrow(length = unit(0.3, "cm")))+
theme_classic()+
scale_color_scico(palette = "lapaz")+
coord_fixed()
dca
#regforamcountsall data
structure(list(Sample = c("T3LB7.008", "T3LB7.18", "T3LB7.303",
"WAP 0 ST-2", "T3LB7.5", "LG120"), T.salsa = c(86.63793102, 68.5897436,
70.39274924, 5.199999999, 79.15057916, 44.40000001), H.wilberti = c(0,
0, 0, 0, 0.386100386, 9.399999998), Textularia = c(0, 0, 0, 0,
0, 0.4), T.irregularis = c(2.155172414, 10.25641026, 7.854984897,
0, 2.702702703, 0), P.ipohalina = c(0, 0, 0, 0, 0, 0), J.macrescens = c(4.741379311,
5.769230769, 4.833836859, 5.800000001, 8.108108107, 5.400000001
), T.inflata = c(6.465517244, 15.38461538, 16.918429, 83.2, 5.791505794,
40.4), S.lobata = c(0, 0, 0, 2.300000001, 0, 0), M.fusca = c(0,
0, 0, 3.499999999, 3.861003862, 0), A.agglutinans = c(0, 0, 0,
0, 0, 0), A.exiguus = c(0, 0, 0, 0, 0, 0), A.subcatenulatus = c(0,
0, 0, 0, 0, 0), P.hyperhalina = c(0, 0, 0, 0, 0, 0), SWLI = c(200,
197.799175, 194.497937, 192.034776, 191.746905, 190.397351),
Site = c("LSP", "LSP", "LSP", "WAP", "LSP", "LG")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
#data.scores
structure(list(DCA1 = c(-1.88587476921648, -1.58550534382589,
-1.59816311314591, -0.0851161831632892, -1.69080448670088, -1.14488987340879
), DCA2 = c(0.320139736602921, 0.226662031865046, 0.230912045301637,
-0.0531232712001122, 0.272143119753744, 0.0696939776869396),
DCA3 = c(-0.755595015095353, -0.721144380683279, -0.675071834919103,
0.402339366526422, -0.731006052784081, 0.00474996849420783
), DCA4 = c(-1.10780013276303, -0.924265835490466, -0.957711953532202,
-0.434438970032073, -0.957873836258657, -0.508347000558056
), endgroup = structure(c(1L, 1L, 1L, 2L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), numbers = 1:6, Site = c("LSP",
"LSP", "LSP", "WAP", "LSP", "LG"), SWLI = c(200, 197.799175,
194.497937, 192.034776, 191.746905, 190.397351)), row.names = c(NA,
6L), class = "data.frame")
#species.scores
structure(list(species = c("1", "2", "3", "4", "5", "6"), DCA1 = c(-2.13,
-1.6996, -2.0172, -0.9689, 1.0372, -0.3224), DCA2 = c(0.342,
-0.8114, 0.3467, -0.3454, 2.0007, 0.9147)), row.names = c(NA,
6L), class = "data.frame")
Here's some data
structure(list(Period = structure(c(2017.83333333333, 2017.91666666667,
2018, 2018.08333333333, 2018.16666666667, 2018.25, 2018.33333333333,
2018.41666666667, 2018.5, 2018.58333333333, 2018.66666666667,
2018.75, 2018.83333333333, 2018.91666666667, 2019, 2019.08333333333,
2019.16666666667, 2019.25, 2019.33333333333, 2019.41666666667,
2019.5), class = "yearmon"), neg = c(0, 0, 0, 0, 0, 0, 0, 0,
-0.782066446199374, -1.33087717414387, -1.55401649141939, -1.9056578851487,
-2.19869230289699, -1.99579537718088, -2.03857957860623, -2.14184701726747,
-2.27461866979037, -2.39022691659445, -2.3732334198156, -1.83686080707261,
-1.86553025598681), pos = c(0.550567625206492, 0.699954781241267,
0.775518140437689, 0.647367030217637, 0.84562688020279, 0.923814518387379,
0.686796306801202, 0.131849327496122, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0)), row.names = 960:980, class = "data.frame")
I want to plot the SPEI values with ggplot as I learned it here:
How to format the x-axis of the hard coded plotting function of SPEI package in R?
library(ggplot2)
ggplot(test) +
geom_area(aes(x = Period, y = pos), fill = "blue", col = "black") +
geom_area(aes(x = Period, y = neg), fill = "red", col = "black") +
scale_y_continuous(limits = c(-2.25, 2.25),
breaks = -2:2) +
ylab("SPEI") + xlab("") +
theme_bw()
The result looks like this:
As you can see, when the sign changes from positive to negative, geom_area doesn't end/start at the same position. Anyone any idea how to fix this? I thought about using Date instead of yearmon, but got stuck with the same problem.
This is a gates and posts problem: Each geom_area at the inflection is starting and ending on a post, hence the overlap. They should be starting in the middle of the gate between the posts.
This solution may be a bit heavy handed but I think it should apply where there are multiple changes from positive to negative and vice versa.
library(ggplot2)
library(tidyr)
library(tibble)
library(dplyr)
library(lubridate)
library(imputeTS)
Determine when the data changes from positive to negative or vice versa
inflections <-
test %>%
mutate(inflect = case_when(lag(neg) == 0 & pos == 0 ~ TRUE,
lag(pos) == 0 & neg == 0 ~ TRUE,
TRUE ~ FALSE),
rowid = row_number() - 0.5) %>%
filter(inflect) %>%
select(-inflect) %>%
mutate(Period = NA_Date_,
pos = 0,
neg = 0)
Insert a new row to mark the inflection point to allow inclusion of an intermediary time where both pos and neg can be zero.
test1 <-
test %>%
rowid_to_column() %>%
bind_rows(inflections) %>%
arrange(rowid)
Impute a time when the data changes from pos to neg with a function from imputeTS.
test1$Period <- na_interpolation(as.ts(test1$Period))
plot
ggplot(test1) +
geom_area(aes(x = Period, y = pos), fill = "blue", col = "black") +
geom_area(aes(x = Period, y = neg), fill = "red", col = "black") +
scale_y_continuous(limits = c(-2.25, 2.25),
breaks = -2:2) +
ylab("SPEI") + xlab("") +
theme_bw()
data
```
test <- structure(list(Period = structure(c(2017.83333333333, 2017.91666666667,
2018, 2018.08333333333, 2018.16666666667, 2018.25, 2018.33333333333,
2018.41666666667, 2018.5, 2018.58333333333, 2018.66666666667,
2018.75, 2018.83333333333, 2018.91666666667, 2019, 2019.08333333333,
2019.16666666667, 2019.25, 2019.33333333333, 2019.41666666667,
2019.5), class = "yearmon"), neg = c(0, 0, 0, 0, 0, 0, 0, 0,
-0.782066446199374, -1.33087717414387, -1.55401649141939, -1.9056578851487,
-2.19869230289699, -1.99579537718088, -2.03857957860623, -2.14184701726747,
-2.27461866979037, -2.39022691659445, -2.3732334198156, -1.83686080707261,
-1.86553025598681), pos = c(0.550567625206492, 0.699954781241267,
0.775518140437689, 0.647367030217637, 0.84562688020279, 0.923814518387379,
0.686796306801202, 0.131849327496122, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0)), row.names = 960:980, class = "data.frame")
```
<sup>Created on 2020-05-22 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup>
I've tried working on this loop and come out with the below errors. I'm not sure if I can provide data, if needed I'll do my best to obfuscate the data. Here is the loop I am trying to use, any tips on what I'm doing wrong would be greatly appreciated as I haven't found a viable solution yet. The exact error is below the code.
decay_function = function(df)
{
df <- df[order(df$department,df$product,df$region,df$monthnum),]
for(mk in 1:ncol(levels_department)) {
newdata <- df[which(df$department==as.character(levels_department[,mk])), ]
levels_product<-as.data.frame(t(levels(as.factor(newdata$product))))
for(md in 1:ncol(levels_product)){
newdata <- newdata[which(newdata$product==as.character(levels_product[,md])), ]
levels_region<-as.data.frame(t(levels(as.factor(newdata$region))))
for(dm in 1:ncol(levels_region)){
newdata <- newdata[which(newdata$region==as.character(levels_region[,dm])), ]
for(i in 1:(nrow(newdata)-1)){
start_month = newdata$monthnum[i]
end_month = newdata$monthnum[nrow(newdata)]
row_vector = c()
decay_vector = c()
for(j in 5:ncol(newdata)){
k = 0
for(l in start_month:end_month){
distance_initial = (l - start_month)
vector_increment = (l - (start_month-1))
decay_rate = (0.5)^((1/halflife)*distance_initial)
decay_value = (decay_rate)*(newdata[[i,j]])
k = k + decay_value
}
df2[i,j] = k
}
print(df2)
}
if (mk=='1' & md=='1' & dm=='1'){
outdata<-df2
} else {
outdata<-rbind(outdata,df2)
}
}
}
}
}
output_data = decay_function(tempone)
Error in start_month:end_month : argument of length 0
> dput(head(df))
structure(list(monthnum = c(33, 33, 33, 33, 33, 33), Region = c(2251,
2251, 2251, 2251, 2251, 2251), Department = c("Softlines", "Softlines",
"Softlines", "Softlines", "Softlines", "Softlines"), Product = c("T-Shirt",
"Jacket", "Sweat Shirt", "Tank Top", "Sweat Pants", "Mens Jeans"
), Incentive_Amount = c(5742.43, 108006.61, 459076.67, 34006,
141632.42, 29580.38), Leads_T1 = c(0, 0, 0, 0, 0, 0), DCLeads = c(0,
1, 0, 0, 0, 0), PhoneLeads = c(0, 0, 0, 0, 0, 0), T3_CRM_Leads = c(0,
0, 0, 0, 0, 0), Leads_Third = c(0, 1, 0, 0, 0, 0)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -6L))