Related
I'm trying to use months as the x-axis for a ggplot graph, but I have multiple times in each day, and I need the times to be in chronological order. I've tried manually setting the breaks and labels to month numbers and names, but now the x-axis is just blank. What should I do?
# Asker's original attempt (kept exactly as posted): smoothed mood curves
# over time, with month names wanted on the x-axis.
# Month numbers 1..12 and the abbreviated month names, intended as the
# axis breaks and labels.
month_nums <- seq(1, 12)
month_names <- month.abb
# Build "month:day:year:hour:minute" strings and parse them as POSIXct.
time_strs <- paste(moods$Month, moods$Day, moods$Year, moods$Hour, moods$Minute, sep=":")
timestamps <- as.POSIXct(time_strs, format = "%m:%d:%Y:%H:%M", tz="EST")
# NOTE(review): data2 and the columns anxious2, happy2 and social2 are not
# defined anywhere in this snippet.
ggplot(data2, aes(x=timestamps)) +
geom_smooth(aes(y=anxious2, color="Anxiety"), size=1.2) +
geom_smooth(aes(y=happy2, color="Happiness"), size=1.2) +
geom_smooth(aes(y=social2, color="Sociability"), size=1.2) +
scale_color_manual(name="Variables",
values=c("red",
"grey20",
"grey40")) +
# The breaks here are the plain numbers 1..12 while x is a POSIXct vector;
# the asker reports that this leaves the x-axis blank.
scale_x_continuous(name="Timestamps", breaks=month_nums, labels=month_names) +
scale_y_continuous(name="Intensity") +
ggtitle("Emotions")
# moods data
structure(list(Year = c("2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021"),
Month = c("9", "9", "9", "9", "9", "10", "10", "10", "10", "10"),
Day = c("29", "29", "30", "30", "30", "1", "1", "1", "1", "2"),
Hour = c("16", "21", "7", "12", "16", "8", "12", "19", "22", "13"),
Minute = c("24", "52", "58", "53", "18", "42", "24", "49", "18", "27"),
happy = c("3", "4", "5", "3", "4", "5", "2", "3", "1", "1"),
social = c("6", "5", "8", "8", "4", "10", "3", "2", "2", "2"),
anxiety = c("-", "-", "1", "2", "1", "1", "2", "1", "1", "1")),
row.names = 2:11, class = "data.frame")
As timestamps is a date-time (POSIXct) vector, you'll find scale_x_datetime much handier than scale_x_continuous. You can tweak the arguments to show what you prefer.
# timestamps is POSIXct, so use the date-time scale (the Date-only scale
# does not accept POSIXct values): one break per month, labelled with the
# abbreviated month name.
scale_x_datetime(date_breaks = "1 month", date_labels = "%b")
Output:
Data:
I needed to change your variable names of the provided data a little bit, add some (smaller) missing parts, as well as extending the range of the data to more points than 2 in order to have some output to show here.
library(ggplot2)

# Example data: one row per mood record; every column is stored as character.
moods <- structure(list(Year = c("2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021"),
Month = c("9", "7", "8", "9", "9", "10", "10", "11", "12", "12"),
Day = c("29", "29", "30", "30", "30", "1", "1", "1", "1", "2"),
Hour = c("16", "21", "7", "12", "16", "8", "12", "19", "22", "13"),
Minute = c("24", "52", "58", "53", "18", "42", "24", "49", "18", "27"),
happy = c("3", "4", "5", "3", "4", "5", "2", "3", "1", "1"),
social = c("6", "5", "8", "8", "4", "10", "3", "2", "2", "2"),
anxiety = c(NA, NA, "1", "2", "1", "1", "2", "1", "1", "1")),
row.names = 2:11, class = "data.frame")

# Convert the mood scores from character to numeric with base R, so the
# example needs no package other than ggplot2.
score_cols <- c("happy", "social", "anxiety")
moods[score_cols] <- lapply(moods[score_cols], as.numeric)

# Build "month:day:year:hour:minute" strings and parse them as POSIXct, so
# several records on the same day keep their chronological order.
data2 <- moods
data2$time_strs <- paste(moods$Month, moods$Day, moods$Year, moods$Hour,
                         moods$Minute, sep = ":")
data2$timestamps <- as.POSIXct(data2$time_strs, format = "%m:%d:%Y:%H:%M",
                               tz = "EST")

ggplot(data2, aes(x = timestamps)) +
  geom_smooth(aes(y = anxiety, color = "Anxiety"), size = 1.2) +
  geom_smooth(aes(y = happy, color = "Happiness"), size = 1.2) +
  geom_smooth(aes(y = social, color = "Sociability"), size = 1.2) +
  # Name each colour after the label it belongs to, so the mapping does not
  # depend on the alphabetical order of the labels.
  scale_color_manual(
    name = "Variables",
    values = c(Anxiety = "red", Happiness = "grey20", Sociability = "grey40")
  ) +
  # Date-time scale: one break per month, labelled with the abbreviated name.
  scale_x_datetime(date_breaks = "1 month", date_labels = "%b") +
  scale_y_continuous(name = "Intensity") +
  ggtitle("Emotions")
I am trying to run a regression using lars in R. However, I keep getting this error :
Error in rep(1, n) : invalid 'times' argument.
Here's my code :
IN: dput(head(LBJ09))
OUT: structure(list(G = c("1", "2", "3", "4", "5", "6"), Date = c("2008-10-28",
"2008-10-30", "2008-11-01", "2008-11-03", "2008-11-05", "2008-11-07"
), Age = c("23-303", "23-305", "23-307", "23-309", "23-311",
"23-313"), Tm = c("CLE", "CLE", "CLE", "CLE", "CLE", "CLE"),
Home = c("#", "", "#", "#", "", ""), Opp = c("BOS", "CHA",
"NOH", "DAL", "CHI", "IND"), GS = c("1", "1", "1", "1", "1",
"1"), MP = c("36:00", "30:16", "37:43", "34:08", "35:50",
"39:20"), FG = c("9", "7", "6", "8", "13", "11"), FGA = c("21",
"15", "15", "20", "23", "24"), FGP = c(".429", ".467", ".400",
".400", ".565", ".458"), `3PM` = c("0", "0", "0", "0", "0",
"1"), `3PA` = c("4", "2", "3", "2", "2", "6"), `3PP` = c(".000",
".000", ".000", ".000", ".000", ".167"), FT = c("4", "8",
"3", "13", "15", "4"), FTA = c("8", "12", "4", "15", "16",
"7"), FTP = c(".500", ".667", ".750", ".867", ".938", ".571"
), ORB = c("1", "2", "2", "2", "2", "2"), DRB = c("6", "7",
"5", "6", "7", "7"), TRB = c("7", "9", "7", "8", "9", "9"
), AST = c("6", "9", "13", "3", "6", "8"), STL = c("2", "0",
"3", "2", "4", "1"), BLK = c("1", "1", "0", "1", "0", "4"
), TOV = c("3", "5", "5", "1", "5", "4"), PF = c("4", "3",
"3", "1", "1", "2"), PTS = c("22", "22", "15", "29", "41",
"27"), GmSc = c("14.1", "17.0", "15.3", "24.0", "36.0", "21.5"
)), row.names = c(NA, 6L), class = "data.frame")
library(lars)
# Asker's original call (kept exactly as posted); the asker reports that it
# fails with "invalid 'times' argument".
# NOTE(review): PTS is passed first and an element-wise sum of columns
# second, whereas the answer below passes a predictor matrix as x and the
# outcome as y; the dput output above also shows these columns stored as
# character.
lars09 <- lars(LBJ09$PTS, LBJ09$FG+LBJ09$AST+LBJ09$`3PM`+LBJ09$FT+LBJ09$TRB+
LBJ09$STL+LBJ09$BLK, type = "lasso")
plot(lars09)
I expect the lars package to run successfully since all LBJ variables are numerical data in columns. However, lars does not even run. Any ideas?
Try this code:
# Convert variables from character to numeric.
# lapply() always returns one list element per column, so the assignment
# keeps a numeric column per variable whatever the number of rows is.
vars <- c("PTS", "FG", "AST", "3PM", "FT", "TRB", "STL", "BLK")
LBJ09[vars] <- lapply(LBJ09[vars], as.numeric)
# Create the X matrix of explanatory variables and the y vector with the
# outcome. The first entry of vars is the outcome, the rest are predictors;
# drop = FALSE keeps the predictors as a data frame before conversion.
X <- as.matrix(LBJ09[, vars[-1], drop = FALSE])
y <- LBJ09[[vars[1]]]
lars09 <- lars(x = X, y = y, type = "lasso")
plot(lars09)
After importing a csv file, R separated my data into columns every comma it reads.
My issue is that i had originally two columns where i had different values that are floating numbers, and the other column is the sum of all of these floating number. So R spread these elements in 5 or 6 columns sometimes less columns, sometimes more, depending on the number of commas existing.
One thing makes this easier: the first column is delimited by parentheses. For example, the first column of the first row is (-5,5+9)+(-10+12), and the second column is the sum of these floating-point numbers. So I can easily see where the first column stops. After the second column (the sum of the elements of the first column) there are at least two empty columns, so I can also easily recognize where the second column ends. What I need to do now is rearrange my dataset back into its original form. I am posting the structure of the dataset to make this easier to understand.
Here is the dput output for the first rows:
Y= structure(list(V24 = structure(c(66L, 15L, 44L, 28L, 68L, 10L
), .Label = c("", "(-0", "(-0+7", "(-1", "(-1+11", "(-1+11)+(-13",
"(-1+11)+(-18+18", "(-1+3)+(-10+14)", "(-1+8)", "(-2", "(-2+10",
"(-2+10)", "(-2+10)+(-13", "(-2+11", "(-2+11)", "(-2+11)+(-13",
"(-2+11)+(-14+17)", "(-2+12", "(-2+12)", "(-2+12)+(-14", "(-2+12)+(-14+15",
"(-2+12)+(-14+16)", "(-2+6)+(-8+10)+(-14", "(-2+7", "(-2+7)",
"(-2+7)+(-11", "(-2+7)+(-13", "(-2+8", "(-2+8)+(-10", "(-2+8)+(-11",
"(-2+8)+(-13", "(-2+8)+(-15", "(-2+9", "(-2+9)", "(-2+9)+(-13",
"(-2+9)+(-14", "(-3", "(-3+10", "(-3+10)", "(-3+10)+(-13", "(-3+10)+(-13+14",
"(-3+10)+(-14+14", "(-3+11", "(-3+11)", "(-3+11)+(-13", "(-3+12",
"(-3+12)", "(-3+12)+(-13", "(-3+13)", "(-3+7", "(-3+8", "(-3+8)",
"(-3+8)+(-11+12", "(-3+9", "(-3+9)", "(-4", "(-4+10", "(-4+10)",
"(-4+10)+(-11+12)", "(-4+11", "(-4+11)", "(-4+12", "(-4+12)",
"(-4+13)", "(-4+14)", "(-4+6)+(-9", "(-4+8", "(-4+8)+(-10+14)",
"(-4+9", "(-4+9)+(-10+11)+(-13", "(-4+9)+(-12+13)+(-18+18", "(-4+9)+(-13+14",
"(-4+9)+(-14+15)", "(-4+9)+(-9", "(-5", "(-5+10", "(-5+10)",
"(-5+10)+(-13", "(-5+11)", "(-5+12)", "(-5+13)+(-14", "(-6",
"(1+6)+(-8+9", "S"), class = "factor"), V25 = structure(c(7L,
67L, 66L, 58L, 66L, 54L), .Label = c("", "(-4+11", "(-5", "10",
"12", "25)+(-14+15)", "25+12", "25+14", "3)", "3+6)", "3+7",
"5", "5)", "5)+(-10", "5)+(-11", "5)+(-11+13)+(-14", "5)+(-13",
"5)+(-13+13", "5)+(-13+14", "5)+(-14", "5)+(-16", "5)+(-16+16",
"5)+(-16+17)+(-21+22", "5)+(-17", "5)+(-18+18", "5+10", "5+10)",
"5+10)+(-13", "5+11", "5+11)", "5+11)+(-13", "5+11)+(-17+17",
"5+11)+(-21+21", "5+12", "5+12)", "5+12)+(-13", "5+12)+(-20+20",
"5+13", "5+13-13", "5+14", "5+15", "5+16", "5+16)", "5+18)",
"5+6)+(-14+14", "5+7", "5+7)+(-13", "5+7)+(-15", "5+7)+(-9+12",
"5+8", "5+8)", "5+8)+(-17", "5+9", "5+9)", "5+9)+(-13", "5+9)+(-14",
"5+9)+(-22", "50)", "50+10)+(-14", "50+14", "50+7", "6", "7",
"75)", "75)+(-14+15", "8", "9", "T"), class = "factor"), V26 = structure(c(31L,
1L, 1L, 29L, 1L, 29L), .Label = c("", "10", "11", "25)", "25)+(-14+15",
"25+15", "4", "5", "5)", "5)+(-13", "5)+(-14", "5)+(-16", "5)+(-16+17)",
"5)+(-20+21)", "5+10)+(-13", "5+13", "5+14", "5+14)", "5+14)+(-18+18",
"5+15", "5+15)", "5+16)", "5+17", "5+18", "5+18)", "5+23", "50)",
"50+16", "6", "7", "75)", "75+14", "75+15", "8", "9"), class = "factor"),
V27 = structure(c(9L, 1L, 1L, 9L, 1L, 9L), .Label = c("",
"10", "11", "12", "25", "25)", "25+17", "3", "5", "5)", "5+14",
"5+15)", "5+15)+(-18", "50)", "6", "7", "75", "75)", "8",
"9"), class = "factor"), V28 = structure(c(9L, 12L, 15L,
1L, 8L, 1L), .Label = c("", "1", "10", "11", "2", "25)",
"3", "4", "5", "5)", "5+19", "6", "7", "75", "8", "9"), class = "factor"),
V29 = structure(c(1L, 5L, 10L, 1L, 6L, 1L), .Label = c("",
"25", "2prol", "30", "40", "41", "5", "5)", "50", "52", "75",
"8", "9"), class = "factor"), V30 = structure(c(1L, 6L, 12L,
5L, 7L, 13L), .Label = c("", "25", "3", "3conc", "4", "45",
"46", "5", "52", "56", "6", "60", "8", "9"), class = "factor"),
V31 = structure(c(15L, 7L, 10L, 3L, 8L, 7L), .Label = c("",
"35", "40", "43", "4mot", "5", "52", "53", "54", "55", "56",
"57", "60", "63", "7"), class = "factor"), V32 = c(43L, NA,
NA, 52L, NA, 57L), V33 = c(45L, NA, NA, 59L, NA, 56L), V34 = c(55L,
NA, NA, NA, NA, NA)), row.names = 3:8, class = "data.frame")
So my idea is:
read all the columns, identify and separate the first from the second column: the last element of the first column is highlighted by the closing parenthesis
Working on the first one: i would say "take the value of the next column and add to the previous column adding a comma before"
Working on the second one: Since the second column starts where there's the first element after the closing parenthesis, i would take the first value (in that case we have an integer) or if the following column has a number (so that the column is not empty) add the number of the following column to the first columns linked by a comma.
Note: i have the idea how to do it, but i can't translate these ideas into a code, how can i do this?
I'm trying to plot against dates in R. I've run into trouble trying to create vertical lines against a plot that I already have. All of the different formats that I try either result in nothing showing up on the plot, or a line at 1970 (the default date). The year-data is in the form yyyy-mm-dd. For example, "1914-07-01".
I've also tried inputting these dates in a data.frame, but got the same problem.
I've been trying to make a reproducible example, but I haven't seen any example datasets to do so with, and got frustrated trying to create one... sorry about that. Here's the relevant code:
# Asker's original plot (kept exactly as posted): points and lines in a
# topic-by-origin facet grid, plus a vertical line intended for 1914-07-01.
ggplot(M,aes(x=date,color=origin,y=value)) +
geom_point() +
geom_line() +
facet_grid(topic~origin) +
# NOTE(review): the dput output below shows `date` as POSIXct, while this
# intercept is the numeric value of a Date; the asker reports that the line
# either does not show up or appears at 1970.
geom_vline(xintercept=as.numeric(as.Date("1914-07-01")))
Everything plots correctly without the addition of the final line.
Edit: here's the result of dput(head(M)):
structure(list(topic = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25"), class = "factor"), date = structure(c(-1767196800, -1765987200,
-1764518400, -1763308800, -1762099200, -1760889600), class = c("POSIXct",
"POSIXt"), tzone = ""), origin = structure(c(2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Blast", "The_Egoist"), class = "factor"),
value = c(6.69960398194253e-07, 7.48757156068349e-07, 7.04834977806836e-07,
7.10226526475778e-07, 6.8295233938925e-07, 6.16466066169137e-07
)), .Names = c("topic", "date", "origin", "value"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), vars = list(
topic, date), drop = TRUE, indices = list(0L, 1L, 2L, 3L,
4L, 5L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
topic = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22",
"23", "24", "25"), class = "factor"), date = structure(c(-1767196800,
-1765987200, -1764518400, -1763308800, -1762099200, -1760889600
), class = c("POSIXct", "POSIXt"), tzone = "")), class = "data.frame", row.names = c(NA,
-6L), .Names = c("topic", "date"), vars = list(topic, date)))
You were very close, the problem is your data is in POSIXct, and you were trying to convert to Date. To fix it, change to POSIXct:
# Same plot as in the question, except that the vertical line's position is
# the numeric value of a POSIXct date-time, i.e. it is on the same scale as
# the `date` column.
ggplot(M, aes(x = date, y = value, color = origin)) +
  geom_point() +
  geom_line() +
  facet_grid(topic ~ origin) +
  geom_vline(
    xintercept = as.numeric(as.POSIXct("1914-07-01", tz = ""))
  )
You can see the difference in the calls:
as.numeric(as.POSIXct("1914-07-01"))
[1] -1751569200
as.numeric(as.Date("1914-07-01"))
[1] -20273
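This explains why the intercept was so close to 1970 (the zero point for both classes): a Date counts days from 1970-01-01 while a POSIXct counts seconds, so the Date value of -20273 is read on the POSIXct axis as only a few hours before 1970.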
I have measurements of CH4 concentration with depth:
df <- structure(list(Depth = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 15L, 16L, 17L), .Label = c("0", "10",
"12", "14", "16", "18", "2", "20", "22", "24", "26", "28", "30",
"32", "4", "6", "8", "AR"), class = "factor"), Conc_CH4 = c(4.30769230769231,
23.1846153846154, 14.5615384615385, 21.1769230769231, 16.2615384615385,
132.007692307692, 5.86923076923077, 389.353846153846, 823.023076923077,
948.684615384615, 1436.56923076923, 1939.88461538462, 26.2769230769231,
27.5538461538462, 19.6461538461538)), .Names = c("Depth", "Conc_CH4"
), row.names = c(NA, -15L), class = "data.frame")
And I need to create a plot like this:
But I have some problems: the factors in my data are in the wrong order, and I don't know how to plot this kind of data using ggplot2.
Any ideas?
Here's a solution with base plotting functions (you reverse the limits of ylim):
# Depth is stored as a factor; convert via character so that the labels
# themselves (not the factor's internal codes) become the numeric depths.
df$Depth <- as.numeric(as.character(df$Depth))
# Sort by depth so the line is drawn from the shallowest to the deepest sample.
df <- df[order(df$Depth), ]
# Reversing ylim puts depth 0 at the top of the plot. na.rm = TRUE keeps the
# limits finite even if some depth label could not be converted to a number.
plot(Depth ~ Conc_CH4, data = df, type = "l",
     ylim = rev(range(df$Depth, na.rm = TRUE)))
Why not convert Depth to a number and plot?
# Turn the Depth factor into the numbers given by its labels, then draw
# concentration against depth with the depth axis reversed.
ggplot(
  transform(df, Depth = as.numeric(as.character(Depth))),
  aes(y = Depth, x = Conc_CH4)
) +
  geom_line() +
  scale_y_reverse()
The as.numeric(as.character(...)) is needed because your Depth is a factor: calling as.numeric directly on a factor returns its internal integer codes, not the numbers shown in its labels.
The scale_y_reverse reverses the y scale.
If your actual data has a depth of "AR" in it, you'll have to omit them or otherwise handle them.