Trouble plotting dates with ggplot2 - r

I'm trying to plot against dates in R. I've run into trouble trying to create vertical lines against a plot that I already have. All of the different formats that I try either result in nothing showing up on the plot, or a line at 1970 (the default date). The year-data is in the form yyyy-mm-dd. For example, "1914-07-01".
I've also tried inputting these dates in a data.frame, but got the same problem.
I've been trying to make a reproducible example, but I haven't seen any example datasets to do so with, and got frustrated trying to create one... sorry about that. Here's the relevant code:
ggplot(M,aes(x=date,color=origin,y=value)) +
geom_point() +
geom_line() +
facet_grid(topic~origin) +
geom_vline(xintercept=as.numeric(as.Date("1914-07-01")))
Everything plots correctly without the addition of the final line.
Edit: here's the result of dput(head(M)):
structure(list(topic = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25"), class = "factor"), date = structure(c(-1767196800, -1765987200,
-1764518400, -1763308800, -1762099200, -1760889600), class = c("POSIXct",
"POSIXt"), tzone = ""), origin = structure(c(2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Blast", "The_Egoist"), class = "factor"),
value = c(6.69960398194253e-07, 7.48757156068349e-07, 7.04834977806836e-07,
7.10226526475778e-07, 6.8295233938925e-07, 6.16466066169137e-07
)), .Names = c("topic", "date", "origin", "value"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), vars = list(
topic, date), drop = TRUE, indices = list(0L, 1L, 2L, 3L,
4L, 5L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
topic = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22",
"23", "24", "25"), class = "factor"), date = structure(c(-1767196800,
-1765987200, -1764518400, -1763308800, -1762099200, -1760889600
), class = c("POSIXct", "POSIXt"), tzone = "")), class = "data.frame", row.names = c(NA,
-6L), .Names = c("topic", "date"), vars = list(topic, date)))

You were very close. The problem is that your data is stored as POSIXct, but you were converting the intercept to Date. To fix it, convert the intercept to POSIXct instead:
ggplot(M,aes(x=date,color=origin,y=value)) +
geom_point() +
geom_line() +
facet_grid(topic~origin) +
geom_vline(xintercept=as.numeric(as.POSIXct("1914-07-01")))
You can see the difference in the calls:
as.numeric(as.POSIXct("1914-07-01"))
[1] -1751569200
as.numeric(as.Date("1914-07-01"))
[1] -20273
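This explains why the intercept was drawn so close to 1970 (the zero point for both classes): a Date counts days since 1970-01-01, whereas a POSIXct axis counts seconds, so -20273 was interpreted as roughly five and a half hours before 1970-01-01.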

Related

conditionally change the scale_shape_manual values based on the column values in ggplot2

I want to conditionally change the shapes based on the values within a column. Is this possible? I tried the code below, using case_when() within scale_shape_manual(), but it did not work. Any thoughts?
here the avalc variable is available in adrs3_all dataset which i am passing in the geom_point()
adrs4 <- structure(list(subjid = structure(c(1L, 1L, 1L, 1L, 1L, 1L), levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"), class = "factor"), ady = c(38, 84, 111, 111, 242, 242), avalc = structure(c(3L, 4L, 6L, 6L, 7L, 7L), levels = c("CR", "PR", "SD", "PD", "On-treatment", "EOT", "Death"), class = "factor"), trtedy = c(0, 0, 0, 0, 0, 242), trtpn = c(1L, 1L, 1L, 1L, 1L, 1L), trtp = c("TRT A", "TRT A", "TRT A", "TRT A", "TRT A", "TRT A")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"))
adrs3_all <- adrs4 <- structure(list(subjid = structure(c(1L, 1L, 1L, 1L, 1L, 1L), levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"), class = "factor"), ady = c(38, 84, 111, 111, 242, 242), avalc = structure(c(3L, 4L, 6L, 6L, 7L, 7L), levels = c("CR", "PR", "SD", "PD", "On-treatment", "EOT", "Death"), class = "factor"), trtedy = c(0, 0, 0, 0, 0, 242), trtpn = c(1L, 1L, 1L, 1L, 1L, 1L), trtp = c("TRT A", "TRT A", "TRT A", "TRT A", "TRT A", "TRT A")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"))
ggplot((adrs4 %>% filter(trtpn==1)),aes(reorder(subjid,trtedy), trtedy)) + # Base axis
geom_bar(stat="identity", aes(fill=factor(trtp))) +
geom_point(data=(adrs3_all %>% filter(trtpn==1)), # Use df.shapes to add reponse points
aes(subjid, ady, colour=avalc, shape=avalc), size=10) +
coord_flip() +
theme(text=element_text(family="sans")) +
scale_color_manual(values = c('darkcyan','brown','darkviolet','red','forestgreen','darkgoldenrod2','black')) +
scale_shape_manual(values=case_when(avalc=='Death' ~ c(rep(unicode[["cross"]], 1), # Add shapes
avalc=='EOT' ~ rep(unicode[["star"]],1),
avalc=='PD' ~ unicode[["square"]],
avalc=='SD' ~ unicode[["circle"]],
avalc=='On-treatment' ~ rep(unicode[["arrow"]],1),
avalc=='PR' ~ rep(unicode[["triangle2"]],1),
avalc=='CR' ~ unicode[["triangl1"]], 1)), guide = "none") +
scale_y_continuous(limits = c(0,600),breaks=seq(0,600,50)) +
labs(fill="Treatment", colour="Response", shape="Response", # Add labels
x="Subject ID ", y="Days",
title="Swimmers Plot",subtitle = 'All Treated Subjects', size=20) +
theme(plot.title = element_text(hjust = 1.5), # Put title in the middle of plot
plot.caption = element_text(size=15, hjust=1.5),
legend.text = element_text(size=15)) +
theme_classic(base_size = 20)
You’ve made things a bit more complicated than you need to. You can just pass a vector of shapes to values in scale_shape_manual(). You may have to change the order so the shapes you want “line up” with the values you want.
library(ggplot2)
# example `unicode` vector (each name gets a distinct character so the
# three shapes can be told apart on the plot)
unicode <- c(star = "*", square = "#", circle = "o")
ggplot(mpg) +
geom_point(aes(cty, hwy, color = drv, shape = drv), size = 6) +
scale_shape_manual(values = c(
unicode[["star"]],
unicode[["square"]],
unicode[["circle"]]
))
And if, for example, you wanted "4" to correspond to the square and "f" to the star, you would either change the order of the values argument:
# ...
scale_shape_manual(values = c(
unicode[["square"]],
unicode[["star"]],
unicode[["circle"]]
))
Or alternatively, change the levels order of drv:
library(dplyr)
# Reorder the levels of `drv` so that the first entry of `values` is used for
# "f", the second for "4" and the third for "r".
mpg2 <- mpg %>%
  mutate(drv = factor(drv, levels = c("f", "4", "r")))
ggplot(mpg2) +
# ...

CSV Import issues in R

After importing a csv file, R separated my data into columns every comma it reads.
My issue is that i had originally two columns where i had different values that are floating numbers, and the other column is the sum of all of these floating number. So R spread these elements in 5 or 6 columns sometimes less columns, sometimes more, depending on the number of commas existing.
There's a facilitation in this issue: the first column is delimited from parenthesis: so for example the first row first column is (-5,5+9)+(-10+12) and the second column would be the sum of this floating numbers. So i can easily see where the first column stops, after the second column (that is the sum of the elements of the first column) there are at least 2 or more empty columns so that i can easily recognize where the second column ends. Now what i have to do is to rearrange my dataset in the original form. I post the structure of the dataset for an easy understanding
Here is the code for the first rows:
Y= structure(list(V24 = structure(c(66L, 15L, 44L, 28L, 68L, 10L
), .Label = c("", "(-0", "(-0+7", "(-1", "(-1+11", "(-1+11)+(-13",
"(-1+11)+(-18+18", "(-1+3)+(-10+14)", "(-1+8)", "(-2", "(-2+10",
"(-2+10)", "(-2+10)+(-13", "(-2+11", "(-2+11)", "(-2+11)+(-13",
"(-2+11)+(-14+17)", "(-2+12", "(-2+12)", "(-2+12)+(-14", "(-2+12)+(-14+15",
"(-2+12)+(-14+16)", "(-2+6)+(-8+10)+(-14", "(-2+7", "(-2+7)",
"(-2+7)+(-11", "(-2+7)+(-13", "(-2+8", "(-2+8)+(-10", "(-2+8)+(-11",
"(-2+8)+(-13", "(-2+8)+(-15", "(-2+9", "(-2+9)", "(-2+9)+(-13",
"(-2+9)+(-14", "(-3", "(-3+10", "(-3+10)", "(-3+10)+(-13", "(-3+10)+(-13+14",
"(-3+10)+(-14+14", "(-3+11", "(-3+11)", "(-3+11)+(-13", "(-3+12",
"(-3+12)", "(-3+12)+(-13", "(-3+13)", "(-3+7", "(-3+8", "(-3+8)",
"(-3+8)+(-11+12", "(-3+9", "(-3+9)", "(-4", "(-4+10", "(-4+10)",
"(-4+10)+(-11+12)", "(-4+11", "(-4+11)", "(-4+12", "(-4+12)",
"(-4+13)", "(-4+14)", "(-4+6)+(-9", "(-4+8", "(-4+8)+(-10+14)",
"(-4+9", "(-4+9)+(-10+11)+(-13", "(-4+9)+(-12+13)+(-18+18", "(-4+9)+(-13+14",
"(-4+9)+(-14+15)", "(-4+9)+(-9", "(-5", "(-5+10", "(-5+10)",
"(-5+10)+(-13", "(-5+11)", "(-5+12)", "(-5+13)+(-14", "(-6",
"(1+6)+(-8+9", "S"), class = "factor"), V25 = structure(c(7L,
67L, 66L, 58L, 66L, 54L), .Label = c("", "(-4+11", "(-5", "10",
"12", "25)+(-14+15)", "25+12", "25+14", "3)", "3+6)", "3+7",
"5", "5)", "5)+(-10", "5)+(-11", "5)+(-11+13)+(-14", "5)+(-13",
"5)+(-13+13", "5)+(-13+14", "5)+(-14", "5)+(-16", "5)+(-16+16",
"5)+(-16+17)+(-21+22", "5)+(-17", "5)+(-18+18", "5+10", "5+10)",
"5+10)+(-13", "5+11", "5+11)", "5+11)+(-13", "5+11)+(-17+17",
"5+11)+(-21+21", "5+12", "5+12)", "5+12)+(-13", "5+12)+(-20+20",
"5+13", "5+13-13", "5+14", "5+15", "5+16", "5+16)", "5+18)",
"5+6)+(-14+14", "5+7", "5+7)+(-13", "5+7)+(-15", "5+7)+(-9+12",
"5+8", "5+8)", "5+8)+(-17", "5+9", "5+9)", "5+9)+(-13", "5+9)+(-14",
"5+9)+(-22", "50)", "50+10)+(-14", "50+14", "50+7", "6", "7",
"75)", "75)+(-14+15", "8", "9", "T"), class = "factor"), V26 = structure(c(31L,
1L, 1L, 29L, 1L, 29L), .Label = c("", "10", "11", "25)", "25)+(-14+15",
"25+15", "4", "5", "5)", "5)+(-13", "5)+(-14", "5)+(-16", "5)+(-16+17)",
"5)+(-20+21)", "5+10)+(-13", "5+13", "5+14", "5+14)", "5+14)+(-18+18",
"5+15", "5+15)", "5+16)", "5+17", "5+18", "5+18)", "5+23", "50)",
"50+16", "6", "7", "75)", "75+14", "75+15", "8", "9"), class = "factor"),
V27 = structure(c(9L, 1L, 1L, 9L, 1L, 9L), .Label = c("",
"10", "11", "12", "25", "25)", "25+17", "3", "5", "5)", "5+14",
"5+15)", "5+15)+(-18", "50)", "6", "7", "75", "75)", "8",
"9"), class = "factor"), V28 = structure(c(9L, 12L, 15L,
1L, 8L, 1L), .Label = c("", "1", "10", "11", "2", "25)",
"3", "4", "5", "5)", "5+19", "6", "7", "75", "8", "9"), class = "factor"),
V29 = structure(c(1L, 5L, 10L, 1L, 6L, 1L), .Label = c("",
"25", "2prol", "30", "40", "41", "5", "5)", "50", "52", "75",
"8", "9"), class = "factor"), V30 = structure(c(1L, 6L, 12L,
5L, 7L, 13L), .Label = c("", "25", "3", "3conc", "4", "45",
"46", "5", "52", "56", "6", "60", "8", "9"), class = "factor"),
V31 = structure(c(15L, 7L, 10L, 3L, 8L, 7L), .Label = c("",
"35", "40", "43", "4mot", "5", "52", "53", "54", "55", "56",
"57", "60", "63", "7"), class = "factor"), V32 = c(43L, NA,
NA, 52L, NA, 57L), V33 = c(45L, NA, NA, 59L, NA, 56L), V34 = c(55L,
NA, NA, NA, NA, NA)), row.names = 3:8, class = "data.frame")
So my idea is:
read all the columns, identify and separate the first from the second column: the last element of the first column is highlighted by the closing parenthesis
Working on the first one: i would say "take the value of the next column and add to the previous column adding a comma before"
Working on the second one: Since the second column starts where there's the first element after the closing parenthesis, i would take the first value (in that case we have an integer) or if the following column has a number (so that the column is not empty) add the number of the following column to the first columns linked by a comma.
Note: i have the idea how to do it, but i can't translate these ideas into a code, how can i do this?

Warning Messages when running linear regression in R

I'm attempting to run a linear regression in R, but get the following errors:
Warning messages:
1: In model.response(mf, "numeric") :
using type = "numeric" with a factor response will be ignored
2: In Ops.factor(y, z$residuals) : ‘-’ not meaningful for factors
The code is:
reg_ex1 <- lm(V45~TotalScore,data = Combineddatainprogresscsv)
Both values, V45, and TotalScore are numerical. A Google search yielded a similar question where it was suggested that the csv file might have commas. But I'm not an expert so don't know how to check this?
Thank you!
There are 1300 lines, so here is just the final part of the output. Let me know if you need more.
"50", "60", "70", "80", "90", "Compared to others who may have taken this test, how well do you think you scored? - 1"
), class = "factor"), V46 = structure(c(23L, 6L, 4L, 22L,
4L, 8L), .Label = c("", "0", "1", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "2", "20", "3", "4",
"5", "6", "7", "8", "9", "Score"), class = "factor"), TotalScore = c(0L,
12L, 10L, 9L, 10L, 14L)), row.names = c(NA, 6L), class = "data.frame")
It seems your response variable V46 is a factor. You can see it in the output you pasted: V46 = structure(c(23L, 6L, 4L, 22L,
4L, 8L), .Label = c("", "0", "1", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "2", "20", "3", "4",
"5", "6", "7", "8", "9", "Score"), class = "factor")
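I would suggest converting V46 to character, then to numeric, and finally filtering out the missing values that the "Score" level will produce. Note that the response in your lm() call is V45; the column just before V46 in the pasted output (presumably V45) is also a factor, with the question text as one of its levels, so the same conversion applies to it.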
You should definitely listen to the people in the comments so it's easier to help you :)

Error in ggplot

I am trying to make a ggplot. When I had shape in aesthetics, the code was working just fine. However, I need to put shape in geom_point() because I'm trying to reproduce a figure. And when I added shape to geom_point() it gave me the following error:
Aesthetics must be either length 1 or the same as the data (6): shape
I've looked for other answers here but apparently, nothing seems to be working for me. Above I've provided with an image of what my data looks like. There are 17000 entries.
Below is my code:
summarised_data <-ddply(mammals,c('mammals$chr','mammals$Species','mammals$chrMark'),
function (x) c(median_rpkm = median(x$RPKM), median = median(x$dNdS)))
ggplot(summarised_data,aes(x = summarised_data$median_rpkm, y = summarised_data$median,
color = summarised_data$`mammals$Species`)) + geom_smooth(se = FALSE, method = "lm") +
geom_point(shape = summarised_data$`mammals$chrMark`) + xlab("median RPKM") + ylab("dNdS")
"ENSG00000213221", "ENSG00000213341", "ENSG00000213380", "ENSG00000213424",
"ENSG00000213533", "ENSG00000213551", "ENSG00000213619", "ENSG00000213626",
"ENSG00000213699", "ENSG00000213782", "ENSG00000213949", "ENSG00000214013",
"ENSG00000214338", "ENSG00000214357", "ENSG00000214367", "ENSG00000214517",
"ENSG00000214814", "ENSG00000215203", "ENSG00000215305", "ENSG00000215367",
"ENSG00000215440", "ENSG00000215897", "ENSG00000221947", "ENSG00000222011",
"ENSG00000224051", "ENSG00000225830", "ENSG00000225921", "ENSG00000239305",
"ENSG00000239474", "ENSG00000239900", "ENSG00000241058", "ENSG00000242247",
"ENSG00000242612", "ENSG00000243646", "ENSG00000244038", "ENSG00000244045"),
class = "factor"), Species = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Chimp", "Gori", "Human", "Maca",
"Mouse", "Oran"), class = "factor"), labs = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Chimp-A", "Chimp-X",
"Gori-A", "Gori-X", "Human-A", "Human-X", "Maca-A", "Maca-X",
"Mouse-A", "Mouse-X", "Oran-A", "Oran-X"), class = "factor"),
chrMark = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("A", "X"), class = "factor"), chr = structure(c(27L,
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L), .Label = c("1",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"2", "20", "21", "22", "2a", "2A", "2b", "2B", "3", "4",
"5", "6", "7", "8", "9", "X"), class = "factor"), dN = c(3.00669,
3.27182, 7.02044, 1.01784, 3.0363, 2.32786, 4.92959, 3.03753,
3.0776, 1.02147), dS = c(3.15631, 5.87147, 3.13716, 2.05438,
4.10205, 5.24764, 4.2014, 3.18086, 5.4942, 3.02169), dNdS = c(0.9525965447,
0.5572403504, 2.2378329444, 0.4954487485, 0.7401908802, 0.4436013141,
1.1733207978, 0.954939859, 0.5601543446, 0.3380459279), RPKM = c(31.6,
13.9, 26.3, 9.02, 11.3, 137, 242, 1.05, 59.4, 10.1), Tau = c(0.7113820598,
0.8391023102, 0.3185943152, 0.6887167806, 0.9120531859, 0.6254200542,
0.7165302682, 0.7257435312, 0.2586613298, 0.6493567251),
GC3 = c(0.615502, 0.622543, 0.393064, 0.490141, 0.461592,
0.626407, 0.490305, 0.482853, 0.346424, 0.466484)), .Names = c("gene",
"Species", "labs", "chrMark", "chr", "dN", "dS", "dNdS", "RPKM",
"Tau", "GC3"), row.names = c(NA, 10L), class = "data.frame")
There are a few things wrong with your code and with how it uses ggplot's non-standard evaluation; I'd recommend reading a ggplot tutorial or the docs. Having columns within summarised_data named 'mammals$Species' and 'mammals$chrMark' is going to cause lots of problems.
If we change these to something more sensible...
# The grouping columns are named after the strings passed to ddply(), so the
# species column is `mammals$Species` (capital S); the comparison must use that
# exact name or nothing is renamed.
names(summarised_data)[names(summarised_data) == "mammals$Species"] <- "mammals_species"
names(summarised_data)[names(summarised_data) == "mammals$chrMark"] <- "mammals_chrMark"
We can make the ggplot code more friendly. Note that shape has to be within aes, as you're mapping it to your data.
ggplot(summarised_data, aes(x = median_rpkm, y = median)) +
geom_smooth(se = FALSE, method = "lm") +
geom_point(aes(shape = mammals_chrMark,
color = mammals_species)) +
xlab("median RPKM") + ylab("dNdS")
Hopefully this should work, or at least get you somewhere closer to an answer.

R - plot vertical profile

I have measurements of CH4 concentration with depth:
df <- structure(list(Depth = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 15L, 16L, 17L), .Label = c("0", "10",
"12", "14", "16", "18", "2", "20", "22", "24", "26", "28", "30",
"32", "4", "6", "8", "AR"), class = "factor"), Conc_CH4 = c(4.30769230769231,
23.1846153846154, 14.5615384615385, 21.1769230769231, 16.2615384615385,
132.007692307692, 5.86923076923077, 389.353846153846, 823.023076923077,
948.684615384615, 1436.56923076923, 1939.88461538462, 26.2769230769231,
27.5538461538462, 19.6461538461538)), .Names = c("Depth", "Conc_CH4"
), row.names = c(NA, -15L), class = "data.frame")
And I need to create a plot like this:
But I have some problems: the factors in my data are in the wrong order, and I don't know how to plot this kind of data using ggplot2.
Any ideas?
Here's a solution with base plotting functions (you reverse the limits of ylim):
# Depth is a factor: go through character so the labels (not the internal
# level codes) become the numeric depths.
df$Depth <- as.numeric(as.character(df$Depth))
# Sort by depth so the line is drawn from the surface downwards.
df <- df[order(df$Depth), ]
# Reversed ylim puts depth 0 at the top; na.rm guards against labels that are
# not numbers (e.g. "AR"), which become NA in the conversion above.
plot(Depth ~ Conc_CH4, df, type = "l",
     ylim = rev(range(df$Depth, na.rm = TRUE)))
Why not convert Depth to a number and plot?
# Convert the factor labels to numbers and sort by depth so the profile is
# traced from the surface downwards.
df_num <- transform(df, Depth = as.numeric(as.character(Depth)))
df_num <- df_num[order(df_num$Depth), ]
# geom_path() joins points in row order (here: by depth); geom_line() would
# join them in order of x (concentration) and zig-zag across depths.
ggplot(df_num, aes(x = Conc_CH4, y = Depth)) +
  geom_path() +
  scale_y_reverse()
The as.numeric(as.character(...)) is needed because your Depth is a factor: calling as.numeric directly on a factor returns its internal integer level codes, not the numbers that the labels spell out.
The scale_y_reverse reverses the y scale.
If your actual data has a depth of "AR" in it, you'll have to omit them or otherwise handle them.

Resources