Show loess smoothed trend-line in line plot, #ggplot2. - r

How to show loess smoothed trend-line in the plot? Please help to handle the warning message: "Removed 19 rows containing non-finite values (stat_smooth)".
My data:
yrcnt<-read.table(header = TRUE, text = "year outcome pop rate pred.SC
1 1995 2306 87592001 2.632660 0.9626214
2 1996 2221 87628543 2.534562 0.9599941
3 1997 2202 81872629 2.689544 0.9573667
4 1998 2316 88200076 2.625848 0.9547394
5 1999 2456 96200312 2.553006 0.9521121
6 2000 2526 99565063 2.537035 0.9494848
7 2001 2511 95951330 2.616952 0.9468575
8 2002 2537 96976191 2.616106 0.9442302
9 2003 2618 101673130 2.574918 0.9416028
10 2004 2644 104554479 2.528825 0.9389755
11 2005 2594 100522055 2.580528 0.9363482
12 2006 2620 105787278 2.476668 0.9337209
13 2007 2722 108946407 2.498476 0.9310936
14 2008 2788 112200567 2.484836 0.9284663
15 2009 2706 104491560 2.589683 0.9258389
16 2010 2773 108651896 2.552187 0.9232116
17 2011 2764 109632577 2.521148 0.9205843
18 2012 2694 107594922 2.503836 0.9179570
19 2013 2673 107553219 2.485281 0.9153297")
http://tutorials.iq.harvard.edu/R/Rgraphics/Rgraphics.html
My code:
p1 <- ggplot(yrcnt, aes(y = log(rate), x = year))
yrcnt$pred.SC <- predict(lm(year ~ log(rate), data = yrcnt))
p1 + geom_line(aes(color = rate)) +geom_line(aes(y = pred.SC))
p1 + geom_line(aes(color = rate)) + geom_smooth()
p4 <- p1 + geom_line(aes(color = rate)) + geom_smooth(color="red")
p4 + scale_x_continuous(name = "Years",limits = c(1995, 2013),breaks = 1994:2014) +
scale_y_continuous(name = "Pancreatic Cancer Hospitalization Rate, 1995-2013",limits = c(2.4, 2.7),breaks = seq(2.4, 2.7, by = 0.1)) +
ggtitle("Long Term Trend in Pancreatic Cancer Hospitalizations")
>`geom_smooth()` using method = 'loess'
p4=Base plot with trendline
Scale plot that was failed to get incorporated to base plot=p4

In the function call to scale_y_continuous(), remove the arguments
limits = c(2.4, 2.7),
breaks = seq(2.4, 2.7,
by = 0.1))
Because the true limits of the y-axis are between 0.9 and 1, and you are setting them to a range between 2.4 and 2.7 . I don't know if you need the rate or the log(rate) here.
An alternative would be
library('ggplot2')
p1 <- ggplot(yrcnt, aes(y = rate, x = year))
######### lm() args flipped, then
######### wrapped in exp() function.
yrcnt$pred.SC <- exp(predict(lm( log(rate) ~ year, data = yrcnt)))
p1 + geom_line(aes(color = rate)) +geom_line(aes(y = pred.SC))
p4 <- p1 + geom_line(aes(color = rate)) + geom_smooth(color="red", method="loess")
p4 + scale_x_continuous(name = "Years",limits = c(1995, 2013),breaks = 1994:2014) +
scale_y_continuous(name = "Pancreatic Cancer Hospitalization Rate, 1995-2013",limits = c(2.4, 2.7),breaks = seq(2.4, 2.7, by = 0.1)) +
theme(legend.position ="none") +
ggtitle("Long Term Trend in Pancreatic Cancer Hospitalizations")

Related

geom_text() to label two separate points from different plots in ggplot

I am trying to create individual plots facetted by 'iid' using 'facet_multiple', in the following dataset (first 3 rows of data)
iid Age iop al baseIOP baseAGE baseAL agesurg
1 1 1189 20 27.9 21 336 24.9 336
2 2 877 11 21.5 16 98 20.3 98
3 2 1198 15 21.7 16 98 20.3 98
and wrote the following code:
# Install gg_plus from GitHub
remotes::install_github("guiastrennec/ggplus")
# Load libraries
library(ggplot2)
library(ggplus)
# Generate ggplot object
p <- ggplot(data_longF1, aes(x = Age, y = al)) +
geom_point(alpha = 0.5) +
geom_point(aes(x= baseAGE, y=baseAL)) +
labs(x = 'Age (days)',
y = 'Axial length (mm)',
title = 'Individual plots of Axial length v time')
p1 <- p+geom_vline(aes(xintercept = agesurg),
linetype = "dotted",
colour = "red",
size =1.0)
p2<- p1 + geom_text(aes(label=iop ,hjust=-1, vjust=-1))
p3 <- p2 + geom_text(aes(label = baseIOP, hjust=-1, vjust=-1))
# Plot on multiple pages (output plot to R/Rstudio)
facet_multiple(plot = p3,
facets = 'iid',
ncol = 1,
nrow = 1,
scales = 'free')
The main issue I am having is labeling the points. The points corresponding to (x=age, y=axl) get labelled fine, but labels for the second group of points (x=baseIOP, y=baseAL) gets put in the wrong place.individual plot sample
I have had a look at similar issues in Stack Overflow e.g. ggplot combining two plots from different data.frames
But not been able to correct my code.
Thanks for your help
You need to define the x and y coordinates for the labels or they will default to the last ones specified.
Thus the geom_text() definitions should look something like:
data_longF1 <-read.table(header=TRUE, text="iid Age iop al baseIOP baseAGE baseAL agesurg
1 1 1189 20 27.9 21 336 24.9 336
2 2 877 11 21.5 16 98 20.3 98
3 2 1198 15 21.7 16 98 20.3 98")
# Generate ggplot object
p <- ggplot(data_longF1, aes(x = Age, y = al)) +
geom_point(alpha = 0.5) +
geom_point(aes(x= baseAGE, y=baseAL)) +
labs(x = 'Age (days)',
y = 'Axial length (mm)',
title = 'Individual plots of Axial length v time')
p1 <- p+geom_vline(aes(xintercept = agesurg),
linetype = "dotted",
colour = "red",
size =1.0)
#Need to specify the x and y coordinates or will default to the last ones defined
p2<- p1 + geom_text(aes(x=Age, y= al, label=iop ,hjust=-1, vjust=-1))
p3 <- p2 + geom_text(aes(x=baseAGE, y= baseAL, label = baseIOP, hjust=-1, vjust=-1))
print(p3)

seasonal plot Error in data.frame(y = as.numeric(x)) arguments imply differing number of rows:

here is my Code:
month year GrandTotal Date
1 6 2014 15172331 2014-06-30
2 7 2014 24381383 2014-07-31
3 8 2014 24351338 2014-08-31
...
46 3 2018 85980914 2018-03-31
47 4 2018 72723488 2018-04-30
y <- ts(briskaranged, start=2014, frequency=12)
library(ggplot2)
#ploting of variables
autoplot(y) +
labs(x ="Date", y = "GrandTotal", title = "Amount, ggplot2")
#seasonalplot
ggseasonplot(y, year.labels=TRUE, year.labels.left=TRUE) +
ylab("amount") +
ggtitle("Seasonal plot: amount transaction per day")
but compiler show error
Error in data.frame(y = as.numeric(x), year = trunc(time(x)), cycle =
as.numeric(cycle(x)), :
arguments imply differing number of rows: 235, 47
y <- ts(briskaranged$GrandTotal, start=2014, frequency=12)
library(ggplot2)
#ploting of variables
autoplot(y) +
labs(x ="Date", y = "GrandTotal", title = "Amount, ggplot2")
#seasonalplot
ggseasonplot(y, year.labels=TRUE, year.labels.left=TRUE) +
ylab("amount") +
ggtitle("Seasonal plot: amount transaction per day")

Creating GW contours using ggplot

I am a novice at R coding and am trying to plot GW contours using X (Easting) and Y (Northing) cords and GW level (rswl) data in ggplot. An example of the data that I am trying to plot is:
X Obs_No Season Easting Northing rswl
1 56 ADE146 Winter 2017 275638.7 6131431 5.72
2 113 YAT099 Winter 2017 271723.0 6133405 3.16
4 227 YAT066 Winter 2017 276503.0 6135636 2.31
5 292 YAT053 Winter 2017 277780.8 6139285 -2.30
6 400 YAT129 Winter 2017 282065.1 6146759 5.60
7 509 PTA040 Winter 2017 270868.0 6150199 1.68
An example of the code I have tried is:
ggplot(data)+
aes(x = Easting, y = Northing, z = rswl, fill = rswl)+
geom_tile()+
geom_contour(colour = "white", alpha = 0.5)+
scale_fill_distiller(palette = "Spectral", na.value = "white") +
theme_bw()
but it comes up with "Not possible to generate contour data"
Something else I tried with 1 of my datasets is:
ggplot(data, aes(x = Easting, y = Northing, z = rswl)) +
geom_density_2d(colour = "black")+
geom_point(aes(color = factor(Obs_No)))+
theme(legend.title = element_blank())+
ggtitle("Tomw.T2 Winter 2017.csv")
This seems to be contours based on the distribution of points and has nothing to do with the GW level.
Any tips would be greatly appreciated.
Thanks

how to colour a funnel plot in ggplot R

I have drawn the attached funnel plot in ggplot, But I have 2 questions:
Is there any way to make the coloured green dot bigger (only that one);
is there any way to colour the upper and lower part of the confidence intervals?
This is what I am able to make so far:
Thank you!
The data set I am working on:
df <-
read.table(text = "
school_id year sdq_emotional
1060 7 4
1060 7 5
1060 7 7
1060 7 6
1060 7 4
1060 7 7
1060 7 8
1115 7 5
1115 7 9
1115 7 3
1136 7 1
1136 7 8
1136 7 5
1136 7 9
1135 7 4
1139 7 7
1139 7 3
2371 7 6
2371 7 3
2372 7 4
2372 7 1
2378 7 6
2378 7 7
2378 7 5", header=TRUE)
My code as follows:
# Format the data
df1 <- plyr::count(df, c('school_id'))
df2 <- merge(df,df1, by= c("school_id"))
df <- df2
M3 <- aggregate(df$sdq_emotional[df$freq > 10], by=list(df$school_id[df$freq > 10]),mean,na.rm=T)
S3 <- aggregate(df$sdq_emotional[df$freq > 10], by=list(df$school_id[df$freq > 10]),nona)
CG_PLOT1 <- merge(M3,S3,by="Group.1")
names(CG_PLOT1) <- c("School","Mean","Size")
LINE3 <- data.frame(M3=rep(mean(df$sdq_emotional,na.rm=T),max(CG_PLOT1$Size)+25),
SD3=rep(sd(df$sdq_emotional,na.rm=T),max(CG_PLOT1$Size)+25),
N3=sqrt(1:(max(CG_PLOT1$Size)+25)))
ID <- 1060
filling3 <- rep("white",nrow(CG_PLOT1))
filling3[CG_PLOT1$School ==ID]<-"green"
# Build the graph
ggplot(data = CG_PLOT1) +
geom_line(data = LINE3, aes(x = 1:(max(CG_PLOT1$Size) + 25),
y = M3 + qnorm(0.975) * SD3 / N3), size = 1, colour = "steelblue2",
linetype = 5) +
geom_line(data = LINE3, aes(x = 1:(max(CG_PLOT1$Size) + 25),
y = M3 - qnorm(0.975) * SD3 / N3), size = 1, colour = "steelblue2",
linetype = 5) +
geom_segment(xend = max(CG_PLOT1$Size)+25,yend=mean(LINE3$M3,na.rm=T)),
aes(x = 1, y = mean(LINE3$M3,na.rm=T), size=1, colour="steelblue2") +
geom_point(data = CG_PLOT1, aes(x = Size, y = Mean), size = 2,
colour = "black", shape = 21,fill = filling3) +
ylim(0, 8)
thank you very much!
As you didn't provide a reproducible example, I have used this question as a template for your problem:
Creating a dataset here:
library(ggplot2)
set.seed(101)
x <- runif(100, min=1, max=10)
y <- rnorm(length(x), mean=5, sd=0.1*x)
df <- data.frame(x=x*70, y=y)
m <- lm(y ~ x, data=df)
fit95 <- predict(m, interval="conf", level=.95)
fit99 <- predict(m, interval="conf", level=.999)
df <- cbind.data.frame(df,
lwr95=fit95[,"lwr"], upr95=fit95[,"upr"],
lwr99=fit99[,"lwr"], upr99=fit99[,"upr"])
To add a colour background to the funnel plot, we can use the geom_ribbon function within ggplot to fill the area between a ymin and ymax. In this case, we will use the data used to construct each of the lines:
ggplot(df, aes(x, y)) +
# Add background
geom_ribbon(ymin= df$upr99, ymax = Inf, fill = "#e2a49a", alpha = 0.5) +
geom_ribbon(ymin = df$lwr99, ymax = df$upr99, fill = "#e0ba9d", alpha = 0.5 ) +
geom_ribbon(ymin = 0, ymax = df$lwr99, fill = "#8fd6c9", alpha = 0.5 ) +
# Overlay points and lines
geom_point() +
geom_smooth(method="lm", colour="black", lwd=1.1, se=FALSE) +
geom_line(aes(y = upr95), color="black", linetype=2) +
geom_line(aes(y = lwr95), color="black", linetype=2) +
geom_line(aes(y = upr99), color="red", linetype=3) +
geom_line(aes(y = lwr99), color="red", linetype=3)
labs(x="No. admissions...", y="Percentage of patients...")
As for changing the size of one point, you can check out the answer here. I would recommend subsetting the data to extract the one point, and then add another layer for the geom_point and then changing the size and colour argument of the new layer`

Multiple colour scales in one stacked bar plot using ggplot

I created a stacked bar plot which depicts the distribution of council seats (= y-axis) among parties within a municipality over several years (= x-axis). The used code and some data is below. Unfortunately, I don't have yet sufficient points to post the graph.
The different parties are also associated with a variable called "ideology" as a category for different political orientations ("progressive", "moderate", "conservative").
I would like to modify the colors in such a way that all e.g. conservative parties have different kinds of blues; all progressive parties different kinds of green; and all moderate parties e.g. different kinds of red;
The variable on the ideology is in the same dataframe (y).
Any hint how to get this modification? I already tried color=factor(ideology) and group=ideology but to no avail. I am also aware of this related entry Using a pre-defined color palette in ggplot however it doesn't pertain specifically to my problem.
Many thanks.
Used command:
municipality.plot <- ggplot(y, aes(x=as.factor(year), y=seats, fill=party, color=party)) +
geom_bar(bandwidth=1, stat="identity", group="party", position="fill") +
labs(x="year", y="% of seats for municipality")
Sample data:
year district.id party seats ideology
1 2012 127 Stranka Pravde I Razvoja Bosne I Hercegovine 1 p
2 2012 127 Savez Za Bolju Buducnost (SBB) 3 p
3 2008 127 Stranka Demokratske Akcije (SDA) 13 p
4 2004 127 Stranka Demokratske Akcije (SDA) 14 p
5 2008 127 Hrvatska Demokratska Zajednica (HDZ) 1 c
6 2008 127 Stranka Pravde I Razvoja Bosne I Hercegovine 1 p
7 2012 127 Stranka Za Bosnu I Hercegovinu (SzBiH) 4 p
8 2000 127 Socijaldemokratska Partija (SDP) 8 m
9 2012 127 Narodna Stranka Radom Za Boljitak (NSRzB) 2 m
10 2012 127 Socijaldemokratska Unija Bih (SDU) 1 p
11 2000 127 Koalicija - SDA, SBiH 15 p
12 2008 127 Socijaldemokratska Partija (SDP) 5 m
13 2008 127 Narodna Stranka Radom Za Boljitak (NSRzB) 1 m
14 2008 127 Koalicija - LDS, SDU 2 m
15 2000 127 Lgk-liberalno-gradanska Koalicija Bih (liberali Bih, Gds Bih) 1 m
16 2000 127 Nova Hrvatska Inicijativa (NHI) 1 c
17 1997 127 Socijaldemokratska Partija (SDP) 3 m
18 2012 127 Socijaldemokratska Partija (SDP) 6 m
19 2004 127 Stranka Za Bosnu I Hercegovinu (SzBiH) 5 p
20 1997 127 Bosanskohercegovacka Patriotska Stranka (BPS) 9 p
21 2000 127 Bosanskohercegovacka Patriotska Stranka (BPS) 3 p
22 2008 127 Stranka Za Bosnu I Hercegovinu (SzBiH) 4 p
23 1997 127 Hrvatska Demokratska Zajednica (HDZ) 5 c
24 2000 127 Hrvatska Demokratska Zajednica (HDZ) 2 c
25 2012 127 Stranka Demokratske Akcije (SDA) 10 p
26 2004 127 Socijaldemokratska Partija (SDP) 6 m
27 1997 127 Koalicija - SDA, SBiH, Liberali, GDS 13 p
# load relevant packages
library(scales)
library(grid)
library(ggplot2)
library(plyr)
# assume your data is called df
# order data by year, ideology and party
df2 <- arrange(df, year, ideology, party)
########################################
# create one colour palette per ideology
# count number of parties per ideology
tt <- with(df2[!duplicated(df2$party), ], table(ideology))
# conservative parties blues
# progressive parties green
# moderate parties red
blue <- brewer_pal(pal = "Blues")(tt[names(tt) == "c"])
green <- brewer_pal(pal = "Greens")(tt[names(tt) == "p"])
red <- brewer_pal(pal = "Reds")(tt[names(tt) == "m"])
# create data on party and ideology
party_df <- df2[!duplicated(df2$party), c("party", "ideology")]
# set levels of ideologies; c, p, m
party_df$ideology <- factor(party_df$ideology, levels = c("c", "p", "m"))
# order by ideology and party
party_df <- arrange(party_df, ideology, party)
# add fill colours
party_df$fill <- c(blue, green, red)
# set levels of parties based on the order of parties in party_df
party_df$party <- factor(party_df$party, levels = party_df$party)
# use same factor levels for parties in df2
df2$party <- factor(df2$party, levels = party_df$party)
##################################
# Alternative 1. Plot with one legend
g1 <- ggplot(data = df2, aes(x = as.factor(year),
y = seats,
fill = party)) +
geom_bar(stat = "identity", position = "fill") +
labs(x = "year", y = "% of seats for municipality") +
coord_cartesian(ylim = c(0, 1)) +
scale_fill_manual(values = party_df$fill, name = "Parties") +
theme_classic()
g1
#####################################3
# alt 2. Plot with separate legends for each ideology
# create separate plots for each ideology to get legends
# conservative parties blue
cons <- ggplot(data = df2[df2$ideology == "c", ],
aes(x = as.factor(year),
y = seats,
fill = party)) +
geom_bar(stat = "identity", position = "fill") +
scale_fill_manual(values = blue, name = "Conservative parties" )
# extract 'cons' legend
tmp <- ggplot_gtable(ggplot_build(cons))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend_cons <- tmp$grobs[[leg]]
# progressive parties green
prog <- ggplot(data = df2[df2$ideology == "p", ],
aes(x = as.factor(year),
y = seats,
fill = party)) +
geom_bar(stat = "identity", position = "fill") +
scale_fill_manual(values = green, name = "Progressive parties" )
# extract 'prog' legend
tmp <- ggplot_gtable(ggplot_build(prog))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend_prog <- tmp$grobs[[leg]]
# moderate parties red
mod <- ggplot(data = df2[df2$ideology == "m", ],
aes(x = as.factor(year),
y = seats,
fill = party)) +
geom_bar(stat = "identity", position = "fill") +
scale_fill_manual(values = red, name = "Moderate parties" )
# extract 'mod' legend
tmp <- ggplot_gtable(ggplot_build(mod))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend_mod <- tmp$grobs[[leg]]
#######################################
# arrange plot and legends
# define plotting regions (viewports) for plot and legends
vp_plot <- viewport(x = 0.25, y = 0.5,
width = 0.5, height = 1)
vp_legend_cons <- viewport(x = 0.66, y = 0.87,
width = 0.5, height = 0.15)
vp_legend_prog <- viewport(x = 0.7, y = 0.55,
width = 0.5, height = 0.60)
vp_legend_mod <- viewport(x = 0.75, y = 0.2,
width = 0.5, height = 0.30)
# clear current device
grid.newpage()
# add objects to the viewports
# plot without legend
print(g1 + theme(legend.position = "none"), vp = vp_plot)
upViewport(0)
# legends
pushViewport(vp_legend_cons)
grid.draw(legend_cons)
upViewport(0)
pushViewport(vp_legend_prog)
grid.draw(legend_prog)
upViewport(0)
pushViewport(vp_legend_mod)
grid.draw(legend_mod)

Resources