ggplot sub-plots with categorical and numeric in R - r

I have the following table and I need to plot it with week on the x-axis and percent on the y-axis. My code below plots nothing but gives me a message. Can someone help me fix this?
Any help is appreciated.
dfx1:
Year State Cty Week ac_sum percent
1998 KS Coffey 10-1 79 6.4
1998 KS Coffey 10-3 764 62
1998 KS Coffey 10-4 951 77.2
1998 KS Coffey 10-5 1015 82.4
1998 KS Coffey 11-2 1231 100
1998 KS Crawford 10-3 79 6.1
1998 KS Crawford 10-4 764 15.8
1998 KS Crawford 10-5 951 84.1
1998 KS Crawford 11-2 1015 100
.
.
.
.
# Base plot: Week on the x-axis, percent on the y-axis, coloured by Year.
gg <- ggplot(dfx1, aes(Week,percent, col=Year))
# Line layer; the "geom_path: Each group consists of only one observation"
# message shown below is reported when this plot is drawn.
gg <- gg + geom_line()
# One panel per Cty; the positional 2 is facet_wrap()'s nrow argument.
gg <- gg + facet_wrap(~Cty, 2, scales = "fixed")
# Limit the x-axis to the smallest and largest Week values found in dfx1.
gg <- gg + xlim(c(min(dfx1$Week), max(dfx1$Week)))
plot(gg)
geom_path: Each group consists of only one observation. Do you need to
adjust the group aesthetic?

Is this what you want?
# Rebuild the example data from the question.
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
dfx1 <- read.table(text = "Year State Cty Week ac_sum percent
1998 KS Coffey 10-1 79 6.4
1998 KS Coffey 10-3 764 62
1998 KS Coffey 10-4 951 77.2
1998 KS Coffey 10-5 1015 82.4
1998 KS Coffey 11-2 1231 100
1998 KS Crawford 10-3 79 6.1
1998 KS Crawford 10-4 764 15.8
1998 KS Crawford 10-5 951 84.1
1998 KS Crawford 11-2 1015 100", header = TRUE)

library(ggplot2)

# Option 1: points only. Points need no grouping, so this draws as-is.
ggplot(dfx1, aes(Week, percent, col = Year)) +
  geom_point() +
  facet_wrap(~Cty, 2, scales = "fixed")

# Option 2: points joined by a line. Week values such as "10-1" are discrete,
# so every week would otherwise form its own one-observation group;
# group = 1 puts all rows of a panel into a single group so the line is drawn.
ggplot(dfx1, aes(Week, percent, col = Year, group = 1)) +
  geom_point() +
  geom_line() +
  facet_wrap(~Cty, 2, scales = "fixed")

You can look at other answers like this one to see that you're missing group = Year in your plot. Adding it in will give you what you are looking for:
library(ggplot2)
# Turn Week into an ordered factor so it is treated as ordered categories.
# TRUE is spelled out because T can be reassigned.
dfx1$Week <- factor(dfx1$Week, ordered = TRUE)
# group = Year joins all observations of one year into a single line
# within each county panel.
ggplot(dfx1, aes(Week, percent, col = Year, group = Year)) +
  geom_line() +
  facet_wrap(~Cty, 2, scales = "fixed")
With your last line it looks like you're wanting to only show the Weeks that actually have data. You can do that with scales = 'free', like so:
# Same line plot, but scales = "free" lets every county panel use its own axes.
ggplot(
  data = dfx1,
  mapping = aes(x = Week, y = percent, colour = Year, group = Year)
) +
  geom_line() +
  facet_wrap(~Cty, nrow = 2, scales = "free")

Related

How to make exploratory plots using only certain rows in a column

I am making some exploratory plots to analyze zone M. I need one that plots Distance over time and another with Distance vs. MHT.
Here is what I have so far:
library(ggplot2)
ggplot(datmarsh, aes(x=Year, y=Distance)) + geom_point()
ggplot(datmarsh, aes(x=MHT, y=Distance)) + geom_point()
What I'm struggling with is specifying only zone "M" in each of these graphs.
Here is a sample of what my data looks like:
Year Distance MHT Zone
1975 253.1875 933 M
1976 229.75 877 M
1977 243.8125 963 M
1978 243.8125 957 M
1975 103.5 933 P
1976 150.375 877 P
1977 117.5625 963 P
1978 131.625 957 P
1979 145.6875 967 P
1975 234.5 933 PP
1976 314.1875 877 PP
1977 248.5625 963 PP
1978 272 957 PP
1979 290.75 967 PP
Thanks!
dplyr::filter() will let you do what you need. However, this has probably been answered elsewhere a few times, so do try searching!
library(dplyr)
library(ggplot2)
library(magrittr)

# Distance over time, restricted to zone "M".
datmarsh %>%
  filter(Zone == "M") %>%
  ggplot(aes(x = Year, y = Distance)) +
  geom_point()

# Distance vs. MHT, restricted to zone "M".
# The mapping must be built with aes(); the original daes() does not exist.
datmarsh %>%
  filter(Zone == "M") %>%
  ggplot(aes(x = MHT, y = Distance)) +
  geom_point()

Binding multiple csvs in R to output to a graph in ggplot2

This is an example of my data:
HUC8 YEAR RO_MM
bcc1_45Fall_1020004 1961 112.0
bcc1_45Fall_1020004 1962 243.7
bcc1_45Fall_1020004 1963 233.3
bcc1_45Fall_1020004 1964 190.3
bcc1_M_45Fall_1020004 1961 100.9
bcc1_M_45Fall_1020004 1962 132.3
bcc1_M_45Fall_1020004 1963 255.1
bcc1_M_45Fall_1020004 1964 281.9
bnuesm_45Fall_1020004 1961 89.0
bnuesm_45Fall_1020004 1962 89.5
bnuesm_45Fall_1020004 1963 126.8
bnuesm_45Fall_1020004 1964 194.3
canesm2_45Fall_1020004 1961 186.6
canesm2_45Fall_1020004 1962 197.4
canesm2_45Fall_1020004 1963 229.1
canesm2_45Fall_1020004 1964 141.8
Each of the similar prefixes represents (a segment of) a single csv. I have called them into a list and used rbind to link them. My goal is to have each csv represent a line of data, which would look like this:
Name
1961 1962 1963 1964 ...
bcc1_45Fall_1020004 112.0 243.7 233.3 190.3
bcc1_M_45Fall_1020004 100.9 132.3 255.1 281.9
bnuesm_45Fall_1020004 89.0 89.5 126.8 194.3
canesm2_45Fall_1020004 186.6 197.4 229.1 141.8
I would then like to plot these lines in a line graph using ggplot2, where each Name becomes a line of "RO_MM" data over 140 years. Remember, this is only a tiny sample. There are actually hundreds of files. I know that hundreds is too many for a graph and plan to do them in smaller groups, but I DO NOT need to grid them together. I have so far used this code, which has provided the initial data list above:
library(rio)
library(tidyverse)
library(data.table)
# full.names = TRUE makes list.files() return each file's path including the
# directory, so read.csv() can open the files whatever the working directory is.
file_names <- list.files(
  "~/Desktop/Rproj/splitByHUCs45/a01020004/splFall",
  full.names = TRUE
)
# Read every file into its own data frame ...
data_list <- lapply(file_names, read.csv, header = TRUE, sep = ",")
# ... and stack them row-wise into one long table.
finalTable <- do.call(rbind, data_list)
I have found this code (below). It is not what I need because I don't need the mean of anything, but I saw that it used more than one csv for input, so I'm trying to make sense of it, but don't know how to make it work for me.
# Pseudo data for testing: a copy of myData whose Data column is halved.
my_other_data <- myData
my_other_data$Data <- 0.5 * my_other_data$Data

# Mean of Data per Group as a red line for myData, plus the same summary
# as a green line for my_other_data.
pplot <- ggplot(myData, aes(Group, Data)) +
  stat_summary(geom = "line", fun = mean, color = "red") +
  stat_summary(
    aes(Group, Data),
    data = my_other_data,
    geom = "line", fun = mean, color = "green"
  ) +
  labs(x = "Group", y = "Data")
pplot
That said, the page on creating a reprex said that I should provide you with this:
head(finalTable, 3) %>%
+ deparse()
[1] "structure(list(HUC8 = structure(c(1L, 1L, 1L), .Label = c(\"bcc1_45Fall_1020004\", "
[2] "\"bcc1_M_45Fall_1020004\", \"bnuesm_45Fall_1020004\", \"canesm2_45Fall_1020004\", "
[3] "\"ccsm4_45Fall_1020004\", \"cnrmcm5_45Fall_1020004\", \"csiromk360_45Fall_1020004\", "
[4] "\"gfdlesm2g_45Fall_1020004\", \"gfdlesm2m_45Fall_1020004\", \"hadgem2cc_45Fall_1020004\", "
[5] "\"hadgem2es_45Fall_1020004\", \"hist_Fall_1020004\", \"inmcm4_45Fall_1020004\", "
[6] "\"ipslcm5_alr_45Fall_1020004\", \"ipslcm5_blr_45Fall_1020004\", \"ipslcm5amr_45Fall_1020004\", "
[7] "\"miroc5_45Fall_1020004\", \"mirocesm_45Fall_1020004\", \"mirocesmchem_45Fall_1020004\", "
[8] "\"mricgcm3_45Fall_1020004\", \"noresm1m_45Fall_1020004\"), class = \"factor\"), "
[9] " YEAR = 1961:1963, RO_MM = c(112, 243.7, 233.3)), row.names = c(NA, "
[10] "3L), class = \"data.frame\")"
I would appreciate getting help structuring the data so that I can bring it into ggplot2 and how to make a graph with ggplot2, and explanations would be especially helpful. Thanks.
I wasn't able to use your example data (please use dput(head(finalTable)) instead of deparse), but here is one potential solution using the data at the beginning of your question:
# Attach the tidyverse and rebuild the sample data shown in the question
library(tidyverse)
dat1 <- read.table(
  header = TRUE,
  text = "HUC8 YEAR RO_MM
bcc1_45Fall_1020004 1961 112.0
bcc1_45Fall_1020004 1962 243.7
bcc1_45Fall_1020004 1963 233.3
bcc1_45Fall_1020004 1964 190.3
bcc1_M_45Fall_1020004 1961 100.9
bcc1_M_45Fall_1020004 1962 132.3
bcc1_M_45Fall_1020004 1963 255.1
bcc1_M_45Fall_1020004 1964 281.9
bnuesm_45Fall_1020004 1961 89.0
bnuesm_45Fall_1020004 1962 89.5
bnuesm_45Fall_1020004 1963 126.8
bnuesm_45Fall_1020004 1964 194.3
canesm2_45Fall_1020004 1961 186.6
canesm2_45Fall_1020004 1962 197.4
canesm2_45Fall_1020004 1963 229.1
canesm2_45Fall_1020004 1964 141.8"
)
# Wide table: one row per HUC8 and one column per YEAR, filled with RO_MM
pivot_wider(dat1, names_from = YEAR, values_from = RO_MM)
# A tibble: 4 x 5
# HUC8 `1961` `1962` `1963` `1964`
# <chr> <dbl> <dbl> <dbl> <dbl>
#1 bcc1_45Fall_1020004 112 244. 233. 190.
#2 bcc1_M_45Fall_1020004 101. 132. 255. 282.
#3 bnuesm_45Fall_1020004 89 89.5 127. 194.
#4 canesm2_45Fall_1020004 187. 197. 229. 142.
# Line plot drawn straight from the long data (the wide table is not needed):
# one coloured line per HUC8, YEAR on x and RO_MM on y.
ggplot(dat1, aes(YEAR, RO_MM, group = HUC8, colour = HUC8)) +
  geom_line()
And you can 'group' your results however you like, e.g.
# Label each row by whether its HUC8 name contains "bcc", then draw one
# facet row per label.
dat1 %>%
  mutate(
    group = ifelse(str_detect(HUC8, "bcc"), "group_bcc", "group_others")
  ) %>%
  ggplot(aes(YEAR, RO_MM, group = HUC8, colour = HUC8)) +
  geom_line() +
  facet_grid(group ~ .)
And, if you don't want a grid (like you said in your question):
# Same labelling, but keep only the "bcc" rows and draw them in a single
# un-faceted panel.
dat1 %>%
  mutate(
    group = ifelse(str_detect(HUC8, "bcc"), "group_bcc", "group_others")
  ) %>%
  filter(group == "group_bcc") %>%
  ggplot(aes(YEAR, RO_MM, group = HUC8, colour = HUC8)) +
  geom_line() +
  labs(title = "bcc csv files only")
You can "highlight" one specific line using a package (e.g. gghighlight) or just tell ggplot which colours you want to use, e.g.
# Manual palette: the first HUC8 level is drawn in black, the remaining three
# in semi-transparent viridis colours so the black line stands out.
ggplot(dat1, aes(YEAR, RO_MM, group = HUC8, colour = HUC8)) +
  geom_line() +
  scale_colour_manual(
    values = c("black", viridis::viridis(n = 3, alpha = 0.33))
  )

How to create cumulative precipitation vs. temperature graph in a single plot

I have historical data for precip vs. annual temperature. I want to plot them into cool & wet, warm and wet, cool and dry, warm and dry years. Can someone help me with this?
Year Precip annual temperature
1987 821 8.5
1988 441 8
1989 574 7.9
1990 721 12.4
1991 669 10.8
1992 830 10
1993 1105 7.8
1994 772 8
1995 678 6.7
1996 834 8
1997 700 11
1998 786 11.2
1999 612 12
2000 758 10.6
2001 833 11
2002 622 10.6
2003 656 10.7
2004 799 9.9
2005 647 10.8
2006 764 12
2007 952 12.5
2008 943 10.86
2009 610 12.8
2010 766 11
2011 717 11.3
2012 602 9.5
2013 834 10.6
2014 758 11
2015 841 11
2016 630 11.5
2017 737 11.2
Average 742.32 10.36
As Majid suggested, you need to give more detail so you can get better answers. At the very least, use dput() on your data frame so we can get a reproducible copy of it. Copying and pasting from Excel is not appropriate for this kind of question.
In any case, that graph can easily be done using the ggplot2 package. You plot each year based on its X and Y coordinates and then manually add the lines and the titles for each category. You do need to establish the boundaries between cool/warm and dry/wet, of course.
library(ggplot2)
rain <- read.csv('~/data/rain.csv')
# Boundaries between the four categories: precipitation above limit_humid
# counts as "wet", temperature above limit_warm counts as "warm".
limit_humid <- 800
limit_warm <- 9.5
# Each year is drawn as a text label at its (temperature, precipitation)
# position; the two reference lines split the panel into four quadrants.
ggplot(rain, aes(x = temp, y = precip)) +
  geom_text(aes(label = year)) +
  geom_vline(xintercept = limit_warm) +
  geom_hline(yintercept = limit_humid) +
  # Quadrant titles sit in the four corners of the data range.
  # parse = TRUE makes ggplot evaluate bold("...") as a plotmath expression.
  annotate('text', label = 'bold("Cool and wet")', size = 4, parse = TRUE,
           x = min(rain$temp), y = max(rain$precip)) +
  annotate('text', label = 'bold("Warm and wet")', size = 4, parse = TRUE,
           x = max(rain$temp), y = max(rain$precip)) +
  annotate('text', label = 'bold("Cool and dry")', size = 4, parse = TRUE,
           x = min(rain$temp), y = min(rain$precip)) +
  # Bottom-right corner (high temperature, low precipitation) is warm and DRY;
  # the original labelled it "Warm and wet" a second time.
  annotate('text', label = 'bold("Warm and dry")', size = 4, parse = TRUE,
           x = max(rain$temp), y = min(rain$precip)) +
  theme_classic() +
  labs(x = 'Average Temperature (°C)',
       y = 'Cumulative precipitation (mm)')

Line chart issues - plot looks "funny" (ggplot2)

I have a large dataframe (CO2_df) with many years for many countries, and tried to plot a graph with ggplot2. This graph will have 6 curves + an aggregate curve. However, my graph looks pretty "funny" and I have no idea why.
The data looks like this (excerpt):
x y x1 x2 x4 x6
1553 1993 0.00000 CO2 Austria 6 6 - Other Sector
1554 2006 0.00000 CO2 Austria 6 6 - Other Sector
1555 2015 0.00000 CO2 Austria 6 6 - Other Sector
2243 1998 12.07760 CO2 Austria 5 5 - Waste management
2400 1992 11.12720 CO2 Austria 5 5 - Waste management
2401 1995 11.11040 CO2 Austria 5 5 - Waste management
2402 2006 10.26000 CO2 Austria 5 5 - Waste management
2489 1998 0.00000 CO2 Austria 6 6 - Other Sector
I have used this code:
# One line (with points) per sector x6, coloured by sector.
ggplot(data = CO2_df, aes(x = x, y = y, group = x6, colour = x6)) +
  geom_line() +
  geom_point() +
  ggtitle("Austria") +
  xlab("Year") +
  ylab("C02 Emissions") +
  # The trailing "+" is required: without it the plot expression ends here and
  # scale_color_brewer() on the next line is never added to the plot.
  labs(colour = "Sectors") +
  scale_color_brewer(palette = "Dark2")
# Add sum.y, the total of y within each value of x, then draw the sector lines
# plus a black line for that total.
CO2_df %>%
  group_by(x) %>%
  mutate(sum.y = sum(y)) %>%
  ggplot(aes(x = x, y = y, group = x6, colour = x6)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Austria",
    x = "Year",
    y = "C02 Emissions",
    colour = "Sectors"
  ) +
  scale_color_brewer(palette = "Dark2") +
  geom_line(aes(y = sum.y), color = "black")
My questions
1) Why does it look like this and how can I solve it?
2) I have no idea why the value on the y axis are close to zero. They are not...
3) How can I add an entry to the legend for the aggregate line?
Thank you for any sort of help!
Nordsee
What about something like this:
CO2_df %>%                               # data
  group_by(x, x6) %>%                    # one group per year and sector
  summarise(y = sum(y)) %>%              # total y within each group
  ggplot(aes(x = x, y = y)) +            # plot
  # One coloured line per sector. geom_line() has no fill aesthetic, so the
  # original fill = x6 mapping was dropped.
  geom_line(aes(group = x6, color = x6)) +
  # here you can put a summary line, like sum, or mean, and so on
  # (fun replaces the deprecated fun.y argument of stat_summary())
  stat_summary(fun = sum, na.rm = TRUE, color = 'black', geom = 'line') +
  geom_point() +
  ggtitle("Austria") +
  xlab("Year") +
  ylab("C02 Emissions") +
  labs(colour = "Sectors") +
  # The original ended with an unmatched ")" here, which is a syntax error.
  scale_color_brewer(palette = "Dark2")
With modified data, to see the right behaviour, I've put same years and very different values to understand:
# Modified sample data: both sectors share the same years and have clearly
# different values, which makes the summary line easy to check by eye.
# header = TRUE is spelled out because T can be reassigned.
CO2_df <- read.table(text ="
x y x1 x2 x4 x6
1553 1993 20 CO2 'Austria' 6 '6 - Other Sector'
1554 1994 23 CO2 'Austria' 6 '6 - Other Sector'
1555 1995 43 CO2 'Austria' 6 '6 - Other Sector'
2243 1993 12.07760 CO2 'Austria' 5 '5 - Waste management'
2400 1994 11.12720 CO2 'Austria' 5 '5 - Waste management'
2401 1995 11.11040 CO2 'Austria' 5 '5 - Waste management'
2402 1996 10.26000 CO2 'Austria' 5 '5 - Waste management'
2489 1996 50 CO2 'Austria' 6 '6 - Other Sector'", header = TRUE)

Plot point and line graph in primary and secondary y-axis using ggplot in R

I have the following table. I need to plot "Area" on the primary y-axis as points with "Weeks" on the x-axis. For the same x-axis I need to plot "SM9_5" on the secondary y-axis. My code is below, but it does not plot correctly.
Any idea is appreciated.
Thanks.
YEAR Week Area SM9_5 sum percent COUNTY
2002 9-2 250 212.2 250 10.2 125
2002 10-1 300 450.2 550 22.5 125
2002 10-2 100 150.2 650 100.0 125
2002 9-3 50 212.2 250 10.2 15
2002 10-1 30 450.2 550 22.5 15
2002 10-2 10 150.2 650 100.0 15
2003 9-2 12 112.2 12 20.2 150
2003 10-1 15 350.2 27 82.5 150
2003 10-2 16 650.2 43 100.0 150
# NOTE(review): gg is used before any visible `gg <- ggplot(...)` call; the
# initial plot object is presumably created in code not shown here.
# Area as points; the constant "Area" becomes its legend entry.
gg <- gg + geom_point(aes(y = Area, colour = "Area"))
# NOTE(review): this maps SM9_1, but the table above only shows a column
# named SM9_5 - confirm which column is intended.
gg <- gg + geom_line(aes(y = SM9_1, colour = "Sep_SM_9-1"))
# Secondary axis built with the identity transform (~.), so it repeats the
# primary axis values under the name "Soil Moisture".
gg <- gg + scale_y_continuous(sec.axis = sec_axis(~., name = "Soil Moisture"))
# Two manual colours for the two legend entries defined above.
gg <- gg + scale_colour_manual(values = c("blue","red"))
# One panel per COUNTY; the positional 2 is facet_wrap()'s nrow argument.
gg <- gg + facet_wrap(~COUNTY, 2, scales = "fixed")
gg <- gg + labs(y = "Area",
x = "Weeks",
colour = "Parameter")
plot(gg)
My plot is shown below.

Resources