Problems passing string with \n to ggtitle - r

I think this might have an easy answer - which I can't seem to find anywhere - so I'll forgo the reproducibility for the moment. I have a function designed to draw a ggplot2. I use mapply to pass it a few vectors of strings for the functions input parameters. The parameter of concern here is title. Which is fed a character vector with elements such as "this is a plot title".
Then the following code:
p <- ggplot(df, aes(x=date, y=value))
## plot the line
p <- p + geom_line()
## add plot title
p <- p + ggtitle(title)
actually works just fine and the plot title is "this is a plot title" as expected.
However if the title is long and I want to specify a point to wrap the title using \n it fails to work.
Precisely if I feed ggtitle an element of "this is a \n plot title". I get exactly that contained in the quotes, rather than wrapping the title at the \n. My suspicion is I need eval, or paste or get, but my formations of such a request have failed to achieve the desired results. I appreciate the help.
UPDATE:
I guess it must be the interaction with mapply. This should allow you to reproduce the problem.
create data.frame of strings as sample and assign it to fred.M.SA
structure(list(RegionalCoverage = c("National", "National", "National",
"National", "National", "National"), GeographicLevel = c("MSA",
"MSA", "MSA", "MSA", "MSA", "MSA"), Category = c("Workers", "Workers",
"Workers", "Workers", "Workers", "Workers"), Sector = c("Labor Market",
"Labor Market", "Labor Market", "Labor Market", "Labor Market",
"Labor Market"), Source2 = c("FRED", "FRED", "FRED", "FRED",
"FRED", "FRED"), Title = c("Unemployment Rate in La Crosse, WI-MN (MSA)",
"Trade, Transportation and Utilities Employment in La Crosse, WI-MN (MSA)",
"Professional and Business Services Employment in La Crosse, WI-MN (MSA)",
"Other Services Employment in La Crosse, WI-MN (MSA)", "Manufacturing Employment in La Crosse, WI-MN (MSA)",
"Leisure and Hospitality Employment \\n in La Crosse, WI-MN (MSA)"
), SeriesID = c("LACR155UR", "LACR155TRAD", "LACR155PBSV", "LACR155SRVO",
"LACR155MFG", "LACR155LEIH"), Units = c("%", "Thous. of Persons",
"Thous. of Persons", "Thous. of Persons", "Thous. of Persons",
"Thous. of Persons"), Freq = c("M", "M", "M", "M", "M", "M"),
Seas = c("SA", "SA", "SA", "SA", "SA", "SA"), OriginalSource = c("U.S. Department of Labor: Bureau of Labor Statistics",
"Federal Reserve Bank of St. Louis", "Federal Reserve Bank of St. Louis",
"Federal Reserve Bank of St. Louis", "Federal Reserve Bank of St. Louis",
"Federal Reserve Bank of St. Louis"), Method = c("ImportXML",
"ImportXML", "ImportXML", "ImportXML", "ImportXML", "ImportXML"
), LinktoSource = c("", "", "", "", "", ""), Link.to.Data.Spreadsheet.Name = c("",
"", "", "", "", ""), Link.to.Data.Storage = c("", "", "",
"", "", ""), Link.to.Data.Manipulation.File = c(NA, NA, NA,
NA, NA, NA), Link.to.Data.Manipulation.File.1 = c(NA, NA,
NA, NA, NA, NA)), .Names = c("RegionalCoverage", "GeographicLevel",
"Category", "Sector", "Source2", "Title", "SeriesID", "Units",
"Freq", "Seas", "OriginalSource", "Method", "LinktoSource", "Link.to.Data.Spreadsheet.Name",
"Link.to.Data.Storage", "Link.to.Data.Manipulation.File", "Link.to.Data.Manipulation.File.1"
), row.names = c(27L, 34L, 44L, 46L, 47L, 48L), class = "data.frame")
MakelineFred <- function(series, ylab="",xlab="", title="") {
require(ggplot2) # hadley's plotting framework
require(scales) # to adjust y axis scales
require(ggthemes) # extra themes including tufte
require(xts) # our favorite time series
require(gridExtra) # for adding a caption
require(timeDate) # for our prediction at the end
require(quantmod) #
# Get Data using quantmod
data <- getSymbols(series,src="FRED") #fred ignore from dates
# convert the string df to object df
data.xts <- get(data)
## convert data to data.frame
df <- data.frame(
date=as.Date(index(data.xts)),
value=as.numeric(data.xts))
p <- ggplot(df, aes(x=date, y=value))
## plot the line
p <- p + geom_line()
## add plot title
p <- p + ggtitle(title)
file <- paste("_",series,".png",sep="")
ggsave(file=file, plot=p, width=6, height=4)
finally here is the mapply call.
mapply(MakelineFred, series=fred.M.SA$SeriesID, title=fred.M.SA$Title)

Related

Indicating An Event on a Plot

I am studying patient fluid intake and frequency of urination.
I'm collecting volume and time of fluids drank and time of urination.
I want to indicate on a graph that has liquid intake when urination occurs.
Here's my data and code so far ...
time_log <- c("01:10", "05:50", "06:00","06:15", "06:25", "09:35", "10:00", "12:40",
"14:00")
time_log <- paste("04/04/2019", time_log, sep=" ")
time_log <- strptime(time_log, format = "%d/%m/%Y %H:%M")
time_view <- format(time_log, "%H:%M")
event <- c("u", "u", "T", "T", "u", "u", "T","T","u")
Volume <- c(NA, NA, 0.25, 0.25, NA, NA, 0.125, 0.625, NA)
patient_data <- data.frame(time_log, time_view, event, Volume)
total_liquids <- sum(patient_data$Volume, na.rm=TRUE)
plot(patient_data$time_log, patient_data$Volume,
xlim = c(as.POSIXct("2019-04-04 00:00:00"),as.POSIXct("2019-04-04 24:00:00")),
xlab="Hours of Study", ylab = "Volume of Liquid Drank /L",
main = paste("Total Liquids Drank = ", total_liquids, " L"))
This is related to the following question
Time Series Data - How to which was poorly received by the Stack Overflow community.
Here's a way using ggplot2 and dashed vertical lines. When adding the geom_vline, we subset the data for just the urination events (i.e., event == "u").
library(ggplot2)
ggplot(patient_data, aes(x = time_log, y = Volume)) +
geom_point() +
geom_vline(
data = subset(patient_data, event == "u"),
aes(xintercept = time_log),
linetype = 2
) +
labs(
title = paste("Total Liques Drank = ", total_liquids, " L"),
subtitle = "Dashed line reprents urination",
x = "Hours of Study",
y = "Volume of Liquid Drank (L)"
) +
scale_y_continuous(limits = c(0, NA)) # just so we don't start the y-axis at 0.1 or something misleading.

How to make a US map based on state-level data?

I am trying to use ggplot2 to make a US map based on some state level data, and color each state based on the value of one variable.
State loan
AL 25310770
AK 45310770
AZ 35310770
AR 25682770
...
Edit: Thanks to #Hector Haffenden, the dput(head(your_data)) gives:
structure(list(state = c("AL", "AK", "AZ", "AR", "IL", "MA"),
loan = c(25310770, 21230922, 15055436, 15212963, 12796921, 20311736),
row.names = c(NA, 6L), class = "data.frame")
Since I have a variable of state name, is it possible to automatically match each row to the map based on the state name abbreviations? Here is an example of my expected output:
https://i.imgur.com/0CD4fOx.png
Try this, first define our data like this,
dat <- data.frame(state = c("AL", "AK", "AZ", "AR"), Loan = c(25310770, 45310770, 35310770, 25682770))
Import the packages usmap and ggplot2, then, with more complete data, it will fill the whole map, but using the sample provided, we see
library(usmap)
library(ggplot2)
plot_usmap(
data = dat, values = "Loan", lines = "red"
) +
scale_fill_continuous(
low = "white", high = "red", name = "Loan", label = scales::comma
) +
labs(title = "US States", subtitle = "States and loan data") +
theme(legend.position = "right")
Note some states are grey due to small sample of data provided.

How can I avoid pie chart&legend overlap in R?

I wanna create a pie chart of crime types,and add a legend on the right hand,but I tried many times to avoid overlapping,doesn't work at all.
table(dd$Primary.Type.new)
ARSON ASSAULT BATTERY BURGLARY
833 30743 91237 29298
CRIMINAL DAMAGE CRIMINAL TRESPASS DECEPTIVE PRACTICE HOMICIDE
57539 14353 17472 640
KIDNAPPING MOTOR VEHICLE THEFT NARCOTOCS OFFENSE INVOLVING CHILDREN
517 23724 55685 3347
OTHER OFFENSE PUBLIC OFFENSE PUBLIC PEACE VIOLATION ROBBERY
30878 3833 3632 18891
SEX_CRIME THEFT WEAPONS VIOLATION
9331 103255 4792
Type <- table(dd$Primary.Type.new)
Here's that from dput():
structure(c(ARSON = 833L, ASSAULT = 30743L, BATTERY = 91237L,
BURGLARY = 29298L, `CRIMINAL DAMAGE` = 57539L, `CRIMINAL TRESPASS` = 14353L,
`DECEPTIVE PRACTICE` = 17472L, HOMICIDE = 640L, KIDNAPPING = 517L,
`MOTOR VEHICLE THEFT` = 23724L, NARCOTOCS = 55685L, `OFFENSE INVOLVING CHILDREN` = 3347L,
`OTHER OFFENSE` = 30878L, `PUBLIC OFFENSE` = 3833L, `PUBLIC PEACE VIOLATION` = 3632L,
ROBBERY = 18891L, `SEX CRIME` = 9331L, THEFT = 103255L, `WEAPONS VIOLATION` = 4792L
), .Dim = 19L, .Dimnames = list(. = c("ARSON", "ASSAULT", "BATTERY",
"BURGLARY", "CRIMINAL DAMAGE", "CRIMINAL TRESPASS", "DECEPTIVE PRACTICE",
"HOMICIDE", "KIDNAPPING", "MOTOR VEHICLE THEFT", "NARCOTOCS",
"OFFENSE INVOLVING CHILDREN", "OTHER OFFENSE", "PUBLIC OFFENSE",
"PUBLIC PEACE VIOLATION", "ROBBERY", "SEX CRIME", "THEFT", "WEAPONS VIOLATION"
)), class = "table") -> Type
piepercent<- round(100*Type/sum(Type), 1)
pie(Type, edges = 200, radius = 0.8,
clockwise = FALSE,angle = 45, col = rainbow(length(Type)), main = "Pie Chart of Primary Crime Types", labels = piepercent,labelcex = 0.8)
legend("right", inset = .05, title = "Primary Crime Type",legend= dd$Primary.Type.new,fill = rainbow(length(Type)), horiz=FALSE,cex = 0.6)
I tried to use par(), but doestn't work.
and BTW how can I change the labels into percentage? such as convert 20.7 into 20.7%.
Thank you very much.
Update
I also tried 3D piechart
library(plotrix)
pie3D(Type,labels = piepercent,explode = 0.1, main = "3D Pie Chart of
Primary Crime Types", labelcex = 0.8)
legend("bottom", inset = .05, title = "Primary Crime Type",legend= dd$Primary.Type.new,fill = rainbow(length(Type)), horiz=TRUE,cex = 0.6)
I hesitate to post this since this is an absolutely terrible use case for a pie chart, but it's possible to make it a bit more readable and color-blind friendly:
structure(c(ARSON = 833L, ASSAULT = 30743L, BATTERY = 91237L,
BURGLARY = 29298L, `CRIMINAL DAMAGE` = 57539L, `CRIMINAL TRESPASS` = 14353L,
`DECEPTIVE PRACTICE` = 17472L, HOMICIDE = 640L, KIDNAPPING = 517L,
`MOTOR VEHICLE THEFT` = 23724L, NARCOTOCS = 55685L, `OFFENSE INVOLVING CHILDREN` = 3347L,
`OTHER OFFENSE` = 30878L, `PUBLIC OFFENSE` = 3833L, `PUBLIC PEACE VIOLATION` = 3632L,
ROBBERY = 18891L, `SEX CRIME` = 9331L, THEFT = 103255L, `WEAPONS VIOLATION` = 4792L
), .Dim = 19L, .Dimnames = list(. = c("ARSON", "ASSAULT", "BATTERY",
"BURGLARY", "CRIMINAL DAMAGE", "CRIMINAL TRESPASS", "DECEPTIVE PRACTICE",
"HOMICIDE", "KIDNAPPING", "MOTOR VEHICLE THEFT", "NARCOTOCS",
"OFFENSE INVOLVING CHILDREN", "OTHER OFFENSE", "PUBLIC OFFENSE",
"PUBLIC PEACE VIOLATION", "ROBBERY", "SEX CRIME", "THEFT", "WEAPONS VIOLATION"
)), class = "table") -> Type
Order the slices (IMPORTANT):
Type <- sort(Type, decreasing = TRUE)
Proper % and decent labels:
piepercent <- scales::percent(as.numeric(Type/sum(Type)))
Margins:
par(mar = c(1, 1, 1, 1)) # bltr
pie(
Type,
edges = 200,
radius = 0.8,
clockwise = TRUE, # IMPORTANT
angle = 45,
col = viridis::viridis_pal(option = "magma", direction=-1)(length(Type)), # BETTER COLOR PALETTE
labels = tail(piepercent, -7), # NEVER DISPLAY OVERLAPPING LABELS
cex = 0.7
)
legend(
x = 1.2, # DELIBERATE POSITION
y = 0.5, # DELIBERATE POSITION
inset = .05,
title = "Primary Crime Type",
legend = names(Type), # YOU WERE PASSING IN _ALL_ THE REPEAT NAMES
fill = viridis::viridis_pal(option = "magma", direction=-1)(length(Type)), # USE THE SAME COLOR PALETTE
horiz = FALSE,
cex = 0.6, # PROPER PARAMETER FOR TEXT SIZE
text.width = 0.7 # SET THE BOX WIDTH
)
Add the title manually:
title("Pie Chart of Primary Crime Types", line = -1)
Can't let a pie chart stand alone (and, now, a 3D one at that):
structure(list(cat = c("Arson", "Assault", "Battery", "Burglary",
"Criminal Damage", "Criminal Trespass", "Deceptive Practice",
"Homicide", "Kidnapping", "Motor Vehicle Theft", "Narcotocs",
"Offense Involving Children", "Other Offense", "Public Offense",
"Public Peace Violation", "Robbery", "Sex Crime", "Theft", "Weapons Violation"
), val = c(833, 30743, 91237, 29298, 57539, 14353, 17472, 640,
517, 23724, 55685, 3347, 30878, 3833, 3632, 18891, 9331, 103255,
4792), pct = c(0.001666, 0.061486, 0.182474, 0.058596, 0.115078,
0.028706, 0.034944, 0.00128, 0.001034, 0.047448, 0.11137, 0.006694,
0.061756, 0.007666, 0.007264, 0.037782, 0.018662, 0.20651, 0.009584
)), class = "data.frame", row.names = c(NA, -19L)) -> xdf
dplyr::arrange(xdf, pct) %>%
dplyr::mutate(cat = factor(cat, levels=cat)) %>%
dplyr::mutate(lab = sprintf("%s (%s)", scales::comma(val), scales::percent(pct))) %>%
ggplot(aes(pct, cat)) +
geom_segment(aes(xend=0, yend=cat), size=4, color = "#617a89") +
geom_label(
aes(label=lab), label.size = 0, hjust=0, nudge_x=0.001,
size = 3, family = hrbrthemes::font_rc, color = "#909495"
) +
hrbrthemes::scale_x_percent(expand=c(0,0.001), limits=c(0,0.25)) +
labs(x = NULL, y = NULL, title = "'Theft', 'Battery' & 'Criminal Damage' Account\nfor Half of Primary Recorded Crime Types") +
hrbrthemes::theme_ipsum_rc(grid="X") +
theme(axis.text.x = element_blank())
How I got xdf:
readLines(textConnection("ARSON ASSAULT BATTERY BURGLARY
833 30743 91237 29298
CRIMINAL_DAMAGE CRIMINAL_TRESPASS DECEPTIVE_PRACTICE HOMICIDE
57539 14353 17472 640
KIDNAPPING MOTOR_VEHICLE_THEFT NARCOTOCS OFFENSE_INVOLVING_CHILDREN
517 23724 55685 3347
OTHER_OFFENSE PUBLIC_OFFENSE PUBLIC_PEACE_VIOLATION ROBBERY
30878 3833 3632 18891
SEX_CRIME THEFT WEAPONS_VIOLATION
9331 103255 4792")) %>%
trimws() %>%
stri_split_regex("[[:space:]]+") -> x
do.call(rbind.data.frame, lapply(seq.int(1, length(x), 2), function(i) {
data.frame(
cat = stri_trans_totitle(gsub("_", " ", x[[i]])),
val = as.numeric(x[[i+1]]),
stringsAsFactors = FALSE
)
})) %>%
mutate(pct = val/sum(val)) -> xdf

Count lines crossing raster cells with R

I have the following raster (shp file using readORG):
dput(summary_grid)
structure(list(class = structure("SpatialPolygonsDataFrame", package = "sp"),
bbox = structure(c(4346000, 3819000, 4445000, 3867000), .Dim = c(2L,
2L), .Dimnames = list(c("x", "y"), c("min", "max"))), is.projected = TRUE,
proj4string = "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs",
data = structure(c("Min. :0 ", "1st Qu.:0 ", "Median :0 ",
"Mean :0 ", "3rd Qu.:0 ", "Max. :0 "), .Dim = c(6L,
1L), .Dimnames = list(c("", "", "", "", "", ""), " Id"), class = "table")), .Names = c("class",
"bbox", "is.projected", "proj4string", "data"), class = "summary.Spatial")
as well as the shp file containing lines:
dput(summary_lines)
structure(list(class = structure("SpatialLinesDataFrame", package = "sp"),
bbox = structure(c(4329488.96922647, 3429159.10800761, 4998503.48859431,
4055688.10547651), .Dim = c(2L, 2L), .Dimnames = list(c("x",
"y"), c("min", "max"))), is.projected = TRUE, proj4string = "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs",
data = structure(c("Min. :0 ", "1st Qu.:0 ", "Median :0 ",
"Mean :0 ", "3rd Qu.:0 ", "Max. :0 ", "Min. : 448 ",
"1st Qu.:13229 ", "Median :28235 ", "Mean :27205 ",
"3rd Qu.:40724 ", "Max. :50608 ", "Min. : 485 ",
"1st Qu.:13731 ", "Median :29399 ", "Mean :28635 ",
"3rd Qu.:43159 ", "Max. :53607 ", "Min. :6519754 ",
"1st Qu.:8741416 ", "Median :9167928 ", "Mean :8894830 ",
"3rd Qu.:9414708 ", "Max. :9762259 ", "Fishing:121 ",
NA, NA, NA, NA, NA), .Dim = c(6L, 5L), .Dimnames = list(c("",
"", "", "", "", ""), c(" Id", " trip_id", " new_tr_id",
" species", " HELCOM_Gro")), class = "table")), .Names = c("class",
"bbox", "is.projected", "proj4string", "data"), class = "summary.Spatial")
In this shp file with the lines, each lines has a unique identification number called new_tr_id.
My goal is to create a raster file with the number if lines crossing each cell. A line (so the same new_tr_id) can cross the same cell X times, it would be counted X times. I added the figure below as example:
I have some interesting discussions using the package raster but I still can not find a proper answer to this issue.
Help is more than welcome :)
That is tricky. Counting is standard, and counting the total length is not that hard. But I think I come with something.
Example data
library(raster)
cds1 <- rbind(c(-180,-20), c(-140,55), c(10, 0), c(-140,-60))
cds2 <- rbind(c(-10,0), c(140,60), c(160,0), c(140,-55))
cds3 <- rbind(c(-125,0), c(0,60), c(40,5), c(15,-45))
lns <- spLines(cds1, cds2, cds3, crs="+proj=utm +zone=1 +datum=WGS84")
r <- raster(ncols=9, nrows=5, vals=1:45, crs="+proj=utm +zone=1 +datum=WGS84")
Standard count:
x <- rasterize(lns, r, fun='count')
We can break the line segments up by the raster cell borders (this will not work for very large rasters).
rsp <- rasterToPolygons(r)
lns2 <- intersect(lns, rsp)
r <- rasterize(lns2, r, fun='count')
But the result is not correct because if a segment touches a cell, it is counted as in.
So another approach would be to cut them and remove just a little at the edges. There are two parameters that you will have to play with for your data
wdth = .25
smallp = 100
b <- buffer(rsp, dissolve=FALSE, width=wdth)
j <- intersect(b, b)
j$area <- area(j)
small <- j[j$area < smallp, ]
small <- aggregate(small)
lns3 <- erase(lns, small)
r <- rasterize(lns3, r, fun='count')
plot(r)
lines(rsp)
plot(lns, col=rainbow(3), lwd=2, add=T)
Quite a work around, but the result looks good.

multivariate k-means cluster

I'm trying to do a multivariate k-means cluster plot in r. I have 3 variables, and 10 columns of data, plus the context (like species for Iris) so 11 variables. And my x is PeruReady, obviously
Following a tutorial online I got this far:
PeruReady.km <- kmeans(PeruReady[, -1], 3, iter.max=1000)
tbl <- table(PeruReady[, 1], PeruReady.km$cluster)
PeruReady.dist <- dist(PeruReady[, -1])
PeruReady.mds <- cmdscale(PeruReady.dist)
c.chars <- c("*", "o", "+")[as.integer(PeruReady$Context)]
a.cols <- rainbow(3)[PeruReady$cluster]
plot(PeruReady.mds, col=a.cols, pch=c.chars, xlab="X", ylab="Y")
But my plot is coming up completely empty, what am I doing wrong?
With a small data set (demand.sm), your code worked just fine. Have you normalized all your numeric columns?
dput(demand.sm)
structure(list(Demand = c("rify la", "p quasi", "rify LD", "ventive",
"ekeeper", " de min", " risk g", " approv", "uest te", "", "al trai",
"cation", "ely inv", "rge tim", "get of ", "vey pro", "ent ONA",
"ble sel", "cipline", "tus rep", "ced-ran"), normalized = structure(c(-1.15780226157481,
-0.319393727330983, -1.15780226157481, -1.15780226157481, -0.319393727330983,
-0.319393727330983, -0.319393727330983, -0.319393727330983, 0.519014806912847,
0.519014806912847, 0.519014806912847, -0.738597994452898, -0.738597994452898,
2.19583187540051, 2.19583187540051, -1.15780226157481, -0.319393727330983,
-0.319393727330983, 0.519014806912847, 1.35742334115668, 0.519014806912847
), .Dim = c(21L, 1L), "`scaled:center`" = 3.76190476190476, "`scaled:scale`" = 2.38547190100328)), .Names = c("Demand",
"normalized"), row.names = c(NA, -21L), class = "data.frame")
clusters <- kmeans(demand.sm[ , "normalized"], 5)
demand.dist <- dist(demand.sm[ , "normalized"])
demand.mds <- cmdscale(demand.dist) # multidimensional scaling of data matrix, aka principal coordinates analysis
c.chars <- c("*", "o", "+")[as.integer(clusters$Context)]
a.cols <- rainbow(3)[clusters$cluster]
plot(demand.mds, col=a.cols, pch=c.chars, xlab="X", ylab="Y")

Resources