I am using ggplot2 to plot a pointrange() plot, and both the x and y labels are oddly separate from the end of their respective ticks. This doesn't happen with all of the plots in this particular script, only a few, including this one (which is based on a subset of the available data, but shows the problem):
As you can see, the y axis labels are offset substantially to the left, and the x axis labels are offset substantially below the ticks, to the extent that they are overplotted above the axis label.
The only modification I have made to theme_bw() prior to producing this plot is to set theme_set(theme_bw(base_size = 8)) -- no changes have been deliberately made to the text justification prior to the plot code.
Here is a dput() of the subset plotted below:
TestData:
structure(list(State = c("AL", "AZ", "CA", "CO", "CT", "DC",
"DE", "FL", "GA", "IL"), Year = c(2008L, 2008L, 2008L, 2008L,
2008L, 2008L, 2008L, 2008L, 2008L, 2008L), N = c(22L, 42L, 286L,
99L, 30L, 14L, 20L, 173L, 78L, 29L), Polarization = c(0.352923743188869,
0.505918664112271, 0.445768659699068, 0.555930347461176, -0.0133878043740006,
-0.380342319255035, -0.450998867087007, 0.385507917713463, 0.368070478718073,
0.23368733390603), PolarizationSE = c(0.16021790292877, 0.0650610761652209,
0.0100695976668952, 0.0270310803233059, 0.127526745827604, 0.296328985544823,
0.179653490097689, 0.0180113004747975, 0.0372516250664796, 0.112905812606479
), IDPol = c(0.198743353462518, 0.0416441096132583, 0.0551808637190376,
0.110549247724351, 0.302497569072991, -0.0343523165297017, -0.00367975496702999,
0.0520660142625065, 0.0762126127715774, 0.0936515057040723),
IDPolSE = c(0.102763798140243, 0.0523842634480865, 0.00789292373693809,
0.023425554880421, 0.0918856966184178, 0.184867986813743,
0.122339223641891, 0.0137386656250425, 0.0285951418531372,
0.0896433805255375), Estimate = c(0.00932965761458826, -0.000412018017715892,
0.00315002626133457, 0.00823125148777124, 0.000741919819714724,
-0.0211994171332907, -0.0218353390160545, 0.00290805283382581,
0.00406489584624635, 0.00604261698709428), Std..Error = c(0.00398420082222495,
0.00483343236746232, 0.00186579338568264, 0.0032167092866312,
0.00379995092553099, 0.0128981988697743, 0.0122846784163747,
0.00220581166165486, 0.00335683359383524, 0.00240425995025825
), StateN = structure(c(13L, 25L, 18L, 27L, 4L, 2L, 1L, 15L,
14L, 7L), .Label = c("DE (20)", "DC (14)", "WA (23)", "CT (30)",
"TX (365)", "NY (123)", "IL (29)", "PA (36)", "MI (114)",
"KS (28)", "OK (36)", "NJ (23)", "AL (22)", "GA (78)", "FL (173)",
"SC (69)", "MN (25)", "CA (286)", "OH (85)", "VA (34)", "IN (55)",
"OR (27)", "WI (22)", "NM (64)", "AZ (42)", "TN (77)", "CO (99)",
"LA (83)", "MA (22)", "NC (65)", "MS (63)"), class = "factor")), .Names = c("State",
"Year", "N", "Polarization", "PolarizationSE", "IDPol", "IDPolSE",
"Estimate", "Std..Error", "StateN"), row.names = c(1401L, 1403L,
1404L, 1405L, 1406L, 1407L, 1408L, 1409L, 1410L, 1414L), class = "data.frame")
And here is the code used to produce the plot:
# Build "<State> (<N>)" axis labels, then turn them into a factor whose
# levels follow ascending Polarization.
TestData$StateN <- paste0(TestData$State, " (", TestData$N, ")")
TestData$StateN <- factor(
  TestData$StateN,
  levels = TestData$StateN[order(TestData$Polarization)]
)

# Point-range plot: each state's Polarization with a range of
# Polarization -/+ 1.96 * PolarizationSE, drawn over a reference line at 0.
ZP17Test <- ggplot(
  TestData,
  aes(
    x = StateN,
    y = Polarization,
    ymin = Polarization - 1.96 * PolarizationSE,
    ymax = Polarization + 1.96 * PolarizationSE
  )
) +
  geom_hline(
    yintercept = 0,
    colour = I(MyPalette(5)[3]),
    alpha = I(7/12),
    size = I(1/3)
  ) +
  geom_pointrange(size = I(1/3)) +
  scale_x_discrete("State (Number of Respondents)") +
  opts(
    title = "State Polarization Levels in 2008",
    axis.text.x = theme_text(angle = 45, hjust = 1, size = 7)
  )

# Show the plot on the active device and write a 6 x 4 copy to disk.
print(ZP17Test)
ggsave(filename = "Analysis/Stack_Overflow.png", plot = ZP17Test, height = 4, width = 6)
Thanks in advance for any help you can offer.
Angled axis text labels always mess with my head. What you want is this:
axis.text.x = theme_text(hjust = 1,vjust = 1,angle=45, size = 7)
The order you specify them in makes a difference, in my experience. I always have to fiddle with it until I get it just right. Smarter folks can probably remember the system to it.
Related
Currently I have a dataframe consisting of several flights, as shown below.
Using ggplot as shown below, I have managed to plot the flight path from origin to destination however cannot seem to change the path line to gradient colour that can visualise the flight from origin to destination.
Please advise: from my understanding, a ggplot colour has to be mapped to a variable.
# One facet per aircraft: a curved segment from origin to destination for
# every flight (width mapped to TotalDelay, colour to TailNum), with both
# end points marked as small dots.
q4c %>%
  mutate(
    TailNum = factor(
      x = TailNum,
      levels = c(
        "N351UA", "N960DL", "N524", "N14998", "N355CA",
        "N711UW", "N587AA", "N839UA", "N941CA", "N516UA"
      )
    )
  ) %>%
  ggplot() +
  usMap2 +
  geom_curve(
    mapping = aes(
      x = OriginLong, y = OriginLat,
      xend = DestLong, yend = DestLat,
      size = TotalDelay, color = TailNum
    ),
    curvature = 0.2
  ) +
  scale_size_continuous(range = c(0.02, 0.5)) +
  geom_point(mapping = aes(x = OriginLong, y = OriginLat), size = 0.02) +
  geom_point(mapping = aes(x = DestLong, y = DestLat), size = 0.02) +
  facet_wrap(~TailNum)
edit:
I have tried ggforce::geom_link; however, it only shows solid colours instead of a gradient, even though I added a dummy sequence column (seqnum) to provide the colour contrast.
structure(list(Year = c(2005L, 2005L, 2005L, 2005L, 2005L, 2005L
), Month = c(1L, 1L, 1L, 1L, 1L, 1L), DayofMonth = c(1L, 1L,
1L, 1L, 1L, 1L), DepTime = c(1022L, 1025L, 1037L, 1054L, 1110L,
1111L), ArrTime = c(1527L, 1057L, 1209L, 1219L, 1454L, 1409L),
DepDelay = c(3L, 0L, -10L, -1L, 65L, -2L), ArrDelay = c(6L,
-3L, -11L, -27L, 52L, -8L), TotalDelay = c(9L, -3L, -21L,
-28L, 117L, -10L), TailNum = c("N351UA", "N524", "N14998",
"N941CA", "N355CA", "N587AA"), Origin = c("DEN", "PHX", "LBB",
"LGA", "SLC", "DFW"), Dest = c("CLT", "BUR", "IAH", "GSO",
"STL", "IND"), AirportOrigin = c("Denver Intl", "Phoenix Sky Harbor International",
"Lubbock International", "LaGuardia", "Salt Lake City Intl",
"Dallas-Fort Worth International"), OriginLong = c(-104.6670019,
-112.0080556, -101.8227778, -73.87260917, -111.9777731, -97.0372
), OriginLat = c(39.85840806, 33.43416667, 33.66363889, 40.77724306,
40.78838778, 32.89595056), AirportDest = c("Charlotte/Douglas International",
"Burbank-Glendale-Pasadena", "George Bush Intercontinental",
"Piedmont Triad International", "Lambert-St Louis International",
"Indianapolis International"), DestLong = c(-80.94312583,
-118.3584969, -95.33972222, -79.9372975, -90.35998972, -86.29438417
), DestLat = c(35.21401111, 34.20061917, 29.98047222, 36.09774694,
38.74768694, 39.71732917), id = 1:6, seqnum = c(1, 6, 1,
6, 1, 6)), row.names = c(NA, 6L), class = "data.frame")
dataframe
# geom_link2() interpolates between consecutive rows of a group, so with one
# row per flight (and no destination coordinates supplied) there is nothing
# to blend and the colour stays flat. geom_link() takes both end points of
# each flight (xend/yend) and exposes the computed variable `index`, which
# runs from 0 at the origin to 1 at the destination; mapping colour to it
# gives the blue-to-red gradient along each path.
q4cc %>%
  ggplot() +
  usMap2 +
  geom_link(
    aes(
      x = OriginLong, y = OriginLat,
      xend = DestLong, yend = DestLat,
      size = TotalDelay, colour = after_stat(index)
    )
  ) +
  scale_size_continuous(range = c(0.02, 1)) +
  scale_color_gradient(name = "Journey Path", high = "red", low = "blue") +
  scale_alpha_continuous(range = c(0.03, 0.3)) +
  # Mark every origin with a small red dot.
  geom_point(aes(x = OriginLong, y = OriginLat), colour = "red", size = 0.02) +
  facet_wrap(~TailNum)
New Plot
I am trying to create a time series plot that has multiple data over the years. I would like to just plot the years and get the data to run from start date to end date. Here I have converted the respective columns to dates and then combined them but I do not get the result I am looking for.
The data is available from this website: https://www.businessinsider.co.za/coronavirus-deaths-how-pandemic-compares-to-other-deadly-outbreaks-2020-4?r=US&IR=T
Something like this where the data doesn't start in the same year or end in the same year:
https://ichef.bbci.co.uk/news/410/cpsprodpb/6E25/production/_111779182_optimised-mortality-nc.png
(time period vs deaths caused)
library(lubridate)
library(ggplot2)
otherDiseaseData <- structure(list(ï..Disease = structure(c(11L, 2L, 12L, 6L, 3L,
1L, 9L, 7L, 13L, 4L, 5L, 8L, 10L), .Label = c("Asian Flu", "blackdeath",
"Cholera", "Covid 19", "Ebola", "HIV", "Hong Kong Flu", "Mers",
"Russian Flu", "Sars", "smallpox", "spanish flu", "Swine Flu"
), class = "factor"), Start = c(0L, 1347L, 1918L, 1981L, 1899L,
1957L, 1889L, 1968L, 2009L, 2019L, 2014L, 2012L, 2002L), End = c(1979L,
1351L, 1919L, 2020L, 1923L, 1958L, 1890L, 1970L, 2010L, 2020L,
2016L, 2020L, 2003L), Death = c(300000L, 225000000L, 50000L,
2360000L, 1500000L, 1100000L, 1000000L, 1000000L, 151700L, 101526L,
11300L, 866L, 774L)), class = "data.frame", row.names = c(NA,
-13L))
# Parse the start and end years as dates and read the year back with
# lubridate::year(), keeping the two ends in separate vectors.
yrs <- otherDiseaseData$Start
yr <- as.Date(as.character(yrs), format = "%Y")
yStart <- year(yr)
yrs <- otherDiseaseData$End
yr <- as.Date(as.character(yrs), format = "%Y")
yEnd <- year(yr)  # previously assigned to yStart, overwriting the start years

# Label every disease with its "<Start> <End>" period.
otherDiseaseData$x <- paste(otherDiseaseData$Start, otherDiseaseData$End)
otherDiseaseData

# Columns are referenced by bare name inside aes(). The former
# `xlim = 0000-2000` argument evaluates to -2000 and is not an axis limit,
# so it has been removed.
ggplot(otherDiseaseData, aes(x = x, y = Death)) +
  geom_point()
I'm not sure I've fully understood what you're asking for, but my interpretation is this:
# The posted data names its first column "ï..Disease" (presumably an
# encoding artefact from the import); give it the name used below.
names(otherDiseaseData)[1] <- "Disease"

# Long format: one row per disease and endpoint ("Start" / "End"), with the
# year stored in `value`.
df <- reshape::melt(otherDiseaseData, measure.vars = c("Start", "End"))

# Drop smallpox and the 225,000,000-death row once, up front. dplyr is
# called with an explicit namespace because it is not attached here and a
# bare filter() would resolve to stats::filter().
plot_df <- dplyr::filter(df, Disease != "smallpox", Death != 225000000)

ggplot(plot_df) +
  geom_line(aes(value, Death, colour = Disease), size = 2) +
  theme_minimal() +
  # Label each line once, at its "End" point.
  ggrepel::geom_label_repel(
    data = dplyr::filter(plot_df, variable != "Start"),
    aes(label = Disease, x = value, y = Death)
  ) +
  scale_y_log10() +
  theme(legend.position = "none", aspect.ratio = 1) +
  ylab("Number of Deaths") +
  xlab("Year")
I've used the reshape package to reorganise the given data, and then ggrepel to label the bars. I've had to remove some data as it really throws the scale, which I've ended up making logarithmic to spread the data out a little. It gives you this plot:
It's not perfect but it might be heading in the right direction? Apologies if I've misunderstood what you were angling for.
I am trying to create some plots in a loop in the following way: What I want to achieve is to plot the plots below each other into one single plot. For this I've read that I could use par(mfrow) but I am unsure where to put it in my loop. At the moment single graphs are created which are divided into two but only the upper part is plotted.
I know that I could do it the following way:
par(mfrow=2:1)
plot(...)
plot(...)
but with this solution I would have to call the plot command every time for each plot. In the end I would like to have about 20 very narrow plots stacked on top of each other, and I don't want to call each of them by hand. How could I do this automatically?
Here is the code I've been using so far:
xy <- structure(list(NAME = structure(c(2L, 2L, 1L, 1L), .Label = c("CISCO", "JOHN"), class = "factor"), ID = c(41L, 41L, 57L, 57L), X_START_YEAR = c(1965L, 1932L, 1998L, 1956L), Y_START_VALUE = c(960L, -45L, 22L, -570L), X_END_YEAR = c(1968L, 1955L, 2002L, 1970L), Y_END_VALUE = c(960L, -45L, 22L, -570L), LC = structure(c(1L, 1L, 2L, 2L), .Label = c("CA", "US"), class = "factor")), .Names = c("NAME", "ID", "X_START_YEAR","Y_START_VALUE", "X_END_YEAR", "Y_END_VALUE", "LC"), class = "data.frame", row.names = c(NA,-4L))
# Split the data by ID: one group per output file.
ind <- split(xy, xy$ID)

# Write "<ID>.png" for each group. The device is laid out as two stacked
# panels (mfrow = 2:1), of which only the first is drawn into.
for (grp in ind) {
  col_names <- colnames(grp)
  x_years <- unlist(grp[, grep("X_", col_names)])
  y_vals <- unlist(grp[, grep("Y_", col_names)])

  png(paste0(grp[1, "ID"], ".png"), width = 1679, height = 1165, res = 150)
  par(mar = c(6, 8, 6, 5))
  par(mfrow = 2:1)

  # Empty frame; the y-range is widened to always contain -0.5 .. 0.5.
  plot(x_years, y_vals, type = "n", xlab = NA, ylab = "Value [mm]",
       ylim = range(c(y_vals, -0.5, 0.5)))

  # One segment per row: (X_START_YEAR, Y_START_VALUE) to
  # (X_END_YEAR, Y_END_VALUE), i.e. columns 3 to 6 of the group.
  segments(grp[, 3], grp[, 4], grp[, 5], grp[, 6], lwd = 2)
  abline(h = 0)
  dev.off()
}
EDIT: Up until now I tried to use an approach by adding the resulting pngs in one plot after they are being created as described here (R: crop multiple pngs and combine them into a single plot), but this approach is not that straightforward because of problems with cropping and output resolution...
I am plotting several plots like in the code below (the sample code should generate three different graphs). What I am having problems with is the scaling of the y axis.
My question: How could I define my ylimit so that the abline at y=0 is always visible in my plots?
At the moment the yaxis is scaled automatically. I am looking for a solution in baseR.
xy <- structure(list(NAME = structure(c(2L, 3L, 1L, 1L), .Label = c("CISCO","JOHN", "STEPH"), class = "factor"), ID = c(41L, 49L, 87L, 87L), X_START_YEAR = c(1965L, 1948L, 1959L, 2003L), Y_START_VALUE = c(940L,-1760L, 110L, 866L), X_END_YEAR = c(2005L, 2000L, 2000L, 2007L), Y_END_VALUE = c(940L, -1760L, 110L, 866L), LC = structure(c(1L,1L, 2L, 2L), .Label = c("CA", "US"), class = "factor")), .Names = c("NAME", "ID", "X_START_YEAR", "Y_START_VALUE", "X_END_YEAR", "Y_END_VALUE","LC"), class = "data.frame", row.names = c(NA, -4L))
# Split by ID so that every ID gets its own plot.
ind <- split(xy, xy$ID)

# Write "<ID>.png" for each group; the y axis is left to scale automatically.
for (grp in ind) {
  col_names <- colnames(grp)
  x_years <- unlist(grp[, grep("X_", col_names)])
  y_vals <- unlist(grp[, grep("Y_", col_names)])

  png(paste0(grp[1, "ID"], ".png"), width = 1679, height = 1165, res = 150)
  # Define ylim so that y=0 (abline) is always in plots
  par(mar = c(6, 8, 6, 5))

  # Empty frame titled with the group's NAME value(s).
  plot(x_years, y_vals, type = "n", main = unique(grp[, 1]),
       xlab = "Time [Years]", ylab = "Value [mm]")
  abline(h = 0, col = "gray60")

  # One segment per row (columns 3 to 6: start year/value, end year/value),
  # with open markers on both end points.
  segments(grp[, 3], grp[, 4], grp[, 5], grp[, 6], lwd = 2)
  points(x_years, y_vals, pch = 21, bg = "white", cex = 0.8)
  dev.off()
}
Just make sure to include some numbers above/below 0 and use range() to give max and min values above and below zero. How about including
ylim = range(c(yy,-.5,.5))
as a parameter to plot()
I am plotting several hundred graphs in a loop using the segments() function. Here is some sample data which creates two graphs.
xy <- structure(list(NAME = structure(c(2L, 2L, 1L, 1L), .Label = c("CISCO", "JOHN"), class = "factor"), ID = c(41L, 41L, 57L, 57L), X_START_YEAR = c(1965L, 1932L, 1998L, 1956L), Y_START_VALUE = c(960L, -45L, 22L, -570L), X_END_YEAR = c(1968L, 1955L, 2002L, 1970L), Y_END_VALUE = c(960L, -45L, 22L, -570L), LC = structure(c(1L, 1L, 2L, 2L), .Label = c("CA", "US"), class = "factor")), .Names = c("NAME", "ID", "X_START_YEAR","Y_START_VALUE", "X_END_YEAR", "Y_END_VALUE", "LC"), class = "data.frame", row.names = c(NA,-4L))
# Split the data by ID: one group per output file.
ind <- split(xy, xy$ID)

# Write "<ID>.png" for each group, with the y-range widened so that the
# zero line is always inside the plot.
for (grp in ind) {
  col_names <- colnames(grp)
  x_years <- unlist(grp[, grep("X_", col_names)])
  y_vals <- unlist(grp[, grep("Y_", col_names)])

  png(paste0(grp[1, "ID"], ".png"), width = 1679, height = 1165, res = 150)
  par(mar = c(6, 8, 6, 5))

  # Empty frame titled with the group's NAME value(s).
  plot(x_years, y_vals, type = "n", main = unique(grp[, 1]),
       xlab = "Time [Years]", ylab = "Value [mm]",
       ylim = range(c(y_vals, -0.5, 0.5)))

  # One segment per row (columns 3 to 6: start year/value, end year/value),
  # open markers on both end points, then the zero line on top.
  segments(grp[, 3], grp[, 4], grp[, 5], grp[, 6], lwd = 2)
  points(x_years, y_vals, pch = 21, bg = "white", cex = 0.8)
  abline(h = 0, col = "gray60")
  dev.off()
}
What I am attempting to do is to change this to a barplot with colorized groups (e.g. every value above 0 is in blue and below 0 in red). I have added a visualisation of what I am trying to achieve from one of the resulting plots.
As I understand the barplot() function, I could use the coordinates from my segments() call (segments(i[,2],i[,3],i[,4],i[,5])) to set the width option of each bar.
My question: Does anyone have an idea how I could change this in order to get the height command out of my data? I am looking for a solution in baseR.
You can use rect for this:
# Draw one plot per ID group in which every row becomes a bar: it spans
# X_START_YEAR..X_END_YEAR on the x axis and reaches from 0 to
# Y_START_VALUE on the y axis (red below zero, blue otherwise).
#
# Differences from a row-wise apply():
#  * columns are used directly, so values stay numeric (apply() on a
#    data.frame with factor columns coerces every row to character);
#  * ylim always contains 0, so bars are not clipped when all values in a
#    group lie on one side of zero;
#  * the zero line is drawn once per plot instead of once per row;
#  * the result is returned invisibly, so no list of NULLs is printed.
invisible(lapply(ind, function(x) {
  years <- unlist(x[, c("X_START_YEAR", "X_END_YEAR")])
  values <- unlist(x[, c("Y_START_VALUE", "Y_END_VALUE")])

  plot(years, values, type = "n",
       ylim = range(c(values, 0)),
       xlab = "Time [Years]", ylab = "Value [mm]",
       main = as.character(x[1, "NAME"]))

  # rect() is vectorised: one rectangle per row of the group.
  rect(x$X_START_YEAR, pmin(x$Y_START_VALUE, 0),
       x$X_END_YEAR, pmax(x$Y_START_VALUE, 0),
       col = ifelse(x$Y_START_VALUE < 0, "red", "blue"))
  abline(h = 0)
}))