Plotting points in map (long and lat) ggplot2 - r

I'm trying to plot points on a map (shapefile) but I can't get it to work. I've transformed the shapefile to longitude/latitude with st_transform('+init=epsg:4326') because my data (dat) has lat and long columns.
Then I plot it, but it doesn't work.
# Read the province shapefile and reproject it to EPSG:4326 so that it uses
# the same CRS that is later declared for the long/lat columns of `dat`.
map = st_read('nxprovincias.shp') %>%
sf::st_transform('+init=epsg:4326')
# I've tried using `geom_sf`
# Attempt 1: convert `dat` to an sf point layer (x = long, y = lat, CRS 4326)
# and draw it on top of the polygon layer.
dat1 = dat %>%
st_as_sf(coords = c("long", "lat"), crs=4326)
ggplot() +
geom_sf(data = map) +
geom_sf(data = dat1, aes(geometry = geometry))
# and `geom_point`
# Attempt 2: plot the raw long/lat columns of `dat` over the same polygon layer.
ggplot() +
geom_sf(data = map) +
geom_point(data = dat, aes(x = long, y = lat))
Data (long, lat, x)
dat = structure(list(lat = structure(c(-2.87660479545593, -2.87720417976379,
-2.87735748291016, -2.87753105163574, -2.87765717506409, -2.87785005569458,
-2.87821888923645, -2.87918782234192, -2.87929964065552, -2.87667083740234,
-2.87697672843933, -2.87707996368408, -2.87767362594604, -2.87771010398865,
-2.87813591957092, -2.8781750202179, -2.8784019947052, -2.87957549095154,
-2.87958645820618, -2.87968635559082, -2.87970232963562, -2.87977194786072,
-2.87977933883667, -2.87978482246399, -2.87985110282898, -2.87985396385193,
-2.87991166114807, -2.87996673583984, -2.87998247146606, -2.88041758537292,
-2.9928183555603, -99, -2.87677383422852, -2.87691879272461,
-2.87718558311462, -2.87721037864685, -2.87743043899536, -2.87768173217773,
-2.87944602966309, -2.87797331809998, -2.87819075584412, -2.87830853462219,
-2.87849140167236, -2.8785994052887, -2.87917923927307, -2.87923359870911,
-2.87934041023254, -2.87948775291443, -2.88050103187561, -2.88078212738037,
-2.88109421730042, -2.88113117218018, -2.88172602653503, -2.88214111328125,
-2.88219523429871, -2.87862133979797, -2.88026261329651, -2.88060832023621,
-2.88061451911926, -2.88077187538147, -2.88077616691589, -2.88100337982178,
-2.88157868385315, -2.8817310333252, -2.88299989700317, -2.89299464225769,
-2.88181924819946, -2.88214421272278, -2.88239336013794, -2.88244104385376,
-2.88291192054749, -2.88306641578674, -2.87702965736389, -2.87748551368713,
-2.87786865234375, -2.87825655937195, -2.87838006019592, -2.88284087181091,
-2.87875247001648, -2.88032579421997, -2.88060545921326, -2.87759780883789,
-2.87762522697449, -2.8776683807373, -2.87819457054138, -2.87915062904358,
-2.87936305999756, -2.87957811355591, -2.87959146499634, -2.87961769104004,
-2.88021159172058, -2.88076829910278, -2.88081574440002, -2.88141989707947,
-2.87116622924805, -2.87180852890015, -2.87283968925476, -2.87302923202515,
-2.87308740615845, -2.87557435035706), format.stata = "%9.0g"),
long = structure(c(-79.0676956176758, -79.0701141357422,
-79.0691986083984, -79.067756652832, -79.0691986083984, -79.0691986083984,
-79.0691528320312, -79.0684051513672, -79.0679779052734,
-79.0663223266602, -79.0669784545898, -79.0658645629883,
-79.0663909912109, -79.0657043457031, -79.0673751831055,
-79.0672378540039, -79.0664520263672, -79.0665512084961,
-79.0662689208984, -79.0657424926758, -79.0663528442383,
-79.0650329589844, -79.0666046142578, -79.0664138793945,
-79.0665588378906, -79.0667877197266, -79.0649337768555,
-79.0649490356445, -79.0650482177734, -79.064826965332, -79.0410537719727,
-99, -79.0623397827148, -79.0616836547852, -79.0619812011719,
-79.061897277832, -79.0632171630859, -79.0630722045898, -79.061653137207,
-79.0590896606445, -79.0603866577148, -79.0595092773438,
-79.0588912963867, -79.0578765869141, -79.0596008300781,
-79.0606155395508, -79.0592498779297, -79.0592041015625,
-79.0583572387695, -79.0598678588867, -79.0614395141602,
-79.0602798461914, -79.0587768554688, -79.0586318969727,
-79.0586547851562, -79.0604934692383, -79.0666580200195,
-79.0646667480469, -79.0649719238281, -79.0640106201172,
-79.0656890869141, -79.0631713867188, -79.059700012207, -79.0645904541016,
-79.0590209960938, -78.9783630371094, -79.0576248168945,
-79.0585327148438, -79.0580749511719, -79.0582504272461,
-79.0576858520508, -79.0575942993164, -79.0545349121094,
-79.0535278320312, -79.0556869506836, -79.0555191040039,
-79.0541076660156, -79.0554046630859, -79.0519485473633,
-79.052360534668, -79.052848815918, -79.0486145019531, -79.0485687255859,
-79.0481719970703, -79.0492935180664, -79.0472640991211,
-79.0477523803711, -79.0483016967773, -79.04833984375, -79.0483245849609,
-79.047981262207, -79.0500640869141, -79.0481643676758, -79.0477676391602,
-79.0512161254883, -79.050537109375, -79.0501861572266, -79.0501327514648,
-79.0500335693359, -79.0494155883789), format.stata = "%9.0g"),
x = structure(c(0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1,
0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1, 0, 1, 0,
2, 0, 0, 0, 2, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0), format.stata = "%8.0g")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))

Can you elaborate a bit on what exactly did not work? There is one point with invalid coordinates in your data set:
subset(
dat
, lat < -90
)
# # A tibble: 1 × 3
# lat long x
# <dbl> <dbl> <dbl>
# -99 -99 2
Remove it before converting to sf and your plot call works just fine:
# Keep only rows whose coordinates are valid longitude/latitude values before
# building the sf layer. The sample data contains one row with lat = long = -99;
# testing both columns against both bounds also catches values such as 99 or
# 999, which a lone `lat >= -90` check would let through. subset() additionally
# drops rows where the condition evaluates to NA.
dat1 <- dat |>
  subset(
    lat >= -90 & lat <= 90 & long >= -180 & long <= 180
  ) |>
  st_as_sf(
    coords = c("long", "lat"),
    crs = 4326
  )
# Draw the polygons first, then the points in red on top of them.
ggplot() +
  geom_sf(data = map) +
  geom_sf(data = dat1, aes(geometry = geometry), color = "red") +
  theme_minimal()
BTW, you might want to think about switching to something more interactive (like mapview) for exploratory analysis of spatial data:
library(mapview)
# Polygon layer, shown without a legend.
m_map <- mapview(map, legend = FALSE)
# Point layer, named "x".
m_dat <- mapview(dat1, layer.name = "x")
# Combine both layers into a single map.
m_map + m_dat

Related

Aggregate similar constructs/ FA with binary variables

I would like to reduce the number of constructs by aggregating the following data frame, which contains only binary variables corresponding to "yes/no" answers (first 10 rows shown below). The original data frame contains 169 rows.
outcome <-
structure(list(Q9_Automazione.processi = c(0, 0, 0, 0, 0, 0,
1, 1, 1, 0), Q9_Velocita.Prod = c(1, 0, 0, 1, 0, 0, 1, 1, 1,
0), Q9_Flessibilita.Prod = c(0, 0, 0, 1, 0, 0, 1, 1, 0, 1), Q9_Controllo.processi = c(0,
0, 0, 1, 0, 0, 1, 1, 0, 0), Q9_Effic.Magazzino = c(0, 0, 0, 1,
0, 0, 0, 0, 0, 0), Q9_Riduz.Costi = c(0, 1, 0, 0, 0, 0, 0, 0,
0, 1), Q9_Miglior.Sicurezza = c(0, 0, 0, 0, 0, 0, 1, 0, 1, 1),
Q9_Connett.Interna = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 0), Q9_Connett.Esterna = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), Q9_Virtualizzazione = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0), Q9_Innov.Prod = c(0, 0, 0, 0, 0,
1, 0, 0, 0, 1), Q9_Person.Prod = c(0, 1, 0, 1, 0, 1, 0, 0,
0, 1), Q9_Nuovi.Mercati = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
Q9_Nuovi.BM = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Q9_Perform.Energ = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), Q9_Perform.SostAmb = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, 10L), class = "data.frame")
I have tried performing factor analysis with the tetrachoric method, both on the resulting correlation matrix (the value returned by the KMO function turns out to be inadequate) and directly on the data frame using tetrachoric correlations in the fa function (using cor = "tet" I get a negative Tucker-Lewis Index).
I have been reading up on this but cannot find a methodology that is adequate and of which I am certain of the correctness of the analysis.
So basically what I would like to achieve is to aggregate similar constructs, e.g., assess whether column 5 has value 1 (i.e., "yes") almost always when column 11 has value 1 and then aggregate.
Here is the code that I tried to use:
library(psych)
# Tetrachoric correlations of the binary items; the matrix is in tet$rho.
tet <- tetrachoric(outcome)
# NOTE(review): corrplot() (and ggcorrplot() / %>% below) are used although only
# psych is attached in this snippet — presumably loaded elsewhere; confirm.
corrplot(tet$rho, "ellipse", tl.cex = 0.75, tl.col = "black")
par(mfrow = c(1,2))
# NOTE(review): corr_matrix is never assigned in this snippet; presumably it is
# tet$rho — confirm.
corr_matrix %>%
ggcorrplot(show.diag = F,
type="lower",
lab=TRUE,
lab_size=2)
# Sampling-adequacy and sphericity checks on the correlation matrix.
KMO(corr_matrix)
cortest.bartlett(corr_matrix)
# Parallel analysis, then a 3-factor maximum-likelihood solution with oblimin
# rotation, both run on the correlation matrix.
fa.parallel(corr_matrix, fm = "ml")
factor <- fa(corr_matrix, nfactors = 3, rotate = "oblimin", fm = "ml")
# Print loadings, hiding those below 0.3.
print(factor, cut = 0.3, digits = 3)
# -------- Pearson --------
# Same sequence of steps, this time starting from the raw data frame.
cor(outcome, method = 'pearson', use = "pairwise.complete.obs") %>%
ggcorrplot(show.diag = F,
type="lower",
lab=TRUE,
lab_size=2)
KMO(outcome)
cortest.bartlett(outcome)
fa.parallel(outcome)
# NOTE(review): this section is headed "Pearson", but fa() is asked for
# tetrachoric correlations here (cor = "tet").
factor1 <- fa(outcome, nfactors = 3, rotate = "oblimin", cor = "tet", fm = "ml")
print(factor1, cut = 0.3, digits = 3)

How to work with annual country data in R?

It's my first time using R. I want to create a scatterplot with a line of best fit for a decade of data about all countries. I joined two Excel datasets: one has the number of people jailed for a certain crime by country in a given year (rows: country, columns: year); the other has the average income for a certain population group (rows: country, columns: year).
dataclean=inner_join(EnforcementData, IncomeData, by = "Country")
This gives me a dataset with x, y points where enforcement is the x and income is the y
I want to plot this and find the outliers - so those countries where enforcement is out of step with income. I tried:
# NOTE(review): EnforcementData and IncomeData are the names of the two joined
# data frames, not columns of dataclean (its columns are Country, 2006.x, ...,
# 2020.y). The quoted "Country" maps group/colour to a constant string rather
# than to the Country column, and the geom_line( call is missing its closing
# parenthesis.
ggplot(dataclean, aes(x=EnforcementData, y=IncomeData, group= "Country")) +
geom_line(aes(color = "Country")
Thanks for any suggestions!
EDIT: I think I've improperly merged the datasets somehow, as it returns a matrix. Like this:
dput(head(dataclean))
structure(list(Country = c("Albania", "Algeria", "Angola", "Antigua and Barbuda",
"Argentina", "Armenia"), 2006.x = c(0, 0, 0, 0, 0, 0), 2007.x = c(0,
0, 0, 0, 0, 0), 2008.x = c(0, 0, 0, 0, 3, 0), 2009.x = c(0,
0, 0, 0, 2, 0), 2010.x = c(0, 0, 3, 0, 0, 0), 2011.x = c(0,
0, 0, 0, 4, 0), 2012.x = c(0, 0, 0, 0, 2, 0), 2013.x = c(1,
1, 3, 0, 3, 0), 2014.x = c(0, 0, 0, 0, 1, 0), 2015.x = c(0,
0, 1, 1, 0, 0), 2016.x = c(0, 0, 5, 1, 5, 0), 2017.x = c(0,
0, 3, 0, 0, 0), 2018.x = c(0, 0, 0, 0, 0, 0), 2019.x = c(0,
1, 3, 0, 0, 0), 2020.x = c(0, 1, 0, 0, 0, 0), 2006.y = c(3.273755,
2.9912451, 3.689971, 1.342365, 2.8111637, 3.1407325), 2007.y = c(3.157699,
3.0298389, 3.759603, 1.315153, 2.8102016, 3.2122944), 2008.y = c(3.0636166,
3.0644794, 3.754531, 1.181255, 2.9054865, 3.1780076), 2009.y = c(3.0084051,
3.0477934, 3.874565, 1.144331, 2.9149061, 3.0896677), 2010.y = c(2.9951254,
2.9948973, 3.796005, 1.161454, 2.8314702, 3.1664003), 2011.y = c(3.1528966,
3.0144704, 3.814187, 1.190574, 2.8360401, 3.1267727), 2012.y = c(3.1964009,
2.9731618, 3.73838, 1.201921, 2.913096, 3.0577149), 2013.y = c(3.1683419,
2.943247, 3.779373, 1.209151, 2.9020493, 3.0017037), 2014.y = c(3.0180735,
3.0699088, 3.913854, 1.8298544, 3.0114942, 2.9938708), 2015.y = c(2.9489451,
3.1155215, 3.864924, 1.7799824, 3.0169873, 3.0037498), 2016.y = c(2.8750588,
3.1476701, 3.909438, 1.7761061, 2.7538409, 3.041738), 2017.y = c(2.8906318,
3.0717401, 3.880863, 2.2256225, 2.7280908, 3.0332232), 2018.y = c(2.9485421,
3.12678, 3.609102, 2.1923678, 2.5386973, 2.8175096), 2019.y = c(3.0029988,
3.0910585, 3.524361, 2.1915031, 2.5461976, 2.6481938), 2020.y = c(1.9297139,
3.1117555, 3.3970031, 2.1946293, 2.5862916, 2.438313)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))

Adding 2 DF's of Different Sizes Together

I have two DF's:
passesComb <- structure(list(P1_Good = c(0, 1, 0, 0, 0, 0, 1), P2_Good = c(2,
0, 0, 0, 0, 0, 2), P3_Good = c(0, 1, 0, 0, 0, 0, 1), P4_Good = c(0,
0, 1, 0, 0, 0, 1), P5_Good = c(0, 0, 0, 1, 0, 0, 1), P1_Bad = c(0,
0, 0, 0, 0, 0, 0), P2_Bad = c(0, 0, 0, 0, 0, 0, 0), P3_Bad = c(0,
0, 0, 0, 0, 0, 0), P4_Bad = c(0, 0, 1, 0, 0, 0, 1), P5_Bad = c(0,
0, 0, 0, 0, 0, 0), `Bad Pass` = c(0, 0, 1, 0, 0, 1, 1), `Good Pass` = c(2,
2, 1, 1, 0, 3, 6), `Intercepted Pass` = c(0, 0, 0, 0, 0, 1, 0
), Turnover = c(0, 0, 0, 0, 0, 1, 0), totalEvents = c(2, 2, 2,
1, 0, 6, 7)), row.names = c("P1", "P2", "P3", "P4", "P5", "Opponent",
"VT"), class = "data.frame")
of size 7x15, and
copyComb <- structure(list(P1_Good = c(0, 1, 0, 0, 0, 1), P2_Good = c(2,
0, 0, 0, 0, 2), P4_Good = c(0, 0, 0, 0, 0, 0), P5_Good = c(0,
0, 1, 0, 0, 1), P1_Bad = c(0, 0, 0, 0, 0, 0), P2_Bad = c(0, 0,
0, 0, 0, 0), P3_Bad = c(0, 0, 0, 0, 0, 0), P4_Bad = c(0, 0, 0,
0, 0, 0), P5_Bad = c(0, 0, 0, 0, 0, 0), `Bad Pass` = c(0, 0,
0, 0, 1, 0), `Good Pass` = c(2, 1, 1, 0, 3, 4), `Intercepted Pass` = c(0,
0, 0, 0, 1, 0), Turnover = c(0, 0, 0, 0, 1, 0), totalEvents = c(2,
1, 1, 0, 6, 4)), row.names = c("P1", "P2", "P4", "P5", "Opponent",
"VT"), class = "data.frame")
or simply,
# Start from passesComb with its third row and third column dropped.
copyComb <- passesComb[-3, -3]
# Patch the cells that changed once row/column 3 was removed (positions refer
# to the reduced 6x14 table): row 2 -> 1, row 6 -> 0 and 4.
copyComb[2, c(11, 14)] <- 1
copyComb[6, c(8, 3, 10)] <- 0
copyComb[6, c(11, 14)] <- 4
# copyComb is now the same table as the dput() version above.
of size 6x14.
I am trying to combine/add these two df's together based on matching row/column names. I tried to achieve this using the code from the answer to this post
# Allocate an all-NA data frame with as many rows and columns as the larger of
# passesComb and copyComb.
gamesComb <- data.frame(matrix(NA, nrow = ifelse(nrow(passesComb) >= nrow(copyComb), nrow(passesComb),nrow(copyComb)),
ncol = ifelse(ncol(passesComb) >= ncol(copyComb), ncol(passesComb),ncol(copyComb))))
# Try to index gamesComb by the row/column names of the larger input and fill
# it with passesComb.
# NOTE(review): ifelse() returns a result shaped like its length-one test, so it
# presumably does not hand back the whole data frame here, and row.names() /
# colnames() of that result would not be the names of passesComb/copyComb —
# confirm. A plain `if (...) passesComb else copyComb` returns the data frame.
gamesComb[row.names(ifelse(nrow(passesComb) >= nrow(copyComb), passesComb, copyComb)),
colnames(ifelse(ncol(passesComb) >= ncol(copyComb), passesComb, copyComb))] <- passesComb
Here, I create a df, gamesComb and set the dimensions of whichever passesComb or copyComb is bigger. It does create a 7x15 df, but doesn't add the row/col names.
I also am trying to then add the 2 df's together based on the cell value if they have the same row/col name (same as in the post link above), i.e. passesComb["P2","P1_Good"] = 1 and copyComb["P2","P1_Good"] = 1, so gamesComb["P2","P1_Good"] should = 2, and same for all similar row/col names.
So the final result look like:
expectedOutput <- structure(list(P1_Good = c(0, 2, 0, 0, 0, 0, 2), P2_Good = c(4,
0, 0, 0, 0, 0, 4), P3_Good = c(0, 1, 0, 0, 0, 0, 1), P4_Good = c(0,
0, 1, 0, 0, 0, 1), P5_Good = c(0, 0, 0, 2, 0, 0, 2), P1_Bad = c(0,
0, 0, 0, 0, 0, 0), P2_Bad = c(0, 0, 0, 0, 0, 0, 0), P3_Bad = c(0,
0, 0, 0, 0, 0, 0), P4_Bad = c(0, 0, 1, 0, 0, 0, 1), P5_Bad = c(0,
0, 0, 0, 0, 0, 0), `Bad Pass` = c(0, 0, 1, 0, 0, 2, 1), `Good Pass` = c(4,
3, 1, 2, 0, 6, 10), `Intercepted Pass` = c(0, 0, 0, 0, 0, 2,
0), Turnover = c(0, 0, 0, 0, 0, 2, 0), totalEvents = c(4, 3,
2, 2, 0, 12, 11)), row.names = c("P1", "P2", "P3", "P4", "P5",
"Opponent", "VT"), class = "data.frame")
Here's a dplyr/tidyr approach where I reshape each table into a long format, then join them, sum, and pivot wider again.
library(dplyr); library(tidyr)
# lengthen(): reshape a data frame with row names into long format, one row per
# cell, with columns `row` (former row name), `name` (former column name) and
# `value`.
# rownames_to_column() comes from tibble, which is not attached by the
# library() calls above, so it is called with an explicit namespace.
lengthen <- function(df) {
  df %>%
    tibble::rownames_to_column(var = "row") %>%
    pivot_longer(-row)
}
# Match cells of both tables by (row, name), treat a cell missing from either
# table as 0, add the two values, and reshape back to wide format.
full_join(lengthen(passesComb), lengthen(copyComb), by = c("row", "name")) %>%
  mutate(new_val = coalesce(value.x, 0) + coalesce(value.y, 0)) %>%
  select(-starts_with("value")) %>%
  pivot_wider(names_from = name, values_from = new_val)
Another option is to stack them and then sum by rowname groups.
library(dplyr, warn.conflicts = FALSE)
library(tibble)
# Stack both tables (row names moved into a `rowname` column), then sum every
# column within each row name. Columns that exist in only one table are filled
# with NA by bind_rows(), hence na.rm = TRUE in the sum.
out <-
  rownames_to_column(passesComb) %>%
  bind_rows(rownames_to_column(copyComb)) %>%
  # bind_rows(rownames_to_column(third_table)) %>% if you want to add another
  # all_of() makes it explicit that the names come from an external vector.
  select(rowname, all_of(names(passesComb))) %>%
  group_by(rowname) %>%
  # The extra argument is passed inside the function rather than through
  # across(...), which is deprecated.
  summarise(across(everything(), function(x) sum(x, na.rm = TRUE))) %>%
  # Restore the row order of passesComb before turning names back into row names.
  slice(match(rownames(passesComb), rowname)) %>%
  column_to_rownames("rowname")
all.equal(out, expectedOutput)
#> [1] TRUE
Created on 2021-10-09 by the reprex package (v2.0.1)

How to create and export multiple plots to jpeg format in r?

I have been creating a bar plot for the results of a sentiment analysis model in R. The data is highly confidential customer feedback, which is fed into a sentiment analysis model to generate outputs. My task is to generate a chart for each combination of zone and sub-zone; for example, zone = delhi has sub-zones such as eastdelhi, westdelhi, northdelhi and southdelhi. I want to generate charts for combinations like
zone = delhi and sub-zone = eastdelhi, and save each one to a JPEG file. I have written a for loop to do so, but for some reason it isn't working. This is the code:
#Set locales
# Removes every object from the global environment before the script runs.
rm(list = ls())
Sys.setlocale(category = "LC_ALL",locale = "English")
#Load libraries
# NOTE(review): "dplyr" and "tidyr" are listed twice; require() returns FALSE
# instead of stopping when a package cannot be loaded.
LoadLibraries <- c("openxlsx",
"dplyr",
"tidyr",
"plotly",
"RColorBrewer",
"shiny",
"officer",
"parallel",
"dplyr",
"tidyr",
"magrittr",
"knitr")
lapply(LoadLibraries, require, character.only = TRUE)
# Read the workbook and remember the names of its first ten columns.
path = "C:/Users/R_Visual/Data/visual_data.xlsx"
input_data <- read.xlsx(path)
name <- names(input_data[,1:10])
#Filtering the zones and circles
# NOTE(review): Zone.Final and Circle.Final are referenced as free variables
# here, not as input_data$Zone.Final / input_data$Circle.Final.
for (i in 1:length(unique(Zone.Final))){
for (j in 1:length(unique(Circle.Final))){
# NOTE(review): `+` does not concatenate strings in R, and str() displays an
# object's structure rather than converting it to text.
fileName = 'C:/Users/R_Visual/'+ str(i) + str(j) + '.jpeg'
jpeg(fileName, width = 900, height = 450)
# NOTE(review): filter() receives a single element of the column here, not a
# logical condition comparing the column with a zone/circle value.
df <- input_data %>%
filter(input_data$Zone.Final[i])
df <- df %>%
filter(df$Circle.Final[j])
color <- c("#ca2f27","#f56d43","#f8c38a","#fde08b","#d9ef8b","#a7d86f","#67bd64","#1a984f","#D3D3D3","#A9A9A9")
# NOTE(review): the bars are computed from the full input_data, not from the
# filtered df, and the sorted totals are labelled with the unsorted `name`.
plot <- barplot(sort(colSums(input_data[, 1:10])),
main = paste("Sentiment Analysis for Zone",df$Zone.Final[i]," and Circle",df$Circle.Final[j], sep = ""),
xlab = "Sentiments",
ylab = "Count",
horiz = FALSE,
names = name,
col = color,
border = FALSE,
legend = TRUE,
beside = TRUE,
legend.text = name,
args.legend = list(bty = "n", x = "topleft",ncol = 1, cex = 0.8, y.intersp = 0.8, x.intersp = 0.25, horiz = F, xpd = TRUE, inset = c(0,0)))
# Close the jpeg device so the file is written.
dev.off()
}
}
EDIT:
This is the sample of input_data
> dput(input_data)
structure(list(anger = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), anticipation = c(1,
0, 0, 0, 0, 0, 1, 0, 0, 0), disgust = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), fear = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), joy = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0), sadness = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), surprise = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), trust = c(0,
0, 1, 1, 1, 0, 2, 0, 0, 0), negative = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), positive = c(1, 0, 0, 0, 1, 1, 2, 1, 0, 1), Zone.Final = c("Delhi",
"Lucknow", "Durgapur", "Lucknow", "Mumbai", "Bhopal", "Chandigarh",
"Chandigarh", "Gurugram", "Chandigarh"), Circle.Final = c("Noida",
"Gorakhpur", "Murshidabad", "Gorakhpur", "Mumbai City", "Bhopal",
"Chandigarh", "Panchkula", "Hisar", "Karnal")), row.names = c(NA,
10L), class = "data.frame")
If anyone could help me with the code, it would be of great help.
You can try creating a list combining the zone and subzone:
#Data
input_data <- structure(list(anger = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), anticipation = c(1,
0, 0, 0, 0, 0, 1, 0, 0, 0), disgust = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), fear = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), joy = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0), sadness = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), surprise = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), trust = c(0,
0, 1, 1, 1, 0, 2, 0, 0, 0), negative = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), positive = c(1, 0, 0, 0, 1, 1, 2, 1, 0, 1), Zone.Final = c("Delhi",
"Lucknow", "Durgapur", "Lucknow", "Mumbai", "Bhopal", "Chandigarh",
"Chandigarh", "Gurugram", "Chandigarh"), Circle.Final = c("Noida",
"Gorakhpur", "Murshidabad", "Gorakhpur", "Mumbai City", "Bhopal",
"Chandigarh", "Panchkula", "Hisar", "Karnal")), row.names = c(NA,
10L), class = "data.frame")
#Code
#First create a global id that combines zone and subzone
df <- input_data
df$id <- paste(df$Zone.Final, df$Circle.Final, sep = "-")
#Split into one data frame per zone/subzone combination
List <- split(df, df$id)
#Plot colours, assigned to the bars by position
color <- c("#ca2f27","#f56d43","#f8c38a","#fde08b","#d9ef8b","#a7d86f","#67bd64","#1a984f","#D3D3D3","#A9A9A9")
#Plot names: one jpeg file per combination
vnames <- paste0(names(List), ".jpeg")
#Loop
# seq_along() yields an empty sequence when List is empty, whereas
# 1:length(List) would iterate over c(1, 0).
for (i in seq_along(List)) {
  group <- List[[i]]
  # Column totals of the ten sentiment columns, sorted ascending. The labels
  # are taken from the sorted vector so that every bar and legend entry
  # carries the name of the sentiment whose total it shows; using the unsorted
  # column names here would attach labels to the wrong bars.
  counts <- sort(colSums(group[, 1:10]))
  name <- names(counts)
  jpeg(filename = vnames[i], width = 900, height = 450)
  # finally = dev.off() closes the device even if barplot() fails, so a failed
  # plot does not leave an open jpeg device behind.
  tryCatch(
    barplot(counts,
      main = paste0(
        "Sentiment Analysis for Zone ",
        unique(group$Zone.Final),
        " and Circle ", unique(group$Circle.Final)
      ),
      xlab = "Sentiments",
      ylab = "Count",
      horiz = FALSE,
      names.arg = name,
      col = color,
      border = FALSE,
      beside = TRUE,
      # The legend is requested through legend.text / args.legend.
      legend.text = name,
      args.legend = list(
        bty = "n", x = "topleft", ncol = 1,
        cex = 0.8, y.intersp = 0.8, x.intersp = 0.25,
        horiz = FALSE, xpd = TRUE, inset = c(0, 0)
      )
    ),
    finally = dev.off()
  )
}
That will create the plots. You can also prepend a directory path to vnames if you want the plots saved in a specific folder.

Plotting with ggplot2: "Error: Discrete value supplied to continuous scale" on categorical y-axis

The plotting code below gives Error: Discrete value supplied to continuous scale
What's wrong with this code? It works fine until I try to change the scale, so the error must be there... I tried to adapt solutions from similar problems but couldn't.
This is a head of my data:
> dput(head(df))
structure(list(`10` = c(0, 0, 0, 0, 0, 0), `33.95` = c(0, 0,
0, 0, 0, 0), `58.66` = c(0, 0, 0, 0, 0, 0), `84.42` = c(0, 0,
0, 0, 0, 0), `110.21` = c(0, 0, 0, 0, 0, 0), `134.16` = c(0,
0, 0, 0, 0, 0), `164.69` = c(0, 0, 0, 0, 0, 0), `199.1` = c(0,
0, 0, 0, 0, 0), `234.35` = c(0, 0, 0, 0, 0, 0), `257.19` = c(0,
0, 0, 0, 0, 0), `361.84` = c(0, 0, 0, 0, 0, 0), `432.74` = c(0,
0, 0, 0, 0, 0), `506.34` = c(1, 0, 0, 0, 0, 0), `581.46` = c(0,
0, 0, 0, 0, 0), `651.71` = c(0, 0, 0, 0, 0, 0), `732.59` = c(0,
0, 0, 0, 0, 1), `817.56` = c(0, 0, 0, 1, 0, 0), `896.24` = c(0,
0, 0, 0, 0, 0), `971.77` = c(0, 1, 1, 1, 0, 1), `1038.91` = c(0,
0, 0, 0, 0, 0), MW = c(3.9, 6.4, 7.4, 8.1, 9, 9.4)), .Names = c("10",
"33.95", "58.66", "84.42", "110.21", "134.16", "164.69", "199.1",
"234.35", "257.19", "361.84", "432.74", "506.34", "581.46", "651.71",
"732.59", "817.56", "896.24", "971.77", "1038.91", "MW"), row.names = c("Merc",
"Peug", "Fera", "Fiat", "Opel", "Volv"
), class = "data.frame")
The plotting code:
## Plotting
meltDF = melt(df, id.vars = 'MW')
ggplot(meltDF[meltDF$value == 1,]) + geom_point(aes(x = MW, y = variable)) +
scale_x_continuous(limits=c(0, 1200), breaks=c(0, 400, 800, 1200)) +
scale_y_continuous(limits=c(0, 1200), breaks=c(0, 400, 800, 1200))
Here's how the plot looked before adding scale:
As mentioned in the comments, there cannot be a continuous scale on variable of the factor type. You could change the factor to numeric as follows, just after you define the meltDF variable.
meltDF$variable=as.numeric(levels(meltDF$variable))[meltDF$variable]
Then, execute the ggplot command
ggplot(meltDF[meltDF$value == 1,]) + geom_point(aes(x = MW, y = variable)) +
scale_x_continuous(limits=c(0, 1200), breaks=c(0, 400, 800, 1200)) +
scale_y_continuous(limits=c(0, 1200), breaks=c(0, 400, 800, 1200))
And you will have your chart.
Hope this helps
If x is numeric, add scale_x_continuous(); if x is a character/factor, add scale_x_discrete(). This might solve your problem.
In my case, I needed to convert the columns to numeric (a column you think is numeric may actually not be):
geom_segment(data=tmpp,
aes(x=start_pos,
y=lib.complexity,
xend=end_pos,
yend=lib.complexity)
)
# to
geom_segment(data=tmpp,
aes(x=as.numeric(start_pos),
y=as.numeric(lib.complexity),
xend=as.numeric(end_pos),
yend=as.numeric(lib.complexity))
)

Resources