I have a dataset that contains the regions of a country, the states in each region, and the sales in each state. I want to visualize the mean sales region-wise, and also draw a Pareto chart to see which states contribute most to the overall regional sales. How can I do this in R? Please help, as I'm new to R.
#dput for dataset
# NOTE(review): the three assignments below do not run as written: in State the
# closing quote is missing after 'TELANGANA and after 'KARNATAKA, and sales is
# built with C() instead of the vector constructor c().
Region <- c('South','South','South','South','South','Central','Central','Central','North','North','North','North','East','East','East','East','West','West','West','West')
State <- c('TAMIL NADU', 'TELANGANA,'ANDHRA PRADESH','KARNATAKA,'KERALA','MADHYA PRADESH','ORISSA','CHATTISGARH','DELHI','UTTARAKHAND','HARYANA','PUNJAB','ASSAM','MIZORAM','WB','BIHAR','GUJARAT','RAJASTHAN','MAHARASHTRA','GOA')
sales <- C(89,109,92,56,43,103,26,41,126,56,64,98,26,16,61,40,61,101,191,38)
The dataset somewhat looks like this
Region
State
Gdp
South
Tamil Nadu
89
South
Telangana
109
South
Karnataka
92
South
Andhra Pradesh
56
South
Kerala
43
Central
Madhya Pradesh
103
Central
Chattisgarh
26
Central
Orissa
41
North
Delhi
126
North
Punjab
56
North
Haryana
64
North
Uttarakhand
98
East
Assam
26
East
Mizoram
16
East
West Bengal
61
East
Bihar
40
West
Gujarat
61
West
Rajasthan
101
West
Maharashtra
191
West
Goa
38
You did not provide a desired output, so here is my guess at it..
library(data.table)
library(ggplot2)
# DT must already be a data.table; if it is a plain data.frame, convert it
# first with setDT(DT).

# Sort rows by Gdp, largest first (reorders DT by reference).
setorderv(DT, cols = "Gdp", order = -1L)

# Per-region mean and running total, added as new columns by reference.
# Because the rows are already sorted, cumGdp accumulates from the largest
# state downwards within each Region.
DT[
  ,
  c("meanGdp_region", "cumGdp") := list(mean(Gdp), cumsum(Gdp)),
  by = Region
]

# Use the current (sorted) row order as the factor level order so the x axis
# follows it.
DT[, State_f := factor(State, levels = State)]

# One panel per Region: bars = Gdp, red line = cumulative Gdp,
# blue horizontal line = regional mean.
ggplot(DT, aes(x = State_f)) +
  geom_col(aes(y = Gdp)) +
  geom_line(aes(y = cumGdp, group = 1), color = "red") +
  geom_hline(aes(yintercept = meanGdp_region), color = "blue") +
  facet_wrap(~Region, nrow = 1, scales = "free_x") +
  labs(x = "") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
sample data used
# Sample data: one row per state with its Region and Gdp, parsed by fread()
# from the inline text below.
# NOTE(review): several State values contain a space ("Tamil Nadu",
# "Andhra Pradesh", "Madhya Pradesh", "West Bengal"). If the fields in this
# literal are separated by plain spaces rather than tabs, confirm that fread()
# still splits every row into exactly Region / State / Gdp.
DT <- fread("Region State Gdp
South Tamil Nadu 89
South Telangana 109
South Karnataka 92
South Andhra Pradesh 56
South Kerala 43
Central Madhya Pradesh 103
Central Chattisgarh 26
Central Orissa 41
North Delhi 126
North Punjab 56
North Haryana 64
North Uttarakhand 98
East Assam 26
East Mizoram 16
East West Bengal 61
East Bihar 40
West Gujarat 61
West Rajasthan 101
West Maharashtra 191
West Goa 38")
Another output guess:
# Pareto chart of sales by state, ranked across all regions.
library(dplyr)   # %>%, arrange(), desc(), mutate()
library(forcats) # fct_inorder()
library(ggplot2)

Region <- c('South','South','South','South','South','Central','Central','Central','North','North','North','North','East','East','East','East','West','West','West','West')
State <- c('TAMIL NADU', 'TELANGANA','ANDHRA PRADESH','KARNATAKA','KERALA','MADHYA PRADESH','ORISSA','CHATTISGARH','DELHI','UTTARAKHAND','HARYANA','PUNJAB','ASSAM','MIZORAM','WB','BIHAR','GUJARAT','RAJASTHAN','MAHARASHTRA','GOA')
sales <- c(89,109,92,56,43,103,26,41,126,56,64,98,26,16,61,40,61,101,191,38)
df <- data.frame(Region, State, sales)

# Rank the states by descending sales, then freeze that ranking as the factor
# level order so ggplot keeps the bars sorted instead of alphabetical.
# fct_inorder() has to be applied to the State column of the *sorted* rows:
# passing df$State would attach the unsorted names to the sorted sales values
# and mislabel the bars.
df2 <- df %>%
  arrange(desc(sales)) %>%
  mutate(
    State = fct_inorder(State),
    cumulative = cumsum(sales)
  )

# Bars = sales per state; dotted line and points = cumulative sales.
ggplot(df2, aes(x = State)) +
  geom_bar(aes(y = sales), fill = "blue", stat = "identity") +
  geom_point(aes(y = cumulative), color = rgb(0, 1, 0), pch = 16, size = 1) +
  geom_path(aes(y = cumulative, group = 1), colour = "slateblue1", lty = 3, size = 0.9) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6)) +
  labs(title = "Pareto Plot", x = "State", y = "Sales")
It's great that you want to explore R. I found a few mistakes: the vectors in your question will not work as written, because you forgot the closing ' in a few places and you should use c instead of C. (In the code below I coloured the states by Region, which is different from the previous answer - I hope you can choose what works for you.)
library(ggplot2)
# Pareto chart of sales by state, with bars coloured by Region.
Region <- c('South','South','South','South','South','Central','Central','Central','North','North','North','North','East','East','East','East','West','West','West','West')
State <- c('TAMIL NADU', 'TELANGANA','ANDHRA PRADESH','KARNATAKA','KERALA','MADHYA PRADESH','ORISSA','CHATTISGARH','DELHI','UTTARAKHAND','HARYANA','PUNJAB','ASSAM','MIZORAM','WB','BIHAR','GUJARAT','RAJASTHAN','MAHARASHTRA','GOA')
sales <- c(89,109,92,56,43,103,26,41,126,56,64,98,26,16,61,40,61,101,191,38)
myDf <- data.frame(Region, State, sales, stringsAsFactors = FALSE)
str(myDf)

# Sort rows by descending sales. The brackets must be plain `[` and `]`:
# backslash-escaped brackets are not valid R.
myDf <- myDf[order(myDf$sales, decreasing = TRUE), ]
# Freeze the sorted order as the factor levels so the x axis follows it.
myDf$State <- factor(myDf$State, levels = myDf$State)
myDf$cumulative <- cumsum(myDf$sales)

# Bars = sales per state (filled by Region); dotted line and points =
# cumulative sales.
ggplot(myDf, aes(x = State)) +
  geom_bar(aes(y = sales, fill = Region), stat = "identity") +
  geom_point(aes(y = cumulative), color = rgb(0, 1, 0), pch = 16, size = 1) +
  geom_path(aes(y = cumulative, group = 1), colour = "slateblue1", lty = 3, size = 0.9) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6)) +
  labs(title = "Pareto Plot", x = 'States', y = 'Sales')
I want a chart like this
I have plotted a pie chart in a dashboard, but I want to plot the pie chart for the item selected in a combobox, using plotly.
my Data
# Per-country totals, one element per country in the same order as State.
State <- c("USA", "Belgium", "France", "Russia")
totalcases <- c(553, 226, 742, 370)
totalrecovered <- c(12, 22, 78, 21)
totaldeath <- c(48, 24, 12, 22)

# Combine the four vectors into one data frame (columns keep the vector names).
DTF <- data.frame(State, totalcases, totalrecovered, totaldeath)
My code to plot one pie-chart:
# Pie chart for a single country (here: USA).
library(dplyr)        # filter()
library(plotly)       # plot_ly()
library(RColorBrewer) # brewer.pal()

labels <- c("unrecovered", "death", "recovered")

# Keep only the USA row; inside filter() the column is referenced by its bare
# name.
USA <- filter(DTF, State == "USA")

# `$` is case-sensitive, so the names must be spelled exactly as in DTF
# (totalcases, totaldeath, totalrecovered); a misspelled name yields NULL and
# the pie gets no values. The order matches `labels` above.
USA <- c(USA$totalcases, USA$totaldeath, USA$totalrecovered)

p1 <- plot_ly(
  labels = ~labels,
  values = ~USA,
  type = "pie",
  marker = list(colors = brewer.pal(7, "Spectral"))
)
p1
Thanks.
The problem is: your dataset is a total mess.(; Try this:
library(plotly)
library(RColorBrewer)
library(dplyr)
library(tidyr)
# Per-country totals, one element per country in the same order as State.
State <- c("USA", "Belgium", "France", "Russia")
totalcases <- c(553, 226, 742, 370)
totalrecovered <- c(12, 22, 78, 21)
totaldeath <- c(48, 24, 12, 22)
DTF <- data.frame(State, totalcases, totalrecovered, totaldeath)

# Reshape to long form: every column except State becomes a (labels, value)
# pair, giving one row per State/measure combination.
dtf_long <- pivot_longer(DTF, -State, names_to = "labels")

# Strip the "total" prefix from the measure names, then rename "cases" to
# "unrecovered".
dtf_long <- mutate(
  dtf_long,
  labels = gsub("total", "", labels),
  labels = ifelse(labels == "cases", "unrecovered", labels)
)
dtf_long
#> # A tibble: 12 x 3
#> State labels value
#> <fct> <chr> <dbl>
#> 1 USA unrecovered 553
#> 2 USA recovered 12
#> 3 USA death 48
#> 4 Belgium unrecovered 226
#> 5 Belgium recovered 22
#> 6 Belgium death 24
#> 7 France unrecovered 742
#> 8 France recovered 78
#> 9 France death 12
#> 10 Russia unrecovered 370
#> 11 Russia recovered 21
#> 12 Russia death 22
# Keep only the USA rows of the long table.
usa <- dtf_long %>% filter(State == "USA")

# Pie chart: one slice per label, sized by value.
p1 <- plot_ly(
  usa,
  labels = ~labels,
  values = ~value,
  type = "pie",
  marker = list(colors = brewer.pal(7, "Spectral"))
)
p1
Created on 2020-04-04 by the reprex package (v0.3.0)
I have a dataset that looks like this:
LOCALITY numbers
1 Airoli 72
2 Andheri East 286
3 Andheri west 208
4 Arya Nagar 5
5 Asalfa 7
6 Bandra East 36
7 Bandra West 72
I want to plot bubbles (bigger the number bigger would be the bubble) inside the map of mumbai for each location in dataset.
I loaded the map of mumbai using 'maps' library but now I am not sure on how to plot these in the map. Is it possible to do in R ?
I used this to load the map:
library(ggmap)
library(mapproj)
# Download a map for the location "Mumbai" at zoom 12, then draw it.
maps <- get_map("Mumbai", zoom = 12)
ggmap(maps)
This should get you headed in the right direction, but be sure to check out the examples pointed out by @Jaap in the comments.
library(ggmap)
# Base map for the location "Mumbai" at zoom 12.
map <- get_map("Mumbai", zoom = 12)

# Localities and the value that drives the bubble size.
df <- data.frame(
  location = c(
    "Airoli", "Andheri East", "Andheri West", "Arya Nagar",
    "Asalfa", "Bandra East", "Bandra West"
  ),
  values = c(72, 286, 208, 5, 7, 36, 72),
  stringsAsFactors = FALSE
)

# Look up lon/lat for each locality name and append them as columns.
locs_geo <- geocode(df$location)
df <- cbind(df, locs_geo)
df
# location values lon lat
# 1 Airoli 72 72.99348 19.15793
# 2 Andheri East 286 72.87270 19.11549
# 3 Andheri West 208 72.82766 19.13632
# 4 Arya Nagar 5 80.32170 26.48341
# 5 Asalfa 7 72.89514 19.10023
# 6 Bandra East 36 72.84935 19.06053
# 7 Bandra West 72 72.83625 19.06069
# NOTE(review): in the output above "Arya Nagar" resolved to lon 80.3 / lat
# 26.5, far from the other six points (all near lon 72.8-73.0 / lat 19.1);
# the bare name is probably ambiguous, so confirm it or qualify it with the city.

# Draw the base map and overlay one point per locality, sized by `values`.
ggmap(map) +
  geom_point(
    data = df,
    mapping = aes(x = lon, y = lat, size = values)
  )
I have a dataframe called "drawdata":
GeoName Ranking
1 Alabama 15
2 Alaska 2
3 Arizona 28
4 Arkansas 12
5 California 19
6 Colorado 7
7 Connecticut 42
8 Delaware 37
9 District of Columbia 9
10 Florida 38
11 Georgia 11
12 Hawaii 48
13 Idaho 10
14 Illinois 16
15 Indiana 26
16 Iowa 34
17 Kansas 27
18 Kentucky 20
19 Louisiana 4
20 Maine 51
21 Maryland 30
22 Massachusetts 39
23 Michigan 14
24 Minnesota 23
25 Mississippi 41
26 Missouri 32
27 Montana 25
28 Nebraska 21
29 Nevada 45
30 New Hampshire 47
31 New Jersey 33
32 New Mexico 5
33 New York 44
34 North Carolina 13
35 North Dakota 31
36 Ohio 35
37 Oklahoma 6
38 Oregon 18
39 Pennsylvania 40
40 Rhode Island 49
41 South Carolina 29
42 South Dakota 46
43 Tennessee 43
44 Texas 3
45 Utah 17
46 Vermont 50
47 Virginia 8
48 Washington 24
49 West Virginia 22
50 Wisconsin 36
51 Wyoming 1
And I want to draw a US State map with different colors for each ranking. The code I have is:
# Choropleth of the state rankings, with the fill binned by cut_number().
names(drawdata) <- c("region", "value")
# Lower-case the state names so they can be merged on states_map$region.
drawdata[, 1] <- tolower(drawdata[, 1])

# Label positions: state centres plus two-letter abbreviations.
states <- data.frame(state.center, state.abb)
states_map <- map_data("state")

df <- merge(drawdata, states_map, by = "region")
# merge() does not promise to keep the rows in their original sequence;
# restore the vertex order recorded in `order` so each polygon outline is
# traced correctly.
df <- df[order(df$order), ]

# Number of fill bins requested from cut_number().
# NOTE: with 49 bins cut_number() fails on this data
# ("'breaks' are not unique"); smaller values work.
df$num <- 49

p1 <- ggplot(data = df, aes(x = long, y = lat, group = group))
p1 <- p1 + geom_polygon(aes(fill = cut_number(value, num[1])))
# The ggplot2 parameter for dashed lines is `linetype`; `linestyle` is not a
# recognised parameter and would be ignored.
p1 <- p1 + geom_path(colour = "gray", linetype = 2)
p1 <- p1 + scale_fill_brewer("", palette = "PuRd")
p1 <- p1 + coord_map()
p1 <- p1 + scale_x_continuous(breaks = NULL) + scale_y_continuous(breaks = NULL)
p1 <- p1 + theme(legend.position = "none")
p1 <- p1 + geom_text(data = states, aes(x = x, y = y, label = state.abb, group = NULL), size = 2)
p1
This perfectly works if 'num', or the number of colors to fill, is small. However, when I set 'num=49', then it produces an error:
Error in cut.default(x, breaks(x, "n", n), include.lowest = TRUE, ...) :
'breaks' are not unique
When I alter the code from
p1 = p1 + geom_polygon(aes(fill = cut_number(value, num[1])))
to
p1 = p1 + geom_polygon(aes(fill = cut_number(unique(value), num[1])))
then it gives me a different error:
Error: Aesthetics must either be length one, or the same length as the dataProblems:cut_number(unique(value), num[1])
I want a map where every 49 States in the map have different colors, each reflecting their 'Ranking'. Any help is very appreciated!
Brewer palettes deliberately have small maximums (generally < 12) since it's pretty much impossible for humans to map the subtle differences to the discrete values you have. You can achieve what you're looking for by "faking" it with scale_fill_gradient2 (NOTE: I deliberately left the legend in as you should too):
library(ggplot2)
# Choropleth of the state rankings with a continuous three-colour fill.
names(drawdata) <- c("region", "value")
# Lower-case the state names so map_id can be matched against
# states_map$region.
drawdata[, 1] <- tolower(drawdata[, 1])

# Label positions: state centres plus two-letter abbreviations.
states <- data.frame(state.center, state.abb)
states <- states[!(states$state.abb %in% c("AK", "HI")), ] # they aren't part of states_map
states_map <- map_data("state")

p1 <- ggplot()
# borders
p1 <- p1 + geom_map(
  data = states_map, map = states_map,
  aes(x = long, y = lat, map_id = region),
  color = "white", size = 0.15
)
# fills
p1 <- p1 + geom_map(
  data = drawdata, map = states_map,
  aes(fill = value, map_id = region),
  color = "white", size = 0.15
)
# labels
p1 <- p1 + geom_text(
  data = states,
  aes(x = x, y = y, label = state.abb, group = NULL), size = 2
)
# decent projection
p1 <- p1 + coord_map("albers", lat0 = 39, lat1 = 45)
# scale_fill_gradient2() centres the `mid` colour on midpoint = 0 by default.
# The values here are all positive, so the midpoint is set to the median;
# otherwise the `low` colour would never be reached and only the mid-to-high
# half of the gradient would be used.
p1 <- p1 + scale_fill_gradient2(
  low = "#f7f4f9", mid = "#df65b0", high = "#67001f",
  midpoint = median(drawdata$value)
)
# better theme
p1 <- p1 + labs(x = NULL, y = NULL)
p1 <- p1 + theme_bw()
p1 <- p1 + theme(panel.grid = element_blank())
p1 <- p1 + theme(panel.border = element_blank())
p1 <- p1 + theme(axis.ticks = element_blank())
p1 <- p1 + theme(axis.text = element_blank())
p1
You can get an even better result with scale_fill_distiller, which does a lot behind the scenes to let you use a Color Brewer palette with continuous data (I'd argue you do not have continuous data, though):
# Add a fill scale based on the Color Brewer "PuRd" palette to the plot.
p1 <- p1 + scale_fill_distiller(palette = "PuRd")
I'd strongly suggest continuing to use cut like you had originally and having a max of 9 breaks to fit into the Color Brewer palette you're trying to work with. In reality, folks are still going to need a table to really grok the rankings (never assume Americans know either state shapes, locations or even the two-letter abbreviations for them), so I'd also pretty much just suggest using an actual table with full names at least with this choropleth if not in place of it.
Note also that the way you're trying to build the map deliberately excluded Alaska, Hawaii and the District of Columbia. You'll need to use a real shapefile and something like I cover here to get them to show up nicely.
If you want different colors for each state, using a gradient, you can work with scale_fill_gradient. Here is one version, using green and red at the ends of the gradient, so that each state is on that scale.
# State map filled on a continuous green-to-red gradient of `value`.
# Expects `df` (drawdata merged with the state polygons) and `states`
# (label positions) to exist already.
ggplot(data = df, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = value)) +
  # The ggplot2 parameter for dashed lines is `linetype`; `linestyle` is not a
  # recognised parameter and would be ignored.
  geom_path(colour = "gray", linetype = 2) +
  scale_fill_gradient(low = "green", high = "red") +
  coord_map() +
  scale_x_continuous(breaks = NULL) + scale_y_continuous(breaks = NULL) +
  theme(legend.position = "none") +
  geom_text(data = states, aes(x = x, y = y, label = state.abb, group = NULL), size = 2)