Add_annotations plotly first and last datapoints - r

My Main Goal:
Trying to add annotations to both the first datapoint of my
scatterplot and the last datapoint of my scatterplot (the entries for
years 2006 and 2021 respectively).
My Secondary Goals:
If possible, it would also be helpful to find out how to select out
specific datapoints to add annotations, as I only know the
which.max/which.min functions so far.
It would also be nice to know how to list the jobs on each point.
My Dput:
structure(list(Year = 2006:2021, Month_USD = c(1160L, 1240L,
1360L, 1480L, 1320L, 1320L, 375L, 1600L, 2000L, 2000L, 1600L,
2240L, 1900L, 2300L, 2900L, 2300L), Degree = c("High School",
"High School", "High School", "High School", "High School", "High School",
"High School", "High School", "High School", "BA", "BA", "BA",
"BA", "BA", "M.Ed", "M.Ed"), Country = c("USA", "USA", "USA",
"USA", "USA", "USA", "DE", "USA", "USA", "USA", "USA", "USA",
"PRC", "PRC", "PRC", "HK"), Job = c("Disher", "Prep", "Prep",
"Prep", "Prep", "Prep", "Au Pair", "CSA", "Valet", "Valet", "Intake",
"CM", "Teacher", "Teacher", "Teacher", "Student"), Median_Household_Income_US = c(4833L,
4961L, 4784L, 4750L, 4626L, 4556L, 4547L, 4706L, 4634L, 4873L,
5025L, 5218L, 5360L, 5725L, NA, NA), US_Home_Price_Index = c(183.24,
173.36, 152.56, 146.69, 140.64, 135.16, 143.88, 159.3, 166.5,
175.17, 184.51, 195.99, 204.9, 212.59, 236.31, NA)), class = "data.frame", row.names = c(NA,
-16L))
Current Scatterplot:
# Marker colours handed to plot_ly() for the Degree groups.
pal <- c("Red", "Blue", "Green")

plot_ly(
  data = Earnings_Year,
  x = ~Year,
  y = ~Month_USD,
  type = "scatter",
  mode = "markers",
  symbol = ~as.factor(Degree),
  symbols = c("star-open-dot", "hexagon-open-dot", "diamond-open-dot"),
  color = ~as.factor(Degree),
  colors = pal,
  hoverinfo = "text",
  # Hover label: year and monthly pay, separated by an HTML line break.
  text = paste(
    "Year: ",
    Earnings_Year$Year,
    "<br>",
    "Monthly USD: ",
    Earnings_Year$Month_USD
  ),
  size = 10
) %>%
  # Label the lowest-paid year.
  add_annotations(
    x = with(Earnings_Year, Year[which.min(Month_USD)]),
    y = with(Earnings_Year, Month_USD[which.min(Month_USD)]),
    text = "Au Pair Job in Germany"
  ) %>%
  # Label the highest-paid year.
  add_annotations(
    x = with(Earnings_Year, Year[which.max(Month_USD)]),
    y = with(Earnings_Year, Month_USD[which.max(Month_USD)]),
    text = "Last Teaching Job in China"
  ) %>%
  layout(
    legend = list(x = 1, y = 0.5),
    title = "Earnings by Degree",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Monthly USD")
  )
Image of Current Scatter:
Scatter That I Want:

Figured it out. Just needed to pipe additional add_annotations as well as just select specific values for x and y:
# Marker colours handed to plot_ly() for the Degree groups.
pal <- c("Red", "Blue", "Green")

plot_ly(
  data = Earnings_Year,
  x = ~Year,
  y = ~Month_USD,
  type = "scatter",
  mode = "markers",
  symbol = ~as.factor(Degree),
  symbols = c("star-open-dot", "hexagon-open-dot", "diamond-open-dot"),
  color = ~as.factor(Degree),
  colors = pal,
  hoverinfo = "text",
  # Hover label: year and monthly pay, separated by an HTML line break.
  text = paste(
    "Year: ",
    Earnings_Year$Year,
    "<br>",
    "Monthly USD: ",
    Earnings_Year$Month_USD
  ),
  size = 10
) %>%
  # Extremes are located by row position, so x and y come from the same row.
  add_annotations(
    x = with(Earnings_Year, Year[which.min(Month_USD)]),
    y = with(Earnings_Year, Month_USD[which.min(Month_USD)]),
    text = "Au Pair Job in Germany"
  ) %>%
  add_annotations(
    x = with(Earnings_Year, Year[which.max(Month_USD)]),
    y = with(Earnings_Year, Month_USD[which.max(Month_USD)]),
    text = "Last Teaching Job in China"
  ) %>%
  # Specific points are picked by Year for BOTH coordinates. Year is unique
  # per row, whereas Month_USD values repeat (e.g. 2000 and 2300 each occur
  # twice), so looking y up by its value would return more than one point.
  add_annotations(
    x = with(Earnings_Year, Year[Year == 2006]),
    y = with(Earnings_Year, Month_USD[Year == 2006]),
    text = "First Job"
  ) %>%
  add_annotations(
    x = with(Earnings_Year, Year[Year == 2021]),
    y = with(Earnings_Year, Month_USD[Year == 2021]),
    text = "Began Ph.D."
  ) %>%
  add_annotations(
    x = with(Earnings_Year, Year[Year == 2008]),
    y = with(Earnings_Year, Month_USD[Year == 2008]),
    text = "Finished H.S."
  ) %>%
  add_annotations(
    x = with(Earnings_Year, Year[Year == 2015]),
    y = with(Earnings_Year, Month_USD[Year == 2015]),
    text = "Finished BA"
  ) %>%
  layout(
    legend = list(x = 1, y = 0.5),
    title = "Earnings by Degree",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Monthly USD")
  )
Finished Product:

Related

How to create a double bar contrast plot using ggplot2

I am trying to replicate a plot using ggplot2 that compares the performance of two soccer teams. This is the plot I am aiming for:
I have the data df for that. This is the code I have been using:
library(ggplot2)
# Plot: horizontal bars, one per statistic, filled by team name.
ggplot(data = df, mapping = aes(x = type, y = value, fill = name)) +
  geom_col() +
  geom_text(mapping = aes(label = abs(value), hjust = 1)) +
  coord_flip()
Which produces the following result:
The result is far from the first plot.
The issues I face are the following:
The scale is too large for some type values, so it is not possible to see each measure on its own independent scale. I thought facet_grid() or facet_wrap() would solve this, but it did not work.
The values for type must go in the middle, but I do not know how to move the axis to the middle.
The label for each bar should be on top of its bar, but when I adjust hjust one of them goes to the right place while the other is placed wrongly.
Finally, df has a column named logo which stores the flags of the two teams. How can I add the flags, one at the top right and the other at the top left?
Many thanks. This is the data df.
#Data
df <- structure(list(type = c("Shots on Goal", "Shots off Goal", "Total Shots",
"Blocked Shots", "Shots insidebox", "Shots outsidebox", "Fouls",
"Corner Kicks", "Offsides", "Ball Possession % ", "Yellow Cards",
"Red Cards", "Goalkeeper Saves", "Total passes", "Passes accurate",
"Passes %", "Shots on Goal", "Shots off Goal", "Total Shots",
"Blocked Shots", "Shots insidebox", "Shots outsidebox", "Fouls",
"Corner Kicks", "Offsides", "Ball Possession % ", "Yellow Cards",
"Red Cards", "Goalkeeper Saves", "Total passes", "Passes accurate",
"Passes %"), value = c(7, 2, 13, 4, 10, 3, 9, 8, 2, 78, 0, 0,
1, 797, 716, 90, -3, -4, -8, -1, -6, -2, -14, 0, -2, -22, -2,
0, -1, -215, -142, -66), name = c("England", "England", "England",
"England", "England", "England", "England", "England", "England",
"England", "England", "England", "England", "England", "England",
"England", "Iran", "Iran", "Iran", "Iran", "Iran", "Iran", "Iran",
"Iran", "Iran", "Iran", "Iran", "Iran", "Iran", "Iran", "Iran",
"Iran"), logo = c("https://media.api-sports.io/football/teams/10.png",
"https://media.api-sports.io/football/teams/10.png", "https://media.api-sports.io/football/teams/10.png",
"https://media.api-sports.io/football/teams/10.png", "https://media.api-sports.io/football/teams/10.png",
"https://media.api-sports.io/football/teams/10.png", "https://media.api-sports.io/football/teams/10.png",
"https://media.api-sports.io/football/teams/10.png", "https://media.api-sports.io/football/teams/10.png",
"https://media.api-sports.io/football/teams/10.png", "https://media.api-sports.io/football/teams/10.png",
"https://media.api-sports.io/football/teams/10.png", "https://media.api-sports.io/football/teams/10.png",
"https://media.api-sports.io/football/teams/10.png", "https://media.api-sports.io/football/teams/10.png",
"https://media.api-sports.io/football/teams/10.png", "https://media.api-sports.io/football/teams/22.png",
"https://media.api-sports.io/football/teams/22.png", "https://media.api-sports.io/football/teams/22.png",
"https://media.api-sports.io/football/teams/22.png", "https://media.api-sports.io/football/teams/22.png",
"https://media.api-sports.io/football/teams/22.png", "https://media.api-sports.io/football/teams/22.png",
"https://media.api-sports.io/football/teams/22.png", "https://media.api-sports.io/football/teams/22.png",
"https://media.api-sports.io/football/teams/22.png", "https://media.api-sports.io/football/teams/22.png",
"https://media.api-sports.io/football/teams/22.png", "https://media.api-sports.io/football/teams/22.png",
"https://media.api-sports.io/football/teams/22.png", "https://media.api-sports.io/football/teams/22.png",
"https://media.api-sports.io/football/teams/22.png")), row.names = c(NA,
-32L), class = c("tbl_df", "tbl", "data.frame"))
Perhaps something like this?
library(tidyverse)
library(ggimage)
df %>%
  group_by(type) %>%
  # Scale each value by the combined absolute total of its statistic so that
  # all rows share one axis running from -1 to 1.
  mutate(
    tot = sum(abs(value)),
    prop = value / tot
  ) %>%
  ggplot(mapping = aes(x = prop, y = type, color = name)) +
  # Pale full-width track behind each row.
  geom_linerange(
    mapping = aes(xmin = -1, xmax = 1),
    color = "gray95",
    linewidth = 3
  ) +
  # Coloured bar from the centre line out to the team's share.
  geom_linerange(mapping = aes(xmin = prop, xmax = 0), linewidth = 3) +
  scale_color_manual(values = c("#8ded05", "#00aaff")) +
  # Statistic name centred just above its row.
  geom_text(
    mapping = aes(label = type, x = 0),
    check_overlap = TRUE,
    nudge_y = 0.4,
    color = "black"
  ) +
  # Raw values at the outer ends: England on the right, the other team left.
  geom_text(
    mapping = aes(
      x = ifelse(name == "England", 1.05, -1.05),
      label = abs(value)
    ),
    color = "black"
  ) +
  theme_void() +
  scale_y_discrete(expand = c(0.1, 0)) +
  # Team names and logos sit at y = 17, one slot above the 16 statistic rows.
  annotate(
    geom = "text",
    x = c(-0.5, 0.5),
    y = c(17, 17),
    label = c("Iran", "England"),
    size = 5
  ) +
  geom_image(
    data = data.frame(
      x = c(1, -1),
      y = 17,
      image = unique(df$logo)
    ),
    mapping = aes(x = x, y = y, image = image),
    inherit.aes = FALSE
  ) +
  guides(color = guide_none())

Had a couple problems with One Way ANOVA in R

My dput is this:
structure(list(Year = 2006:2021, Month_USD = c(1160L, 1240L, 1360L, 1480L, 1320L, 1320L, 375L, 1600L, 2000L, 2000L, 1600L, 2240L, 1900L, 2300L, 2900L, 2300L), Degree = c("High School", "High School", "High School", "High School", "High School", "High School", "High School", "High School", "High School", "BA", "BA", "BA", "BA", "BA", "M.Ed", "M.Ed"), Country = c("USA", "USA", "USA", "USA", "USA", "USA", "DE", "USA", "USA", "USA", "USA", "USA", "PRC", "PRC", "PRC", "HK"), Job = c("Disher", "Prep", "Prep", "Prep", "Prep", "Prep", "Au Pair", "CSA", "Valet", "Valet", "Intake", "CM", "Teacher", "Teacher", "Teacher", "Student"), Median_Household_Income_US = c(4833L, 4961L, 4784L, 4750L, 4626L, 4556L, 4547L, 4706L, 4634L, 4873L, 5025L, 5218L, 5360L, 5725L, NA, NA), US_Home_Price_Index = c(183.24, 173.36, 152.56, 146.69, 140.64, 135.16, 143.88, 159.3, 166.5, 175.17, 184.51, 195.99, 204.9, 212.59, 236.31, NA)), class = "data.frame", row.names = c(NA, -16L))
So I ran a one-way ANOVA on this data and had a couple problems. First, when I ran the level function here:
# Load and open the data for inspection.
# NOTE(review): data() looks up datasets shipped with packages; if
# Earnings_Year was created in the session this call is presumably
# unnecessary - confirm.
data(Earnings_Year)
View(Earnings_Year)
# Fix the RNG seed so the random draw below is repeatable.
set.seed(1234)
# Draw one random row per Degree group (sample_n_by() is presumably the
# rstatix helper - confirm). The result is printed, not assigned.
Earnings_Year %>%
sample_n_by(Degree,
size=1)
# levels() only reports the levels of a factor. Degree is a character vector
# in the dput above, so this call returns NULL.
levels(Earnings_Year$Degree)
For whatever reason the code above won't show the levels and just returns "NULL". As far as I know, the levels should be "BA", "High School", and "M.Ed".
Another issue came up later when I ran the code below. A plain Shapiro test on the whole column didn't show the problem; it only appeared once I grouped the data:
# Run shapiro_test() on Month_USD separately within each Degree group.
Earnings_Year %>%
group_by(Degree) %>%
shapiro_test(Month_USD)
When I run it, it comes up with the following problem:
Error: Problem with `mutate()` column `data`.
i `data = map(.data$data, .f, ...)`.
x Problem with `mutate()` column `data`.
i `data = map(.data$data, .f, ...)`.
x sample size must be between 3 and 5000
Run `rlang::last_error()` to see where the error occurred.
Any insight on what went wrong would be appreciated. Overall, I ended up with a nice ANOVA boxplot at the end that seemed to indicate what I was looking for:
As the error message suggests, some groups in your data have fewer than 3 rows or more than 5000 rows.
We can check number of rows in each group using count.
library(dplyr)
library(rstatix)
# Number of rows in each Degree group.
count(df, Degree)
# Degree n
#1 BA 5
#2 High School 9
#3 M.Ed 2
You can remove such groups and the code should work fine.
# Keep only the Degree groups with at least three rows, then test each one.
df %>%
  group_by(Degree) %>%
  filter(n() >= 3) %>%
  shapiro_test(Month_USD)
# Degree variable statistic p
# <chr> <chr> <dbl> <dbl>
#1 BA Month_USD 0.944 0.695
#2 High School Month_USD 0.887 0.185

ggplot by group with filter()

I have a big dataset with the following format:
structure(list(LOCATION = c("CAN", "CAN", "CAN", "CAN", "CAN",
"CAN", "CAN", "CAN", "CAN", "CAN"), Country = c("Canada", "Canada",
"Canada", "Canada", "Canada", "Canada", "Canada", "Canada", "Canada",
"Canada"), SUBJECT = c("ULABUL99", "ULABUL99", "ULABUL99", "ULABUL99",
"ULABUL99", "ULABUL99", "ULABUL99", "ULABUL99", "ULABUL99", "ULABUL99"
), Subject = c("Unit Labour Cost", "Unit Labour Cost", "Unit Labour Cost",
"Unit Labour Cost", "Unit Labour Cost", "Unit Labour Cost", "Unit Labour Cost",
"Unit Labour Cost", "Unit Labour Cost", "Unit Labour Cost"),
SECTOR = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Sector = c("Total Economy",
"Total Economy", "Total Economy", "Total Economy", "Total Economy",
"Total Economy", "Total Economy", "Total Economy", "Total Economy",
"Total Economy"), MEASURE = c("ST", "ST", "ST", "ST", "ST",
"ST", "ST", "ST", "ST", "ST"), Measure = c("Level, ratio or national currency",
"Level, ratio or national currency", "Level, ratio or national currency",
"Level, ratio or national currency", "Level, ratio or national currency",
"Level, ratio or national currency", "Level, ratio or national currency",
"Level, ratio or national currency", "Level, ratio or national currency",
"Level, ratio or national currency"), FREQUENCY = c("A",
"A", "A", "A", "A", "A", "A", "A", "A", "A"), Frequency = c("Annual",
"Annual", "Annual", "Annual", "Annual", "Annual", "Annual",
"Annual", "Annual", "Annual"), TIME = 1970:1979, Time = 1970:1979,
Value = c(0.1304592, 0.1357066, 0.1430287, 0.1521136, 0.1752398,
0.2018611, 0.2193767, 0.2347496, 0.2470616, 0.2663881), Flag.Codes = c("E",
"E", "E", "E", "E", "E", "E", "E", "E", "E"), Flags = c("Estimated value",
"Estimated value", "Estimated value", "Estimated value",
"Estimated value", "Estimated value", "Estimated value",
"Estimated value", "Estimated value", "Estimated value")), row.names = c(NA,
10L), class = "data.frame")
And I want to draw time plot like the following (for each sector group in a particular country's particular subject, in this case, Germany's Labour Income Share)
I tried to code as follows:
library(ggplot2)
library(tidyr)
# Read the full data set from disk.
df <- read.csv("/Users/ulc.csv", header = TRUE)
# Sector as a factor, built from the full, unfiltered df - so its length
# matches df, not the filtered rows that are piped into ggplot() below.
fsector = factor(df$SECTOR)
# Keep one measure / subject / country, then set up the plot.
# NOTE(review): only ggplot2 and tidyr are loaded above; filter() is
# presumably meant to be dplyr::filter() - confirm dplyr is attached.
df %>%
filter(df$MEASURE =="ST",
df$SUBJECT == "ULAIRU99",
df$LOCATION == "DEU") %>%
# NOTE(review): the sample data shows columns TIME/Time and Value; df$year and
# df$value do not appear there - confirm the column names.
# NOTE(review): no geom_*() layer is added anywhere in this chain, so there is
# nothing for the plot to draw.
ggplot(aes(x = df$year, y = df$value, color = fsector, linetype = fsector)) +
scale_color_manual(labels=c("Sec 1","Sec 2", "Sec 3", "Sec 4", "Sec 5", "Sec 6", "Sec 7", "Sec 8"), values = 1:8) +
scale_linetype_manual(labels=c("Sec 1","Sec 2", "Sec 3", "Sec 4", "Sec 5", "Sec 6", "Sec 7", "Sec 8"), values = 1:8) +
theme(legend.position = c(0.8, 0.3), legend.title = element_blank()) +
ylab("LIS of Germany by sector") + xlab("year")
But the result does not show any plot, and it seems a lot of elements are missing from my code. Should I add geom_line() for each sector? There is probably a much simpler way. Any help would be appreciated.
You can try the following code -
library(dplyr)
library(ggplot2)
# One line per sector: Value over TIME for the selected measure, subject and
# country.
df %>%
  filter(
    MEASURE == "ST",
    SUBJECT == "ULAIRU99",
    LOCATION == "DEU"
  ) %>%
  # Treat the numeric sector code as a discrete group.
  mutate(SECTOR = factor(SECTOR)) %>%
  ggplot(
    mapping = aes(x = TIME, y = Value, color = SECTOR, linetype = SECTOR)
  ) +
  geom_line() +
  scale_color_manual(
    labels = c(
      "Sec 1", "Sec 2", "Sec 3", "Sec 4",
      "Sec 5", "Sec 6", "Sec 7", "Sec 8"
    ),
    values = 1:8
  ) +
  scale_linetype_manual(
    labels = c(
      "Sec 1", "Sec 2", "Sec 3", "Sec 4",
      "Sec 5", "Sec 6", "Sec 7", "Sec 8"
    ),
    values = 1:8
  ) +
  theme(
    legend.position = c(0.8, 0.3),
    legend.title = element_blank()
  ) +
  labs(x = "year", y = "LIS of Germany by sector")

How to group a legend or get separate legends by facets in ggplot2

I have a dataset that I am presenting facetted by region and then using sub region as a fill. I have defined the colours using a separate named variable relating to the names of the subregion. I am wondering if it is possible to make the legend itself grouped in a similar way to the facet to make it easier to interpret.
The named sub_region variable
sub_region_colours <- c("South America" = "#0570b0", "Western Africa" = "#8c96c6", "Central America" = "#74a9cf", "Eastern Africa" = "#8856a7", "Northern Africa" = "#edf8fb", "Middle Africa" = "#b3cde3", "Southern Africa" = "#810f7c", "Northern America" = "#f1eef6", "Caribbean" = "#bdc9e1", "Eastern Asia" = "#bd0026", "Southern Asia" = "#fd8d3c", "South-Eastern Asia" = "#f03b20", "Southern Europe" = "#238b45", "Australia and New Zealand" = "#ce1256", "Melanesia" = "#df65b0", "Micronesia" = "#d7b5d8", "Polynesia" = "#f1eef6", "Central Asia" = "#fecc5c", "Western Asia" = "#ffffb2", "Eastern Europe" = "#66c2a4", "Northern Europe" = "#edf8fb", "Western Europe" = "#b2e2e2", "Small Islands" = "#252525")
This is the head(exporting_countries) grouping by sender_iso3, year and sender_region removed.
structure(list(sender_iso3 = c("ABW", "ABW", "ABW", "ABW", "ABW",
"ABW"), year = c(2005, 2011, 2014, 2015, 2016, 2017), sender_region = c("Americas",
"Americas", "Americas", "Americas", "Americas", "Americas"),
sender_subregion = c("Caribbean", "Caribbean", "Caribbean",
"Caribbean", "Caribbean", "Caribbean"), export = c(1, 1,
4, 5, 2, 1)), class = "data.frame", row.names = c(NA, -6L
))
Finally this is the code for the current plot
# NOTE(review): this snippet starts mid-chain - the ggplot(...) call that
# supplies the data and aesthetics is not shown.
# Bars, filled by sub-region and split into one panel per sender_region.
geom_bar()+
labs(title = "Number of countries reporting export of chickens",
fill = "Subregion")+
facet_wrap(~ sender_region)+
theme_minimal()+
scale_x_continuous(name = "Year", limits = c(1986, 2017), breaks = c(1986, 1990, 2000, 2010, 2017), guide = guide_axis(angle = 90))+
# Fixed colour per sub-region, taken from the named vector defined earlier.
scale_fill_manual(values = sub_region_colours)+
# Lay the legend keys out in two columns.
guides(fill = guide_legend(ncol = 2))
Which at the moment produces this:
Graph with less than ideal legend
It would be great if I can group the legend fill colours similarly to the facets which would make it easier to read off.
One approach to achieve this would be to make separate plots for each region and use patchwork to glue the plots together. A second approach would be to use the ggnewscale package, which allows multiple fill (or ...) scales and legends in one plot.
However, similar to using patchwork, the ggnewscale approach could become a bit tedious, as it requires splitting the data according to the number of facets and plotting each dataset via separate layers. Therefore my solution adds a helper function which 1) splits the data and sets up the layers for each region or facet and 2) can be used to loop over the regions via e.g. lapply.
BTW: As your sample data included only one region I added a second region.
library(dplyr)
library(ggplot2)
library(ggnewscale)
sub_region_colours <- c("South America" = "#0570b0", "Western Africa" = "#8c96c6", "Central America" = "#74a9cf", "Eastern Africa" = "#8856a7", "Northern Africa" = "#edf8fb", "Middle Africa" = "#b3cde3", "Southern Africa" = "#810f7c", "Northern America" = "#f1eef6", "Caribbean" = "#bdc9e1", "Eastern Asia" = "#bd0026", "Southern Asia" = "#fd8d3c", "South-Eastern Asia" = "#f03b20", "Southern Europe" = "#238b45", "Australia and New Zealand" = "#ce1256", "Melanesia" = "#df65b0", "Micronesia" = "#d7b5d8", "Polynesia" = "#f1eef6", "Central Asia" = "#fecc5c", "Western Asia" = "#ffffb2", "Eastern Europe" = "#66c2a4", "Northern Europe" = "#edf8fb", "Western Europe" = "#b2e2e2", "Small Islands" = "#252525")
d <- structure(list(sender_iso3 = c(
"ABW", "ABW", "ABW", "ABW", "ABW",
"ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW"
), year = c(
2005,
2011, 2014, 2015, 2016, 2017, 2005, 2011, 2014, 2015, 2016, 2017
), sender_region = c(
"Americas", "Americas", "Americas", "Americas",
"Americas", "Americas", "Africa", "Africa", "Africa", "Africa",
"Africa", "Africa"
), sender_subregion = c(
"Caribbean", "Caribbean",
"Caribbean", "Caribbean", "Caribbean", "Caribbean", "Southern Africa",
"Southern Africa", "Southern Africa", "Southern Africa", "Southern Africa",
"Southern Africa"
), export = c(
1, 1, 4, 5, 2, 1, 1, 1, 4, 5,
2, 1
)), class = "data.frame", row.names = c(NA, -12L))
regions <- unique(d$sender_region)
# Layers for each region
# Build the ggplot layers for one region.
#
# x: position of the region within the global `regions` vector.
#
# Returns a list of layers: an optional fresh fill scale, the bars for that
# region's rows of the global `d`, and a manual fill scale whose legend is
# titled with the region name and ordered by x.
make_layers <- function(x) {
  region_rows <- filter(d, sender_region == regions[[x]])

  region_legend <- guide_legend(
    order = x,
    title = regions[x],
    title.position = "top"
  )

  # Only regions after the first start a new fill scale; for x == 1 this is
  # NULL.
  fresh_fill <- if (x != 1) new_scale_fill()

  list(
    fresh_fill,
    geom_bar(
      data = region_rows,
      mapping = aes(x = year, fill = sender_subregion)
    ),
    scale_fill_manual(values = sub_region_colours, guide = region_legend)
  )
}
# Start from an empty plot and add every region's list of layers in order.
p <- ggplot() +
  lapply(seq_along(regions), make_layers)

# Shared theme and x axis, then one panel per region.
p +
  theme_minimal() +
  scale_x_continuous(
    name = "Year",
    limits = c(1986, 2017),
    breaks = c(1986, 1990, 2000, 2010, 2017),
    guide = guide_axis(angle = 90)
  ) +
  facet_wrap(vars(sender_region))

Using mutate and a lookup/calc function

I wrote a function where I pass a company name to lookup in a 2nd table a set of records, calculate a complicated result, and return the result.
I want to process all companies and add a value to each record with that result.
I am using the following code:
`aa <- mutate(companies,newcol=sum_rounds(companies$company_name))`
But I get the following warning:
Warning message:
In c("Bwom", "Symple", "TravelTriangle", "Ark Biosciences", "Artizan Biosciences", :
longer object length is not a multiple of shorter object length
(each of these is a company name)
The company data frame gets a new column, but all values are FALSE, whereas there should be a mix of TRUE and FALSE.
Any advice would be welcome to a newbie.
Function follows:
# Flag companies whose funding history starts with angel rounds and then
# moves on to mixed or venture rounds.
#
# co_name: character vector of company names (one or many).
#
# Returns an unnamed logical vector with one element per entry of co_name.
# A company with no usable rounds yields FALSE.
#
# Reads the global data frame `rounds` (columns company_name.x, roundtype,
# announced_on).
#
# Why this is vectorised: mutate() passes the whole company_name column at
# once. Comparing rounds$company_name.x against that whole vector recycles
# it, which raises "longer object length is not a multiple of shorter object
# length" and yields a single result that is reused for every row. Each name
# is therefore looked up on its own.
sum_rounds <- function(co_name) {
  # Pattern: ^ is start of the string, a+ is at least one "a",
  # [mv]+ is at least one "m" or "v" straight after.
  # nice summary is here: http://www.endmemo.com/program/R/gsub.php
  # NOTE(review): roundtype is presumably already coded as single letters
  # (pure angel = a, mixed = m, venture = v); the earlier comment mentioned
  # upper-case codes while the pattern is lower-case - confirm.
  angel_then_vc <- "^a+[mv]+"

  # Decide for a single company name.
  is_angel2vc <- function(one_name) {
    # Rows for this company with a known round type; which() drops any NA
    # comparison results.
    keep <- which(
      rounds$company_name.x == one_name & !is.na(rounds$roundtype)
    )
    hits <- rounds[keep, , drop = FALSE]
    # Sort by the date the round was announced.
    hits <- hits[order(hits$announced_on), , drop = FALSE]
    # Concatenate all round types in order, e.g. "aamv".
    round_codes <- paste(hits$roundtype, collapse = "")
    grepl(angel_then_vc, round_codes)
  }

  vapply(co_name, is_angel2vc, logical(1), USE.NAMES = FALSE)
}
DPUT from Companies table Follows:
structure(list(company_name = c("Bwom", "Symple", "TravelTriangle",
"Ark Biosciences", "Artizan Biosciences", "Audiense"), domain = c("b-wom.com",
"getsymple.com", "traveltriangle.com", "arkbiosciences.com",
NA, "audiense.com"), country_code = c("ESP", "USA", "USA", "CHN",
"USA", "GBR"), state_code = c(NA, "CA", "VA", NA, "NC", NA),
region = c("Barcelona", "SF Bay Area", "Washington, D.C.",
"Shanghai", "Raleigh", "London"), city = c("Barcelona", "San Francisco",
"Charlottesville", "Shanghai", "Durham", "London"), status = c("operating",
"operating", "operating", "operating", "operating", "operating"
), short_description = c("Bwom is a tool that offers a test and personalized exercises for women's intimate health.",
"Symple is the cloud platform for all your business payments. Pay, get paid, connect.",
"TravelTriangle enables travel enthusiasts to reserve a personalized holiday plan with a local travel agent.",
"Ark Biosciences is a biopharmaceutical company that is dedicated to the discovery and development",
"Artizan Biosciences", "SaaS developer delivering unique consumer insight and engagement capabilities to many of the world’s biggest brands and agencies."
), category_list = c("health care", "cloud computing|machine learning|mobile apps|mobile payments|retail technology",
"e-commerce|personalization|tourism|travel", "health care",
"biopharma", "analytics|apps|marketing|market research|social crm|social media|social media marketing"
), category_group_list = c("health care", "apps|commerce and shopping|data and analytics|financial services|hardware|internet services|mobile|payments|software",
"commerce and shopping|travel and tourism", "health care",
"biotechnology|health care|science and engineering", "apps|data and analytics|design|information technology|internet services|media and entertainment|sales and marketing|software"
), employee_count = c("1 to 10", "11 to 50", "101 to 250",
NA, "1 to 10", "51 to 100"), funding_rounds = c(2L, 1L, 4L,
2L, 2L, 5L), funding_total_usd = c(1075791, 120000, 19900000,
NA, 3e+06, 8013391), founded_on = structure(c(16555, 16770,
15156, 16071, NA, 14975), class = "Date"), first_funding_on = structure(c(16526,
17204, 15492, 16532, 17091, 15294), class = "Date"), last_funding_on = structure(c(17204,
17204, 17204, 17203, 17203, 17203), class = "Date"), closed_on = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), email = c("hello#b-wom.com", "info#getsymple.com",
"admin#traveltriangle.com", "info#arkbiosciences.com", NA,
"moreinfo#audiense.com"), phone = c(NA, NA, "'+91 98 99 120408",
"###############################################################################################################################################################################################################################################################",
NA, "###############################################################################################################################################################################################################################################################"
), cb_url = c("https://www.crunchbase.com/organization/bwom",
"https://www.crunchbase.com/organization/symple-2", "https://www.crunchbase.com/organization/traveltriangle-com",
"https://www.crunchbase.com/organization/ark-biosciences",
"https://www.crunchbase.com/organization/artizan-biosciences",
"https://www.crunchbase.com/organization/socialbro"), twitter_url = c("https://www.twitter.com/hellobwom",
NA, "https://www.twitter.com/traveltriangle", NA, NA, "https://www.twitter.com/socialbro"
), facebook_url = c("https://www.facebook.com/hellobwom/?fref=ts",
NA, "http://www.facebook.com/traveltriangle", NA, NA, "http://www.facebook.com/socialbro"
), uuid = c("e6096d58-3454-d982-0dbe-7de9b06cd493", "fd0ab78f-0dc4-1f18-21d1-7ce9ff7a173b",
"742043c1-c17a-4526-4ed0-e911e6e9555b", "8e27eb22-ce03-a2af-58ba-53f0f458f49c",
"ed07ac9e-1071-fca0-46d9-42035c2da505", "fed333e5-2754-7413-1e3d-5939d70541d2"
), isbio = c("other", "other", "other", "other", "bio", "other"
), co_type = c("m", "m", "m", "v", "v", "m")), .Names = c("company_name",
"domain", "country_code", "state_code", "region", "city", "status",
"short_description", "category_list", "category_group_list",
"employee_count", "funding_rounds", "funding_total_usd", "founded_on",
"first_funding_on", "last_funding_on", "closed_on", "email",
"phone", "cb_url", "twitter_url", "facebook_url", "uuid", "isbio",
"co_type"), row.names = c(NA, -6L), class = c("tbl_df", "tbl",
"data.frame"))
>

Resources