Stacked Bar ordered by Sum of Fill with ggplot2 - r

With the following data (an already melted data frame):
df1<-structure(list(Speciality = structure(27:32, .Label = c("Addiction Medicine",
"Anesthesiology", "Cardiac Electrophysiology", "Cardiology",
"Dermatology", "Emergency Medicine", "Family Medicine", "Gastroenterology",
"General Surgery", "Hematology & Oncology", "Hospitalist", "Internal Medicine",
"Nephrology", "Neurological Surgery", "Neurology", "Obstetrics & Gynecology",
"Otolaryngology", "Pain Medicine", "Pathology", "Pediatric Critical Care Medicine",
"Pediatric Hematology-Oncology", "Pediatric Pulmonology", "Pediatric Radiology",
"Pediatric Surgery", "Pediatrics", "Psychiatry", "Pulmonology",
"Radiation Oncology", "Radiology", "Surgical Oncology", "Urology",
"Vascular Surgery"), class = "factor"), PhysAge = structure(c(5L,
5L, 1L, 3L, 5L, 5L), .Label = c("25-34", "35-44", "45-54", "55-64",
"65+"), class = "factor"), value = c(0.0035, 0.0058, 0.0089, 0, 0.00512820512820513,
0.00512820512820513)), .Names = c("Speciality", "PhysAge", "value"
), row.names = 155:160, class = "data.frame")
How can I reorder in ggplot based on the sum of values for each Speciality in a stacked bar chart. I've found some options where the value is multiple columns, but in this case it's one value column.
Currently plotting by:
ggplot(df,aes(x=Speciality,y=value,fill=PhysAge))+
geom_bar(stat="identity")

You could try
set.seed(1)
df <- rbind(
AgevsPractice.melt,
transform(AgevsPractice.melt, PhysAge="1", value=runif(6, 0, 0.01)),
transform(AgevsPractice.melt, PhysAge="10", value=runif(6, 0, 0.01))
)
ggplot(df,aes(x=reorder(Speciality, value, sum), y=value,fill=PhysAge))+
geom_bar(stat="identity")

Related

sapply() returns list instead of character vector

I'm trying to extract the most representative and most generic keywords from a set of journals (using tf-idf). I've created two structures:
-ISSN_STEMKW_tf_idf is the dataframe with all keywords from all journals and their parameters. Find below the first 10 elements as a sample
dput(ISSN_STEMKW_tf_idf[1:10,])
structure(list(ISSN = c("03029743", "03029743", "03029743", "03029743",
"03029743", "14327643", "16130073", "10504729", "14327643", "14327643"
), word = c("artificialintelligence", "mathematicalmodels", "logicprogramming",
"problemsolving", "semantics", "evolutionaryalgorithms", "informationanalysis",
"robotics", "metaheuristics", "optimization"), n = c(16L, 11L,
10L, 10L, 10L, 10L, 9L, 8L, 8L, 8L), total = c(1050L, 1050L,
1050L, 1050L, 1050L, 317L, 129L, 124L, 317L, 317L), tf = c(0.0152380952380952,
0.0104761904761905, 0.00952380952380952, 0.00952380952380952,
0.00952380952380952, 0.0315457413249211, 0.0697674418604651,
0.0645161290322581, 0.0252365930599369, 0.0252365930599369),
idf = c(2.70805020110221, 2.45022109180011, 3.23867845216438,
2.2454266791541, 2.40576932922928, 2.14006616349627, 3.74950407593037,
3.34403896782221, 2.54553127160443, 2.17396771517195), tf_idf = c(0.0412655268739384,
0.0256689828664773, 0.0308445566872798, 0.0213850159919438,
0.0229120888498026, 0.0675099736118697, 0.261593307623049,
0.215744449536917, 0.0642405368228249, 0.0548635385532354
)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"
))
STEM_journals is a dataframe with all parameters from each analysed journals. Find below first 10 elements as well
dput(journals_STEM[1:10,])
structure(list(Title = c("Nature Reviews Materials", "National vital statistics reports : from the Centers for Disease Control and Prevention, National Center for Health Statistics, National Vital Statistics System",
"Reviews of Modern Physics", "Chemical Reviews", "Nature Energy",
"Nature Reviews Chemistry", "Nature Materials", "Chemical Society Reviews",
"Advances in Physics", "Proceedings of the IEEE International Conference on Computer Vision"
), ISSN = c("20588437", "15518922", "00346861", "15206890", "20587546",
"23973358", "14764660", "03060012", "14606976", "15505499"),
Categories = c("Biomaterials;Electronic, Optical and Magnetic Materials;Energy;Materials Chemistry;Surfaces, Coatings and Films ",
"Life-span and Life-course Studies ", "Physics and Astronomy ",
"Chemistry ", "Electronic, Optical and Magnetic Materials;Energy Engineering and Power Technology;Fuel Technology;Renewable Energy, Sustainability and the Environment ",
"Chemical Engineering;Chemistry ", "Chemistry;Condensed Matter Physics;Materials Science;Mechanical Engineering;Mechanics of Materials ",
"Chemistry ", "Condensed Matter Physics ", "Computer Vision and Pattern Recognition; Software"
), Categories4dist = c("Biomaterials ElectronicOpticalMagneticMaterials Energy MaterialsChemistry SurfacesCoatingsFilms",
"Life-spanLife-courseStudies", "PhysicsAstronomy", "Chemistry",
"ElectronicOpticalMagneticMaterials EnergyEngineeringPowerTechnology FuelTechnology RenewableEnergySustainabilitytheEnvironment",
"ChemicalEngineering Chemistry", "Chemistry CondensedMatterPhysics MaterialsScience MechanicalEngineering MechanicsofMaterials",
"Chemistry", "CondensedMatterPhysics", "ComputerVisionPatternRecognition Software"
), Top.Level = c("Physical Sciences and Engineering", "Physical Sciences and Engineering",
"Physical Sciences and Engineering", "Physical Sciences and Engineering",
"Physical Sciences and Engineering", "Physical Sciences and Engineering",
"Physical Sciences and Engineering", "Physical Sciences and Engineering",
"Physical Sciences and Engineering", "Physical Sciences and Engineering"
), Primary.Level = c("Chemistry", "Mathematics", "Physics and Astronomy",
"Chemistry", "Energy", "Chemical Engineering", "Chemistry",
"Chemistry", "Materials Science", "Computer Science"), Secondary.Level = c("Chemistry (General)",
"Statistics and Probability", "Physics and Astronomy (General)",
"Chemistry (General)", "Fuel Technology", "Process Chemistry and Technology",
"Chemistry (General)", "Chemistry (General)", "Materials Science (General)",
"Computer Graphics and Computer-Aided Design"), `Group by JCRCats k= 22` = c(1L,
2L, 3L, 4L, 5L, 5L, 5L, 4L, 6L, 7L), Journal_KW = list(`20588437` = NA,
`15518922` = NA, `00346861` = NA, `15206890` = NA, `20587546` = NA,
`23973358` = NA, `14764660` = NA, `03060012` = NA, `14606976` = NA,
`15505499` = NA), Journal_KW_Gen = list(`20588437` = NA,
`15518922` = NA, `00346861` = NA, `15206890` = NA, `20587546` = NA,
`23973358` = NA, `14764660` = NA, `03060012` = NA, `14606976` = NA,
`15505499` = NA)), row.names = c(3L, 6L, 9L, 12L, 17L,
24L, 26L, 30L, 31L, 38L), class = "data.frame")
I've created a function that brings together all keywords from papers with the same ISSN (i.e. same journals) and, using , returns the most meaningful ones (supposedly as character type)
journal_main_kw<- function (tfidf_df, jr_issn, threshold){
if (jr_issn %in% tfidf_df$ISSN) {
aux<- tfidf_df[(tfidf_df$ISSN==jr_issn)&(tfidf_df$tf_idf)>threshold,]$word
aux<-unlist(aux)
return(aux)
}
else return(NA)
}
(I have a similar one to return the least meaningful; only change is a < instead of a >)
I have tested this function isolatedly, passing a single ISSN, and it returns a character vector as expected. Even if if it includes more than one word, it is still a character type (checked with typeof). However, when I try to iterate over the whole STEM_journals ISSN column, the function fills the new column with lists instead of characters (other than that, it works correctly from a qualitative point of view):
journals_STEM$Journal_KW<-sapply(journals_STEM$ISSN, journal_main_kw, tfidf_df=ISSN_STEMKW_tf_idf,threshold=term_threshold)
Furthermore, even if I have the NA return condition in my function, some cells in the result column are filled with character(0) -when no elements match the return condition- which are hard to remove because they are lists (I guess, because looking for length 0 element is not working in this code). I can avoid this, I think, but the list issue is major trouble for what I plan to do later.
Any ideas about what I am doing wrong? I've read a similar question, stating that since I'm getting an undefined number of results, they must come in list format, but I have no issues with the function until I use sapply:
journal_generic_kw(ISSN_STEMKW_tf_idf,"01628828", 0.5*term_threshold)
[1] "algorithms" "datamining" "estimation"
typeof(journal_generic_kw(ISSN_STEMKW_tf_idf,"01628828", 0.5*term_threshold))
[1] "character"

Apply a function to particular rows

I want to apply the following code to only the first 3 rows (if it's applied to the second 3, it fails to parse. netflix_and_disney$release_year <-year(dmy(netflix_and_disney$release_year))
Is there a way about doing this with this df?
structure(list(show_id = c("00147800", "07019028", "00115433", "70234439", "80058654", "80125979"), title = c("10 Things I Hate About You", "101 Dalmatian Street", "101 Dalmatians", "Transformers Prime", "Transformers: Robots in Disguise", "#realityhigh"), type = c("Movie", "Tv Show", "Movie", "Tv Show", "Tv Show", "Movie"), rating = c("PG-13", "N/A", "G", "TV-Y7-FV", "TV-Y7", "TV-14"), release_year = c("31 Mar 1999", "25 Mar 2019", "27 Nov 1996", "2013", "2016", "2017"), date_added = structure(c(18212, 18320, 18212, 17782, 17782, 17417), class = "Date"), duration = c("97 min", "N/A", "103 min", "1 Season", "1 Season", "99 min"), genre = c("Comedy, Drama, Romance", "Animation, Comedy, Family", "Adventure, Comedy, Crime, Family", "Kids' TV", "Kids' TV", "Comedies"), director = c("Gil Junger", "N/A", "Stephen Herek", NA, NA, "Fernando Lebrija"), country = c("USA", "UK, USA, Canada", "USA, UK", "United States", "United States", "United States"), imdb_rating = c("7.3", "6.2", "5.7", NA, NA, NA), platform = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("Disney", "Netflix"), class = "factor")), row.names = c(1L, 2L, 3L, 995L, 996L, 997L), class = "data.frame")
I have tried applying to a subset of the df but has failed to work, as well as applying the which() function
It really all depends on your data and what function you want to apply. But, in principle, you can do this by subsetting your dataframe:
Data:
set.seed(123)
df <- data.frame(
v1 = rnorm(20),
v2 = runif(20),
v3 = sample(20)
)
Here we apply the function meanto the first ten rows of df:
apply(df[1:10,], 1, mean)
1 2 3 4 5 6 7 8 9 10
4.5274415 0.7281229 2.9908109 1.8131179 6.7605775 6.6179570 4.2313168 3.4003004 3.1930399 5.8040552
netflix_and_disney$release_year[1:3] < year(dmy(netflix_and_disney$release_year[1:3]))

Set each column to it's own palette

I'm making a column chart of the amount of time I spend on various projects, each one for one of a range of "clients" (actually different "areas" of my job) using the excellent togglr package to download my tracked time data and ggplot2.
The code I'm using is this: (data dput(SO) output pasted below question)
library("ggplot2")
library("RColorBrewer")
theme_set(theme_bw())
colourCount = 48 #nrow(projects)
getPalette = colorRampPalette(brewer.pal(12, "Paired"))
ggplot(data = SO, aes(x = client, y = time_spent)) +
geom_col(aes(fill = area_project), colour = "black") +
scale_fill_manual(values = getPalette(colourCount)) +
theme(legend.position = "right") +
guides(fill=guide_legend(ncol = 2)) +
ggtitle("From Start to End") #paste("From", date(min(df$start)), " to", date(max(df$stop)))) +
xlab("Functional Area") + ylab("Hours")
Which produces this plot:
What I can't figure out how to do is to make each column it's own palette with different shades for each project.
I.E. I'd like all the boxes in the "0_Admin" column to be different blues, each box in the "1_Monitoring" column to be different greens, etc. The plot above is close, but mostly by coincidence and the number of projects/area. You'll notice for example that "3_Management" projects are both red and orange, and orange shades "bleed" all the way over to "7_Visitor Safety".
Over time, the number of projects will increase overall (but will be a subset when I'm reporting on smaller time periods) so a fully manual scale is not feasible, but the number of Areas will stay the same.
Any thoughts? Hints? Thanks!
SO <- structure(list(client = c("0_Admin", "0_Admin", "0_Admin", "0_Admin",
"0_Admin", "0_Admin", "0_Admin", "0_Admin", "0_Admin", "1_Monitoring",
"1_Monitoring", "1_Monitoring", "1_Monitoring", "1_Monitoring",
"1_Monitoring", "1_Monitoring", "2_Science", "2_Science", "2_Science",
"2_Science", "2_Science", "2_Science", "3_Management", "3_Management",
"3_Management", "3_Management", "3_Management", "3_Management",
"4_EA", "6_Fire", "6_Fire", "7_VisitorSafety", "8_ResConMisc",
"8_ResConMisc", "8_ResConMisc", "8_ResConMisc", "8_ResConMisc",
"8_ResConMisc", "8_ResConMisc", "8_ResConMisc", "8_ResConMisc",
"9_CrossFxn", "9_CrossFxn", "9_CrossFxn", "9_CrossFxn", "Z_Leave",
"Z_Leave", "Z_Leave"),
project = c("Email", "EPM", "Finance",
"HR", "Misc", "OHS", "RCPs", "Time mgmt", "Training", "Amphibians",
"Area burned", "Birds", "Rangeland Health", "Sediment", "Ungulates",
"Water Quality", "Bison GPS", "Bison science advisory group",
"Collaboration", "Corridor Use", "Grassland bird survey", "Misc",
"Beavers", "Bison", "Geese", "HUMP/HIP", "HWC", "Invasive Plants",
"Nest sweeps", "Fire crew", "Fire mgmt plan", "DO response",
"Duty Officer", "Media", "Misc", "Open Data", "Peer discussion",
"RC meeting", "Training", "Travel", "Work planning", "CC meeting",
"Events", "Misc", "Trails", "Appointments", "Stat holiday", "Vacation"
),
time_spent = c(174.709722222222, 15.2483333333333, 26.7827777777778,
127.603611111111, 21.7127777777778, 6.32222222222222, 11.9725,
3.32111111111111, 29.6375, 4.80333333333333, 0.498055555555556,
74.4958333333333, 21.8011111111111, 1.14111111111111, 21.5008333333333,
36.0780555555556, 1.44972222222222, 1.40694444444444, 6.83916666666667,
3.93027777777778, 6.94916666666667, 2, 28.7986111111111, 154.448888888889,
0.684444444444445, 12.5727777777778, 2.98861111111111, 1.89416666666667,
1.75, 21.2725, 11.0122222222222, 2.74333333333333, 0.817777777777778,
10.415, 84.9144444444444, 11.4, 19.7738888888889, 8.84444444444444,
38.7216666666667, 8, 11.6063888888889, 10.5191666666667, 3.41638888888889,
20.8216666666667, 0.298611111111111, 6.74611111111111, 30, 75.5
),
area_project = c("0_Email", "0_EPM", "0_Finance", "0_HR",
"0_Misc", "0_OHS", "0_RCPs", "0_Time mgmt", "0_Training", "1_Amphibians",
"1_Area burned", "1_Birds", "1_Rangeland Health", "1_Sediment",
"1_Ungulates", "1_Water Quality", "2_Bison GPS", "2_Bison science advisory group",
"2_Collaboration", "2_Corridor Use", "2_Grassland bird survey",
"2_Misc", "3_Beavers", "3_Bison", "3_Geese", "3_HUMP/HIP", "3_HWC",
"3_Invasive Plants", "4_Nest sweeps", "6_Fire crew", "6_Fire mgmt plan",
"7_DO response", "8_Duty Officer", "8_Media", "8_Misc", "8_Open Data",
"8_Peer discussion", "8_RC meeting", "8_Training", "8_Travel",
"8_Work planning", "9_CC meeting", "9_Events", "9_Misc", "9_Trails",
"Z_Appointments", "Z_Stat holiday", "Z_Vacation")), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -48L), vars = "client", labels = structure(list(
client = c("0_Admin", "1_Monitoring", "2_Science", "3_Management",
"4_EA", "6_Fire", "7_VisitorSafety", "8_ResConMisc", "9_CrossFxn",
"Z_Leave")), class = "data.frame", row.names = c(NA, -10L), vars = "client", drop = TRUE),
indices = list(0:8, 9:15, 16:21, 22:27, 28L, 29:30, 31L, 32:40, 41:44, 45:47),
drop = TRUE, group_sizes = c(9L, 7L, 6L, 6L, 1L, 2L, 1L, 9L, 4L, 3L), biggest_group_size = 9L)

(still) Having trouble with ordering/sorting bar graph using plotly and shiny widget

I'm trying to numerically order my bar graph using plotly and I'm also using the shiny widget, select box, which displays the bar graph of each type of organization. (Ex. type of organizations are medical, web, gaming, military, etc) On the y-axis for the bar graph is the name of the organization and the x-axis has the number of records. Here is my coding for numerically ordering all of my bar graphs:
df <- original.df %>% choice(type_org == input$choice)
df$abbrv <- ifelse(nchar(df$name) > 20, abbreviate(df$name), df$name)
df$abbrv <- factor(df$abbrv, levels = unique(df$abbrv)[order(df$number, decreasing = FALSE)])
plot_ly(
x = df$number,
y = df$abbrv,
type = 'bar',
text = ifelse(nchar(df$name) > 20, df$name, "")) %>%
layout(margin = list(l = 150))
A bit explanation about my dataframe and coding. Basically, I abbreviated organization names that are longer than 20 character, which is why I have an abbrv column in my dataset. So in the y-axis, the full organization name doesn't show if it's longer than 20 characters, but instead it shows the abbreviation, where I used the abbreivation function. More details about it is in my previous post.
Anyways, the problem that I am having right now is that the factor() function is ordering 98% of the bar graphs. However, it doesn't sort two bar graphs for some reason. I have no idea why it's sorting everything else EXCEPT the bar graph for two type of organization. military and telecom. Here is the dataframe for the type of organizations, telecom:
structure(list(entity_name = c("KDDI", "T-Mobile, Deutsche Telecom",
"AT&T", "AT&T", "KT Corp.", "TerraCom & YourTel", "Vodafone",
"Three", "Bell"), year = c(2006, 2006, 2008, 2010, 2012, 2013, 2013, 2017, 2017
), type_org = c("telecoms", "telecoms", "telecoms", "telecoms",
"telecoms", "telecoms", "telecoms", "telecoms", "telecoms"),
records_lost = c(4000000L, 17000000L, 100000L, 100000L, 8700000L, 180000L,
2000000L, 200000L, 1900000L),
abbreviation = structure(c(6L, 8L, 1L, 1L, 2L, 7L,
3L, 5L, 4L), .Label = c("AT&T", "KT Corp.", "Vodafone", "Bell",
"Three", "KDDI", "TerraCom & YourTel", "T-DT"), class = "factor")), .Names = c("entity_name",
"alt_name", "description", "year", "type_org", "leak_method",
"interesting", "records_lost", "data_sensitivity", "source_1",
"source_2", "source_3", "source_name", "abbreviation"), row.names = c(NA,
9L), class = "data.frame")
Here is the dataframe for military:
structure(list(entity_name = c("US Dept of Defense", "US National Guard",
"US Military", "US Military", "US Army", "Stratfor", "Tricare"
), year = c(2009, 2009, 2009, 2010, 2011, 2011, 2011), type_org = c("military",
"military", "military", "military", "military", "military", "military"
), records_lost = c(72000L, 130000L, 76000000L, 300000L, 50000L, 900000L, 4900000L), abbreviation = structure(c(2L,
3L, 6L, 6L, 4L, 1L, 5L), .Label = c("Stratfor", "US Dept of Defense",
"US National Guard", "US Army", "Tricare", "US Military"), class = "factor")), .Names = c("entity_name",
"alt_name", "description", "year", "type_org", "leak_method",
"interesting", "records_lost", "data_sensitivity", "source_1",
"source_2", "source_3", "source_name", "abbreviation"), row.names = c(NA,
7L), class = "data.frame")
For some reason, these two are not sorted numerically like the other type of organizations and I'm not sure what to do. I tried using the arrange() dplyr function as well, but that doesn't do anything. I don't understand why it sorts all the other bar graphs though. Would anyone happen to know how to fix this?

Using mutate and a lookup/calc funtion

I wrote a function where I pass a company name to lookup in a 2nd table a set of records, calculate a complicated result, and return the result.
I want to process all companies and add a value to each record with that result.
I am using the following code:
`aa <- mutate(companies,newcol=sum_rounds(companies$company_name))`
But I get the following warning:
Warning message:
In c("Bwom", "Symple", "TravelTriangle", "Ark Biosciences", "Artizan Biosciences", :
longer object length is not a multiple of shorter object length
(each of these is a company name)
The company dataframe gets a new column, but all values are "false" where actually there should be both true and false.
Any advice would be welcome to a newbie.
Function follows:
sum_rounds<-function(co_name) {
#get records from rounds for the company name passed to the function
#remove NAs from column roundtype too
outval<- rounds %>%
filter(company_name.x==co_name & !is.na(roundtype)) %>%
#sort by date round is announced
arrange(announced_on) %>%
select(roundtype) %>%
#create a string of all round types in order
apply(2,paste,collapse="")
#the values from mixed to "M", venture to "V" and pureangel to "A"
# now see if it is of the form aaaaa (and #) followed by m or v
# in grep: ^ is start of a line and + is for ar least one copy
# [mv] is either m or v
# nice summary is here: http://www.endmemo.com/program/R/gsub.php
#is angel2vc?
angel2vc<-grepl("^a+[mv]+",outval)
#return(list("roundcodes"=outval,"angel2vc"=angel2vc))
return(angel2vc)
}
DPUT from Companies table Follows:
structure(list(company_name = c("Bwom", "Symple", "TravelTriangle",
"Ark Biosciences", "Artizan Biosciences", "Audiense"), domain = c("b-wom.com",
"getsymple.com", "traveltriangle.com", "arkbiosciences.com",
NA, "audiense.com"), country_code = c("ESP", "USA", "USA", "CHN",
"USA", "GBR"), state_code = c(NA, "CA", "VA", NA, "NC", NA),
region = c("Barcelona", "SF Bay Area", "Washington, D.C.",
"Shanghai", "Raleigh", "London"), city = c("Barcelona", "San Francisco",
"Charlottesville", "Shanghai", "Durham", "London"), status = c("operating",
"operating", "operating", "operating", "operating", "operating"
), short_description = c("Bwom is a tool that offers a test and personalized exercises for women's intimate health.",
"Symple is the cloud platform for all your business payments. Pay, get paid, connect.",
"TravelTriangle enables travel enthusiasts to reserve a personalized holiday plan with a local travel agent.",
"Ark Biosciences is a biopharmaceutical company that is dedicated to the discovery and development",
"Artizan Biosciences", "SaaS developer delivering unique consumer insight and engagement capabilities to many of the world’s biggest brands and agencies."
), category_list = c("health care", "cloud computing|machine learning|mobile apps|mobile payments|retail technology",
"e-commerce|personalization|tourism|travel", "health care",
"biopharma", "analytics|apps|marketing|market research|social crm|social media|social media marketing"
), category_group_list = c("health care", "apps|commerce and shopping|data and analytics|financial services|hardware|internet services|mobile|payments|software",
"commerce and shopping|travel and tourism", "health care",
"biotechnology|health care|science and engineering", "apps|data and analytics|design|information technology|internet services|media and entertainment|sales and marketing|software"
), employee_count = c("1 to 10", "11 to 50", "101 to 250",
NA, "1 to 10", "51 to 100"), funding_rounds = c(2L, 1L, 4L,
2L, 2L, 5L), funding_total_usd = c(1075791, 120000, 19900000,
NA, 3e+06, 8013391), founded_on = structure(c(16555, 16770,
15156, 16071, NA, 14975), class = "Date"), first_funding_on = structure(c(16526,
17204, 15492, 16532, 17091, 15294), class = "Date"), last_funding_on = structure(c(17204,
17204, 17204, 17203, 17203, 17203), class = "Date"), closed_on = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), email = c("hello#b-wom.com", "info#getsymple.com",
"admin#traveltriangle.com", "info#arkbiosciences.com", NA,
"moreinfo#audiense.com"), phone = c(NA, NA, "'+91 98 99 120408",
"###############################################################################################################################################################################################################################################################",
NA, "###############################################################################################################################################################################################################################################################"
), cb_url = c("https://www.crunchbase.com/organization/bwom",
"https://www.crunchbase.com/organization/symple-2", "https://www.crunchbase.com/organization/traveltriangle-com",
"https://www.crunchbase.com/organization/ark-biosciences",
"https://www.crunchbase.com/organization/artizan-biosciences",
"https://www.crunchbase.com/organization/socialbro"), twitter_url = c("https://www.twitter.com/hellobwom",
NA, "https://www.twitter.com/traveltriangle", NA, NA, "https://www.twitter.com/socialbro"
), facebook_url = c("https://www.facebook.com/hellobwom/?fref=ts",
NA, "http://www.facebook.com/traveltriangle", NA, NA, "http://www.facebook.com/socialbro"
), uuid = c("e6096d58-3454-d982-0dbe-7de9b06cd493", "fd0ab78f-0dc4-1f18-21d1-7ce9ff7a173b",
"742043c1-c17a-4526-4ed0-e911e6e9555b", "8e27eb22-ce03-a2af-58ba-53f0f458f49c",
"ed07ac9e-1071-fca0-46d9-42035c2da505", "fed333e5-2754-7413-1e3d-5939d70541d2"
), isbio = c("other", "other", "other", "other", "bio", "other"
), co_type = c("m", "m", "m", "v", "v", "m")), .Names = c("company_name",
"domain", "country_code", "state_code", "region", "city", "status",
"short_description", "category_list", "category_group_list",
"employee_count", "funding_rounds", "funding_total_usd", "founded_on",
"first_funding_on", "last_funding_on", "closed_on", "email",
"phone", "cb_url", "twitter_url", "facebook_url", "uuid", "isbio",
"co_type"), row.names = c(NA, -6L), class = c("tbl_df", "tbl",
"data.frame"))
>

Resources