Related
library(rvest)
library(dplyr)
library(tidyr)
library(spotifyr)
library(tidytext)
library(textdata)
Using the above libraries I scraped artist data from Spotify using the API token.
I've got the data of words with sentiments (i.e. anger let's say) and the details about the songs.
I now want to run this in a loop over multiple such word categories (e.g. anger) and see which words are used most for each emotion. More generally, I also want to plot a histogram of the words used in the songs.
So I use the following functions:
# Count anger-lexicon words per track and list tracks by that count, highest
# first.
# NOTE(review): unnest() is called without `cols`; per the warning quoted
# below it falls back to the list-columns album_images, artists and
# available_markets, whose per-row lengths differ -- the likely source of the
# "can't recycle" error. None of the three is used further down, so this step
# can probably be removed -- confirm that `lyric` is already a plain column.
data %>%
unnest() %>%
# Split the lyric text into one row per word.
unnest_tokens(word, lyric) %>%
# Drop stop words before matching against the lexicon.
anti_join(stop_words, by = "word") %>%
# Words absent from angry_words get anger = NA after the left join.
left_join(angry_words, by = "word") %>%
group_by(track_name, energy, album_name, duration_ms, valence) %>%
# na.rm = TRUE skips the NA produced for non-lexicon words.
summarize(angry_words = sum(anger, na.rm = TRUE)) %>%
ungroup() %>%
select(track_name, album_name, angry_words) %>%
arrange(desc(angry_words))
Every time I run the code, I get the following error:
Error in `fn()`:
! In row 64, can't recycle input of size 3 to size 2.
Run `rlang::last_error()` to see where the error occurred.
Warning message:
`cols` is now required when using unnest().
Please use `cols = c(album_images, artists, available_markets)`
All suggestions will be helpful.
Here the data and angry_words data frames are:
data <- structure(list(artist_name = c("María José Llergo", "María José Llergo"
), artist_id = c("70GBRlKEGjfueop2lfdQ4Q", "70GBRlKEGjfueop2lfdQ4Q"
), album_id = c("6BMyWViSAgXtUVlPfXiGES", "6BMyWViSAgXtUVlPfXiGES"
), album_type = c("album", "album"), album_images = list(structure(list(
height = c(640L, 300L, 64L), url = c("https://i.scdn.co/image/ab67616d0000b2735f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d00001e025f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d000048515f3d845e18e06df1bbe95178"
), width = c(640L, 300L, 64L)), class = "data.frame", row.names = c(NA,
3L)), structure(list(height = c(640L, 300L, 64L), url = c("https://i.scdn.co/image/ab67616d0000b2735f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d00001e025f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d000048515f3d845e18e06df1bbe95178"
), width = c(640L, 300L, 64L)), class = "data.frame", row.names = c(NA,
3L))), album_release_date = c("2020-01-31", "2020-01-31"), album_release_year = c(2020,
2020), album_release_date_precision = c("day", "day"), danceability = c(0.612,
0.5), energy = c(0.342, 0.267), key = c(4L, 7L), loudness = c(-9.193,
-11.736), mode = 0:1, speechiness = c(0.0419, 0.0448), acousticness = c(0.358,
0.815), instrumentalness = c(0.000502, 2.66e-06), liveness = c(0.257,
0.0981), valence = c(0.122, 0.264), tempo = c(99.993, 114.192
), track_id = c("7pB0e4E78UfAmKBPzQPo8a", "1sgH6adzL1BBaIXRC7NOYI"
), analysis_url = c("https://api.spotify.com/v1/audio-analysis/7pB0e4E78UfAmKBPzQPo8a",
"https://api.spotify.com/v1/audio-analysis/1sgH6adzL1BBaIXRC7NOYI"
), time_signature = 3:4, artists = list(structure(list(href = "https://api.spotify.com/v1/artists/70GBRlKEGjfueop2lfdQ4Q",
id = "70GBRlKEGjfueop2lfdQ4Q", name = "María José Llergo",
type = "artist", uri = "spotify:artist:70GBRlKEGjfueop2lfdQ4Q",
external_urls.spotify = "https://open.spotify.com/artist/70GBRlKEGjfueop2lfdQ4Q"), class = "data.frame", row.names = 1L),
structure(list(href = "https://api.spotify.com/v1/artists/70GBRlKEGjfueop2lfdQ4Q",
id = "70GBRlKEGjfueop2lfdQ4Q", name = "María José Llergo",
type = "artist", uri = "spotify:artist:70GBRlKEGjfueop2lfdQ4Q",
external_urls.spotify = "https://open.spotify.com/artist/70GBRlKEGjfueop2lfdQ4Q"), class = "data.frame", row.names = 1L)),
available_markets = list(c("AD", "AE", "AG", "AL", "AM",
"AO", "AR", "AT", "AU", "AZ", "BA", "BB", "BD", "BE", "BF",
"BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW",
"BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO",
"CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO",
"DZ", "EC", "EE", "EG", "ES", "FI", "FJ", "FM", "FR", "GA",
"GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW",
"GY", "HK", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN",
"IQ", "IS", "IT", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
"KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK",
"LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "ME",
"MG", "MH", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV",
"MW", "MX", "MY", "MZ", "NA", "NE", "NG", "NI", "NL", "NO",
"NP", "NR", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL",
"PS", "PT", "PW", "PY", "QA", "RO", "RS", "RW", "SA", "SB",
"SC", "SE", "SG", "SI", "SK", "SL", "SM", "SN", "SR", "ST",
"SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TO", "TR",
"TT", "TV", "TW", "TZ", "UA", "UG", "US", "UY", "UZ", "VC",
"VE", "VN", "VU", "WS", "XK", "ZA", "ZM", "ZW"), c("AD",
"AE", "AG", "AL", "AM", "AO", "AR", "AT", "AU", "AZ", "BA",
"BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO",
"BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH",
"CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE",
"DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "FI",
"FJ", "FM", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN",
"GQ", "GR", "GT", "GW", "GY", "HK", "HN", "HR", "HT", "HU",
"ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "JP",
"KE", "KG", "KH", "KI", "KM", "KN", "KR", "KW", "KZ", "LA",
"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY",
"MA", "MC", "MD", "ME", "MG", "MH", "MK", "ML", "MN", "MO",
"MR", "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", "NE",
"NG", "NI", "NL", "NO", "NP", "NR", "NZ", "OM", "PA", "PE",
"PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO",
"RS", "RW", "SA", "SB", "SC", "SE", "SG", "SI", "SK", "SL",
"SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ",
"TL", "TN", "TO", "TR", "TT", "TV", "TW", "TZ", "UA", "UG",
"US", "UY", "UZ", "VC", "VE", "VN", "VU", "WS", "XK", "ZA",
"ZM", "ZW")), disc_number = c(1L, 1L), duration_ms = c(197316L,
313028L), explicit = c(FALSE, FALSE), track_href = c("https://api.spotify.com/v1/tracks/7pB0e4E78UfAmKBPzQPo8a",
"https://api.spotify.com/v1/tracks/1sgH6adzL1BBaIXRC7NOYI"
), is_local = c(FALSE, FALSE), track_name = c("¿De Qué Me Sirve Llorar?",
"Niña De Las Dunas"), track_preview_url = c("https://p.scdn.co/mp3-preview/1ed3fba536f1813af99c88f69893dfe6272df847?cid=cf686ca455c74783b8f27d0c35dfc5b0",
"https://p.scdn.co/mp3-preview/e4f9386ef79ff5027800aa9ccd8560a622df28d0?cid=cf686ca455c74783b8f27d0c35dfc5b0"
), track_number = 1:2, type = c("track", "track"), track_uri = c("spotify:track:7pB0e4E78UfAmKBPzQPo8a",
"spotify:track:1sgH6adzL1BBaIXRC7NOYI"), external_urls.spotify = c("https://open.spotify.com/track/7pB0e4E78UfAmKBPzQPo8a",
"https://open.spotify.com/track/1sgH6adzL1BBaIXRC7NOYI"),
album_name = c("Sanación", "Sanación"), key_name = c("E",
"G"), mode_name = c("minor", "major"), key_mode = c("E minor",
"G major")), row.names = 1:2, class = "data.frame")
angry_words <- structure(list(word = c("abandoned", "abandonment", "abhor",
"abhorrent", "abolish", "abomination", "abuse", "accursed", "accusation",
"accused", "accuser", "accusing", "actionable", "adder", "adversary",
"adverse", "adversity", "advocacy", "affront", "aftermath", "aggravated",
"aggravating", "aggravation", "aggression", "aggressive", "aggressor",
"agitated", "agitation", "agony", "alcoholism", "alienate", "alienation",
"allegation", "altercation", "ambush", "anarchism", "anarchist",
"anarchy", "anathema", "anger", "angry", "anguish", "animosity",
"animus", "annihilate", "annihilated", "annihilation", "annoy",
"annoyance", "annoying", "antagonism", "antagonist", "antagonistic",
"antichrist", "antipathy", "antisocial", "antithesis", "anxiety",
"argue", "argument", "argumentation", "arguments", "armament",
"armed", "arraignment", "arrogant", "arson", "assail", "assailant",
"assassin", "assassinate", "assassination", "assault", "asshole",
"atrocious", "atrocity", "attack", "attacking", "attorney", "avarice"
), anger = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE)), row.names = c(NA, -80L), class = c("tbl_df",
"tbl", "data.frame"))
So I need to prepare a term-document-matrix for each of the sets of text I want to subsequently run against each other in a classification procedure (rolling.classify() in Stylo package).
I created a term-document matrix of the whole text corpus and now want to make two subsets of selected texts, one of which should contain a single text only. Selecting multiple texts works fine (a), but selecting a single text does not (b). Why can I not do this?
# Build the frequency list from the selected features in the word list
# (`words`), limited via head = 265.
freq.list <- make.frequency.list(words, head = 265) # Creating frequency list using only the frequencies of the
# selected features from word-list (words)
word.frequencies <- make.table.of.frequencies(corpus = x, features = freq.list)
# Document-term matrix of whole corpus and matching frequencies.
# Making two subsets now: (a) nine texts, (b) a single text.
a <- word.frequencies[c(1,2,3,17,19,20,21,22,23), 1:263]
dim(a) # Double-check that it is the right no. of texts
# NOTE(review): a single row index makes `[` drop the result to a plain
# vector, which is why dim(b) is NULL; `drop = FALSE` would keep it 1 x 263.
b <- word.frequencies[18,1:263]
dim(b) # Double-check
> dim(a)
[1] 9 263
> dim(b)
NULL
data:
(used dput())
x <- structure(list(middleFr_Calmative_1946 = c("the", "calmative",
"i", "don’t", "know", "when", "i", "died", ".", "it", "always",
"seemed", "to", "me", "i", "died", "old", ",", "about", "ninety",
"years", "old", ",", "and", "what", "years", ",", "and", "that",
"my", "body", "bore", "it", "out", ",", "from", "head", "to",
"foot", ".", "but", "this", "evening", ",", "alone", "in", "my",
"icy", "bed", ",", "i", "have", "the", "feeling", "i’ll", "be",
"older", "than", "the", "day", ",", "the", "night", ",", "when",
"the", "sky", "with", "all", "its", "lights", "fell", "upon",
"me", ",", "the", "same", "i", "had", "so", "often", "gazed",
"resolved", "to", "speak", "to", "him", ".", "so", "i", "marshalled",
"the", "words", "and", "opened", "my", "mouth", ",", "thinking",
"i", "would", "hear", "them", ".", "but", "all", "i", "heard",
"was", "a", "kind", "of", "rattle", ",", "unintelligible", "even",
"have", "a", "penny", "in", "my", "pocket", ",", "nor", "anything",
"resembling", "it", "."), middleFr_End_1946 = c("the", "end",
"they", "clothed", "me", "and", "gave", "me", "money", ".", "i",
"back", "mine", ".", "i", "added", ",", "give", "me", "back",
"my", "greatcoat", ".", "they", "replied", "that", "they", "had",
"burnt", "them", ",", "together", "with", "my", "other", "clothes",
".", "i", "understood", "then", "that", "the", "end", "was",
"near", ",", "at", "least", "fairly", "near", ".", "later", "on",
"i", "tried", "to", "exchange", "this", "hat", "for", "a", "cap",
",", "or", "a", "slouch", "which", "could", "be", "pulled", "down",
"over", "my", "face", ",", "but", "without", "much", "success",
".", "and", "yet", "i", "could", "not", "go", "about", "bare",
"-", "headed", ",", "with", "my", "skull", "in", "the", "state",
"it", "was", ".", "at", "first", "this", "hat", "was", "too",
"small", ",", "then", "it", "got", "used", "to", "me", ".", "they",
"gave", "me", "a", "tie", ",", "after", "long", "discussion",
".", "it", "seemed", "a", "pretty", "tie", "to", "me", ",", "but",
"i", "didn’t", "like", "it", ".", "when", "it", "came", "at",
"last", "i", "was", "too", "tired", "to", "send", "it", "back",
".", "but", "in", "the", "end", "it", "came", "in", "useful",
".", "it", "was", "blue", ",", "with", "kinds", "of", "little",
"stars", ".", "i", "didn’t", "feel", "well", ",", "but", "they",
"told", "me", "i", "was", "well", "enough", "."), middleFr_Expelled_1946 = c("the",
"expelled", "there", "were", "not", "many", "steps", ".", "i",
"had", "counted", "them", "a", "thousand", "times", ",", "both",
"going", "up", "and", "coming", "down", ",", "but", "the", "figure",
"has", "gone", "from", "my", "mind", ".", "i", "have", "never",
"known", "whether", "you", "should", "say", "one", "with", "your",
"every", "day", "several", "times", "a", "day", ",", "until",
"they", "sink", "forever", "in", "the", "mud", ".", "that’s",
"an", "order", ".")), class = "stylo.corpus", call = load.corpus.and.parse(files = "all",
corpus.dir = "x", markup.type = "plain", corpus.lang = "English.all",
splitting.rule = ("[ \t\n]+"), sampling = "no.sampling",
features = "w", ngram.size = 1, preserve.case = FALSE, encoding = "UTF-8"))
freq.list <- c("", "-", "—", ",", ";", ":", "!", "?", ".", "’", "\"",
"(", ")", "a", "about", "above", "across", "after", "again",
"against", "ah", "all", "almost", "along", "Already", "also",
"always", "am", "among", "an", "and", "another", "any", "anything",
"are", "as", "at", "away", "back", "be", "because", "been", "before",
"behind", "being", "best", "better", "between", "beyond", "both",
"but", "by", "came", "can", "can't", "can’t", "cannot", "come",
"comes", "could", "did", "didn’t", "different", "do", "does",
"doing", "don't", "don’t", "done", "down", "each", "either",
"else", "even", "ever", "every", "everything", "except", "far",
"few", "fifteen", "first", "five", "for", "forward", "four",
"from", "get", "go", "goes", "going", "got", "great", "had",
"half", "has", "have", "having", "he", "her", "here", "herself",
"him", "himself", "his", "how", "however", "hundred", "i", "i'll",
"i'm", "i’ll", "if", "in", "indeed", "instead", "into", "is",
"it", "it's", "it’s", "its", "itself", "just", "last", "late",
"least", "left", "less", "let", "like", "little", "long", "made",
"make", "many", "may", "me", "merely", "might", "mine", "more",
"most", "moved", "much", "must", "my", "myself", "near", "neither",
"never", "next", "no", "none", "nor", "not", "nothing", "now",
"of", "off", "often", "oh", "on", "once", "one", "only", "or",
"other", "others", "otherwise", "our", "out", "over", "own",
"perhaps", "place", "quite", "rather", "really", "right", "said",
"same", "say", "second", "shall", "she", "should", "since", "six",
"small", "so", "some", "someone", "something", "sometimes", "somewhere",
"soon", "still", "such", "ten", "than", "that", "that's", "that’s",
"the", "their", "them", "themselves", "then", "there", "therefore",
"these", "they", "thing", "things", "third", "this", "those",
"though", "three", "through", "thus", "till", "time", "times",
"to", "together", "too", "towards", "two", "under", "unless",
"until", "up", "upon", "us", "very", "was", "way", "we", "well",
"went", "were", "what", "whatever", "when", "where", "whether",
"which", "while", "who", "whom", "whose", "why", "will", "with",
"within", "without", "won't", "would", "yes", "yet", "you", "your",
"yourself")
You can do:
# Keep the matrix structure when selecting a single row: without
# `drop = FALSE`, `[` simplifies a one-row selection to a plain vector.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
b <- word.frequencies[18, 1:263, drop = FALSE]
dim(b)
# [1] 1 263
This question already has answers here:
How to join (merge) data frames (inner, outer, left, right)
(13 answers)
Closed 2 years ago.
I am working with the following two datasets :
will_can
structure(list(will_can.REGION = c("AB", "B", "B", "B", "BB",
"BB", "BD", "BH", "BH", "BH", "BR", "BS", "BS", "BT", "BT", "CF",
"CF", "CM", "CO", "CV", "CV", "CV", "CW", "DA", "DA", "DD", "DE",
"DE", "DG", "DG", "DG", "DG", "DL", "DN", "DT", "E", "E", "E",
"EH", "EH", "EH", "EH", "EH", "EH", "EH", "EX", "EX", "EX", "FK",
"FK", "FY", "G", "G", "G", "GL", "GL", "HA", "HD", "HD", "IV",
"KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA",
"KA", "KA", "KA", "KA", "KA", "KA", "KT", "KY", "KY", "KY", "L",
"L", "L", "LA", "LA", "LE", "LE", "M", "M", "ME", "ME", "MK",
"ML", "N", "N", "N", "NE", "NG", "NN", "NN", "NR", "NW", "OL",
"OX", "OX", "PH", "PO", "PR", "RG", "RH", "RM", "RM", "S", "S",
"S", "S", "SA", "SE", "SE", "SE", "SE", "SE", "SG", "SL", "SN",
"SN", "SO", "SO", "SO", "SS", "ST", "ST", "ST", "ST", "SW", "SW",
"SW", "SW", "SY", "SY", "SY", "TA", "TD", "TD", "TN", "TW", "UB",
"UB", "W", "W", "W", "W", "WA", "WC", "WD")), class = "data.frame", row.names = c(NA,
-156L))
will_can_region_norm
structure(list(norm = c(67.3112073766083, 0, 62.9924341677094,
0, 134.940019161483, 86.0271073135687, 233.710968710152, 0, 0,
136.210220315945, 72.0106074505199, 54.9624828839958, 0, 0, 46.5061888459603,
0, 51.9149234846709, 85.3970454501009, 0, 0, 141.438961332615,
122.50716299382, 197.887432921107, 96.646567080111, 108.996678489718,
873.779493880704, 0, 109.106806944561, 56.7421763178016, 249.99781251914,
0, 106.993398828272, 0, 182.997053590583, 0, 225.716259764203,
217.655353412983, 98.8344746903195, 70.3435951664196, 106.870878390986,
0, 0, 113.255439262354, 226.344150395729, 0, 0, 0, 0, 0, 0, 0,
0, 0, 92.5698187029358, 0, 1159.88543061088, 59.5746039659052,
0, 217.977759293264, 88.627745595238, 155.299651064979, 0, 70.3301130229532,
0, 0, 0, 0, 36.166169734453, 162.12380892704, 74.7710230881704,
112.29824076945, 120.249189991435, 25.6209421071498, 36.7120335621411,
115.238964414265, 0, 50.4621322067494, 59.9490876378327, 82.9160720202368,
132.342362545417, 0, 0, 209.987774511768, 0, 45.0104437732687,
59.5244437425851, 54.7420581590574, 77.921490980977, 132.545922191567,
100.083647410414, 51.5757713324224, 102.602449571922, 98.8984492920948,
0, 129.885834248271, 0, 189.332549749021, 149.846130500895, 0,
0, 73.4653456617979, 220.103517986062, 111.317004279081, 375.711503660056,
156.229153172374, 760.35739839154, 0, 83.1515916711375, 0, 0,
0, 73.5483180088058, 269.518568414391, 102.141462145838, 55.2886923953334,
151.949727736478, 148.297412239816, 0, 0, 0, 0, 0, 0, 0), REGION = c("AB",
"AL", "B", "BA", "BB", "BD", "BH", "BL", "BN", "BR", "BS", "BT",
"CA", "CB", "CF", "CH", "CM", "CO", "CR", "CT", "CV", "CW", "DA",
"DD", "DE", "DG", "DH", "DL", "DN", "DT", "DY", "E", "EC", "EH",
"EN", "EX", "FK", "FY", "G", "GL", "GU", "GY", "HA", "HD", "HG",
"HP", "HR", "HS", "HU", "HX", "IG", "IM", "IP", "IV", "JE", "KA",
"KT", "KW", "KY", "L", "LA", "LD", "LE", "LL", "LN", "LS", "LU",
"M", "ME", "MK", "ML", "N", "NE", "NG", "NN", "NP", "NR", "NW",
"OL", "OX", "PA", "PE", "PH", "PL", "PO", "PR", "RG", "RH", "RM",
"S", "SA", "SE", "SG", "SK", "SL", "SM", "SN", "SO", "SP", "SR",
"SS", "ST", "SW", "SY", "TA", "TD", "TF", "TN", "TQ", "TR", "TS",
"TW", "UB", "W", "WA", "WC", "WD", "WF", "WN", "WR", "WS", "WV",
"YO", "ZE")), row.names = c(NA, -124L), class = "data.frame")
I am trying to add a new column, will_can$norm, which would contain values of will_can_region_norm$norm based on matching values of the variable "REGION" which is the same in both datasets. So gaps from the second dataset to the first would be automatically filled based on matching strings of REGION
Based on another question in the forum I tried the following function:
# NOTE(review): `will_can_region_norm[,"norm"]` is a bare numeric vector, so no
# REGION column reaches merge(); will_can's key is named will_can.REGION.
will_can2 <- merge(will_can, will_can_region_norm[,"norm"], by = "REGION", all=TRUE)
But I get the following error:
Error in fix.by(by.y, y) :
'by' must specify a unique correct column [translated from French]
Is there something I'm missing here? Would be grateful for some help !
Cameron
For your merge(will_can, will_can_region_norm[,"norm"], by = "REGION", all=TRUE) command to work, both data.frames would need a column called REGION. In your example:
will_can doesn't have this column, but it does have one called will_can.REGION.
You've extracted a single column called norm from will_can_region_norm and tried to merge on that single column. As a result, the merge() command never sees the REGION column of will_can_region_norm.
In your case, try something like
# Left join: keep every row of will_can and attach the matching norm value.
# `all.x = TRUE` (rather than `all = TRUE`) avoids appending rows for regions
# that occur only in will_can_region_norm.
merge(
  will_can, will_can_region_norm,
  by.x = "will_can.REGION", by.y = "REGION",
  all.x = TRUE
)
I have the following values (800) in my data frame
cat1 <- c("bi", "bt", "ch", "fs", "hc", "lh", "mo", "ms", "nn", "ro", "sc", "si", "so", "ti", "ww", "dt", "3et", "a", "a", "a", "a", "a", "a", "aam", "aao", "ac", "acs", "aeo", "aeq", "afm", "aic", "aio", "akq", "am", "am", "am", "am", "amc", "amc", "aoq", "aoq", "aot", "apm", "apo", "apo", "aqf", "ass", "ata", "ata", "atc", "atf", "atq", "atr", "aun", "bae", "baf", "bai", "bcm", "bcs", "bea", "bee", "bef", "bem", "bem", "bem", "bem", "bem", "beo", "beo", "beq", "beq", "beq", "bhm", "bkr", "bm", "bm", "bme", "bmm", "bmm", "bmo", "bmq", "bmq", "brm", "brm", "brq", "bsm", "bsm", "bsm", "bsm", "bso", "bta", "bwa", "clm", "dd", "dm", "ne", "pp", "pv", "rt", "se", "sw")
I want to replace all string values with numeric values so that I can feed them into a neural network. For example, I want every "am" to be replaced with 5 (or 0.5) and every "bem" to be replaced with 7 (or 0.7), i.e. according to some fixed mapping.
I have tried many things but have not been able to achieve anything.
If you know the replacement rule, you can build a dictionary as a named vector and use its names for the lookup.
For instance,
# Example input: category labels to be recoded.
cat1 <- c("bem", "am", "am", "bem", "am")
# Lookup table: a numeric vector whose names are the category labels.
dict <- setNames(c(7, 5), c("bem", "am"))
# Indexing by name yields one code per element of cat1, in the same order.
res <- dict[cat1]
and you will get
> res
bem am am bem am
7 5 5 7 5
I have a long list of elements that I want to display vertically as checkboxes in my GUI.
Here is my code:
library(gWidgets)
library(gWidgetstcltk)
# Top-level window containing one vertically laid-out frame for the elements.
window <- gwindow("ITRAX Data Analysis")
Frame_Elements <- gframe(
  text = "Elements",
  horizontal = FALSE,
  container = window
)
Group7 <- ggroup(horizontal = TRUE, container = Frame_Elements)

# Element symbols offered as checkboxes.
Element_Options <- c(
  "Ac", "Ag", "Al", "Ar", "As", "At", "Au", "Ba", "Bi", "Br", "Ca", "Cd",
  "Ce", "Cl", "Co", "Cr", "Cs", "Cu", "Dy", "Er", "Eu", "Fe", "Fr", "Ga",
  "Gd", "Ge", "Hf", "Hg", "Ho", "I", "In", "Ir", "K", "La", "Lu", "Mg",
  "Mn", "Mo", "Nb", "Nd", "Ni", "Os", "P", "Pa", "Pb", "Pd", "Pm", "Po",
  "Pr", "Pt", "Ra", "Rb", "Re", "Rh", "Ru", "S", "Sb", "Sc", "Si", "Sm",
  "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl", "Tm", "U", "V", "W",
  "Y", "Yb", "Zn", "Zr"
)

# Vertical checkbox group, nothing pre-checked.
# NOTE(review): use.table = TRUE presumably selects the table-style
# (scrollable) variant described in the question -- confirm against gWidgets.
Choose_Elements <- gcheckboxgroup(
  Element_Options,
  checked = FALSE,
  horizontal = FALSE,
  use.table = TRUE,
  container = Group7
)
My problem is that the scroll table does not always open when I run the code. Sometimes I need to run it multiple times (without actually editing the code in any way) in order for the scroll box to appear. In other words, the window and frame of the GUI will appear, but will be empty.
Also, when the scroll box does open, the scroll bar doesn't actually work.
Any help with resolving this issue would be highly appreciated.