I am really new to R and I can't find a way to subset matrix rows given a list of indexes.
I have a dataframe called 'demo' with 855 rows and 3 columns that looks like this:
## Subject AGE DX
## 1 011_S_0002_bl 74.3 0
## 2 011_S_0003_bl 81.3 1
## 3 011_S_0005_bl 73.7 0
## 4 022_S_0007_bl 75.4 1
## 5 011_S_0008_bl 84.5 0
## 6 011_S_0010_bl 73.9 1
From this, I want to extract the indexes for all the rows that match DX == 1. So I do:
rownames(demo[demo$DX == 1,])
Which returns:
## [1] "2" "4" "6" "14" "20" "31" "33" "34" "36" "39" "40" "41"
## [13] "46" "47" "53" "54" "55" "58" "64" "67" "69" "70" "72" "81"
## [25] "84" "87" "88" "92" "96" "98" "100" "101" "106" "108" "109" "112"
....
Now I have a matrix called T_hat with 855 rows and 1 column that looks like this:
## [,1]
## [1,] 5.812925
## [2,] 10.477721
## [3,] 1.519726
## [4,] -0.221328
## [5,] 1.784920
What I want is to use the numbers in 'al' to subset the values with the corresponding numbers in the indexes and to get something like this:
## [,1]
## [2,] 10.477721
## [4,] -0.221328
...and so on.
I've tried all these options:
# Fails: T_hat has no row names, so rows cannot be selected by character label.
T_hat_a <- T_hat[rownames(demo[demo$DX == 1,]),1]
# is.numeric() tests the type of the whole vector; the row names are character,
# so it yields a single FALSE and no rows are selected.
T_hat_b <- T_hat[is.numeric(rownames(demo[demo$DX == 1,])),1]
# rownames(T_hat) is NULL for a matrix without dimnames, so %in% yields an
# empty logical vector and no rows are selected.
T_hat_c <- T_hat[rownames(T_hat) %in% rownames(demo[demo$DX == 1,]),1]
# Same empty left-hand side as T_hat_c, now matched against a single FALSE.
T_hat_d <- T_hat[rownames(T_hat) %in% is.numeric(rownames(demo[demo$DX == 1,])),1]
But none returns what I expect.
T_hat_a = ERROR "no 'dimnames' attribute for array"
T_hat_b = numeric(0)
T_hat_c = numeric(0)
T_hat_d = numeric(0)
I've also tried to convert my matrix to a df, but only the T_hat_a option returns a result, but it is not at all as desired, since it returns different values...
Hi, what I want is to get the front page of CoinMarketCap into a data frame. This is what I have so far, but the data looks unorganized and I don't know how to turn it into a neat data frame.
library(jsonlite)
library ( tidyverse)
library( rvest )
# Download the CoinMarketCap front page and parse the JSON payload that the
# page embeds in its "#__NEXT_DATA__" node.
json_data <- "https://coinmarketcap.com/" %>%
  read_html() %>%
  html_node("#__NEXT_DATA__") %>%
  html_text() %>%
  fromJSON()
# Print the latest-listings entry of the parsed payload.
json_data$props$initialState$cryptocurrency$listingLatest$data
What I end up getting is a long list that I cannot make sense of. I know the data is in there, because the list looks something like this, but I don't know how to parse it.
[121] "quotes.2.percentChange60d" "quotes.2.percentChange7d" "quotes.2.percentChange90d" "quotes.2.price"
[125] "quotes.2.selfReportedMarketCap" "quotes.2.turnover" "quotes.2.volume24h" "quotes.2.volume30d"
[129] "quotes.2.volume7d" "quotes.2.ytdPriceChangePercentage" "rank" "selfReportedCirculatingSupply"
[133] "slug" "symbol" "totalSupply" "tvl"
[[1]]$id
[1] "COMPRESSED_KEYS_ARR"
[[1]]$excludeProps
[1] "auditInfoList"
[[2]]
[1] "68789.6259389221" "65.5260009765625" "18908943" "1" "2013-04-28T00:00:00.000Z"
[6] "TRUE" "FALSE" "50755.7211665326" "1" "1"
[11] "FALSE" "2021-12-23T19:20:02.000Z" "48065.8375264037" "8093" "21000000"
[16] "Bitcoin" "40.4175" "1065349214847.34" "2021-12-23T19:21:02.000Z" "18897342.6115399"
[21] "18897342.6115399" "BTC" "0" "0" "0"
[26] "0" "0" "0" "1" "0"
[31] "0.02793205" "527841.47774037" "21776428.8780472" "3626419.86588612" "72.706"
[36] "40.4175" "1065349214847.34" "2021-12-23T19:21:02.000Z" "232885004.198773" "232885004.198773"
[41] "ETH" "-0.189131" "0.653349" "-11.42415087" "-16.02722155"
[46] "3.129837" "19.93155879" "12.31613021" "0" "0.02793205"
[51] "6504955.07684694" "268365972.663341" "44690876.5456617" "72.706" "40.4175"
[56] "1065349214847.34" "2021-12-23T19:20:02.000Z" "959267979935.385" "959267979935.385" "USD"
[61] "0.53649283" "3.98091259" "-11.42415087" "-16.02722155" "5.84148872"
[66] "19.93155879" "50730.9149927304" "0" "0.02793205" "26794319100.1314"
[71] "1105416320667.99" "184084531389.181" "72.706" "40.4175" "1065349214847.34"
[76] "2021-12-23T19:21:02.000Z" "18897342.6115399" "18897342.6115399" "BTC" "0"
[81] "0" "0" "0" "0" "0"
[86] "1" "0" "0.02793205" "527841.47774037" "21776428.8780472"
[91] "3626419.86588612" "72.706" "40.4175" "1065349214847.34" "2021-12-23T19:21:02.000Z"
[96] "232885004.198773" "232885004.198773" "ETH" "-0.189131" "0.653349"
[101] "-11.42415087" "-16.02722155" "3.129837" "19.93155879" "12.31613021"
[106] "0" "0.02793205" "6504955.07684694" "268365972.663341" "44690876.5456617"
[111] "72.706" "40.4175" "1065349214847.34" "2021-12-23T19:20:02.000Z" "959267979935.385"
[116] "959267979935.385" "USD" "0.53649283" "3.98091259" "-11.42415087"
[121] "-16.02722155" "5.84148872" "19.93155879" "50730.9149927304" "0"
[126] "0.02793205" "26794319100.1314" "1105416320667.99" "184084531389.181" "72.706"
[131] "1" "0" "bitcoin" "BTC" "18908943"
[136] NA NA
[[3]]
[1] "4891.70469755141" "0.420897006988525" "118860687.6865" "2" "2015-08-07T00:00:00.000Z"
[6] "TRUE" "FALSE" "4119.08504574469" "1027" "1"
[11] "FALSE" "2021-12-23T19:20:02.000Z" "3897.23447281111" "4509" NA
[16] "Ethereum" "20.6197" "489234090606.33" "2021-12-23T19:21:02.000Z" "9637790.92058901"
[21] "9637790.92058901" "BTC" "0.277187" "-0.842643" "-4.49917037"
What I eventually want is something like what I can already retrieve for the historical data.
# Download one historical snapshot page and parse the JSON payload embedded in
# its "#__NEXT_DATA__" node.
json_data <- "https://coinmarketcap.com/historical/20150621/" %>%
  read_html() %>%
  html_node("#__NEXT_DATA__") %>%
  html_text() %>%
  fromJSON()
# Keep the historical-listing entry of the parsed payload.
df_data <- json_data$props$initialState$cryptocurrency$listingHistorical$data
> head ( df_data )
id name symbol slug num_market_pairs date_added tags max_supply circulating_supply total_supply platform.id
1 1 Bitcoin BTC bitcoin NA 2013-04-28T00:00:00.000Z mineable 21000000 14298800 14298800 NA
2 52 XRP XRP xrp NA 2013-08-04T00:00:00.000Z 100000000000 31908551587 99998976018 NA
3 2 Litecoin LTC litecoin NA 2013-04-28T00:00:00.000Z mineable 84000000 40119404 40119404 NA
4 74 Dogecoin DOGE dogecoin NA 2013-12-15T00:00:00.000Z mineable NA 99890370337 99890370337 NA
5 463 BitShares BTS bitshares NA 2014-07-21T00:00:00.000Z 3600570502 2511953117 2511953117 NA
6 512 Stellar XLM stellar NA 2014-08-05T00:00:00.000Z NA 4837354256 100804167862 NA
Use html_table:
library(jsonlite)
library ( tidyverse)
library( rvest )
# Read the CoinMarketCap front page and parse every <table> element on it.
# html_table() is applied to all matched nodes, so json_data ends up as a list
# of tables (the name is kept from the question although no JSON is involved).
json_data <- "https://coinmarketcap.com/" %>%
  read_html() %>%
  html_nodes("table") %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  html_table(fill = TRUE)
It returns a table.
> json_data
[[1]]
# A tibble: 100 x 11
`` `#` Name Price `24h %` `7d %` `Market Cap` `Volume(24h)`
<lgl> <int> <chr> <chr> <chr> <chr> <chr> <chr>
1 NA 1 Bitcoin1~ $50,77~ 3.61% 5.53% $960.18B$960,~ $28,207,384,9685~
2 NA 2 Ethereum~ $4,104~ 2.18% 1.88% $487.89B$487,~ $17,920,397,7984~
3 NA 3 Binance ~ $548.65 1.94% 2.67% $91.52B$91,51~ $1,860,150,3053,~
4 NA 4 Tether4U~ $1.00 0.04% 0.01% $77.38B$77,38~ $68,556,169,0906~
5 NA 5 Solana5S~ $189.82 4.75% 3.83% $58.55B$58,55~ $2,144,421,38811~
6 NA 6 Cardano6~ $1.47 8.69% 15.79% $49.08B$49,07~ $1,964,583,1431,~
7 NA 7 XRP7XRP $1.01 4.26% 23.58% $47.82B$47,81~ $4,211,885,8344,~
8 NA 8 USD Coin~ $1.00 0.05% 0.05% $42.57B$42,57~ $4,039,920,4424,~
9 NA 9 Terra9LU~ $92.66 3.16% 37.30% $34.02B$34,02~ $4,141,070,96044~
10 NA 10 Avalanch~ $122.16 1.27% 17.38% $29.71B$29,70~ $1,291,116,76510~
# ... with 90 more rows, and 3 more variables: Circulating Supply <chr>,
# Last 7 Days <lgl>, <lgl>
I have a data set that is in a .Rdata format - something I haven't worked with before. I would like to export the data to a csv or related file for use in Python. I've used "write.csv", "write.table", and a few others and while they all seem like they are writing to the file, when I open it it's completely blank. I've also tried converting the data to a dataframe before exporting with no luck so far.
After importing the file in R, the data is labeled as a Large array (1499904 elements, 11.5 Mb) with the following attributes:
> attributes(data.station)
$`dim`
[1] 12 31 288 7 2
$dimnames
$dimnames[[1]]
[1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
$dimnames[[2]]
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "20" "21"
[22] "22" "23" "24" "25" "26" "27" "28" "29" "30" "31"
$dimnames[[3]]
[1] "" "00:05:00" "00:10:00" "00:15:00" "00:20:00" "00:25:00" "00:30:00" "00:35:00" "00:40:00"
[10] "00:45:00" "00:50:00" "00:55:00" "01:00:00" "01:05:00" "01:10:00" "01:15:00" "01:20:00" "01:25:00"
[19] "01:30:00" "01:35:00" "01:40:00" "01:45:00" "01:50:00" "01:55:00" "02:00:00" "02:05:00" "02:10:00"
[28] "02:15:00" "02:20:00" "02:25:00" "02:30:00" "02:35:00" "02:40:00" "02:45:00" "02:50:00" "02:55:00"
[37] "03:00:00" "03:05:00" "03:10:00" "03:15:00" "03:20:00" "03:25:00" "03:30:00" "03:35:00" "03:40:00"
[46] "03:45:00" "03:50:00" "03:55:00" "04:00:00" "04:05:00" "04:10:00" "04:15:00" "04:20:00" "04:25:00"
[55] "04:30:00" "04:35:00" "04:40:00" "04:45:00" "04:50:00" "04:55:00" "05:00:00" "05:05:00" "05:10:00"
[64] "05:15:00" "05:20:00" "05:25:00" "05:30:00" "05:35:00" "05:40:00" "05:45:00" "05:50:00" "05:55:00"
[73] "06:00:00" "06:05:00" "06:10:00" "06:15:00" "06:20:00" "06:25:00" "06:30:00" "06:35:00" "06:40:00"
[82] "06:45:00" "06:50:00" "06:55:00" "07:00:00" "07:05:00" "07:10:00" "07:15:00" "07:20:00" "07:25:00"
[91] "07:30:00" "07:35:00" "07:40:00" "07:45:00" "07:50:00" "07:55:00" "08:00:00" "08:05:00" "08:10:00"
[100] "08:15:00" "08:20:00" "08:25:00" "08:30:00" "08:35:00" "08:40:00" "08:45:00" "08:50:00" "08:55:00"
[109] "09:00:00" "09:05:00" "09:10:00" "09:15:00" "09:20:00" "09:25:00" "09:30:00" "09:35:00" "09:40:00"
[118] "09:45:00" "09:50:00" "09:55:00" "10:00:00" "10:05:00" "10:10:00" "10:15:00" "10:20:00" "10:25:00"
[127] "10:30:00" "10:35:00" "10:40:00" "10:45:00" "10:50:00" "10:55:00" "11:00:00" "11:05:00" "11:10:00"
[136] "11:15:00" "11:20:00" "11:25:00" "11:30:00" "11:35:00" "11:40:00" "11:45:00" "11:50:00" "11:55:00"
[145] "12:00:00" "12:05:00" "12:10:00" "12:15:00" "12:20:00" "12:25:00" "12:30:00" "12:35:00" "12:40:00"
[154] "12:45:00" "12:50:00" "12:55:00" "13:00:00" "13:05:00" "13:10:00" "13:15:00" "13:20:00" "13:25:00"
[163] "13:30:00" "13:35:00" "13:40:00" "13:45:00" "13:50:00" "13:55:00" "14:00:00" "14:05:00" "14:10:00"
[172] "14:15:00" "14:20:00" "14:25:00" "14:30:00" "14:35:00" "14:40:00" "14:45:00" "14:50:00" "14:55:00"
[181] "15:00:00" "15:05:00" "15:10:00" "15:15:00" "15:20:00" "15:25:00" "15:30:00" "15:35:00" "15:40:00"
[190] "15:45:00" "15:50:00" "15:55:00" "16:00:00" "16:05:00" "16:10:00" "16:15:00" "16:20:00" "16:25:00"
[199] "16:30:00" "16:35:00" "16:40:00" "16:45:00" "16:50:00" "16:55:00" "17:00:00" "17:05:00" "17:10:00"
[208] "17:15:00" "17:20:00" "17:25:00" "17:30:00" "17:35:00" "17:40:00" "17:45:00" "17:50:00" "17:55:00"
[217] "18:00:00" "18:05:00" "18:10:00" "18:15:00" "18:20:00" "18:25:00" "18:30:00" "18:35:00" "18:40:00"
[226] "18:45:00" "18:50:00" "18:55:00" "19:00:00" "19:05:00" "19:10:00" "19:15:00" "19:20:00" "19:25:00"
[235] "19:30:00" "19:35:00" "19:40:00" "19:45:00" "19:50:00" "19:55:00" "20:00:00" "20:05:00" "20:10:00"
[244] "20:15:00" "20:20:00" "20:25:00" "20:30:00" "20:35:00" "20:40:00" "20:45:00" "20:50:00" "20:55:00"
[253] "21:00:00" "21:05:00" "21:10:00" "21:15:00" "21:20:00" "21:25:00" "21:30:00" "21:35:00" "21:40:00"
[262] "21:45:00" "21:50:00" "21:55:00" "22:00:00" "22:05:00" "22:10:00" "22:15:00" "22:20:00" "22:25:00"
[271] "22:30:00" "22:35:00" "22:40:00" "22:45:00" "22:50:00" "22:55:00" "23:00:00" "23:05:00" "23:10:00"
[280] "23:15:00" "23:20:00" "23:25:00" "23:30:00" "23:35:00" "23:40:00" "23:45:00" "23:50:00" "23:55:00"
$dimnames[[4]]
[1] "tempinf" "tempf" "humidityin" "humidity" "solarradiation" "hourlyrainin"
[7] "windspeedmph"
$dimnames[[5]]
[1] "2020" "2021"
Any advice on how to handle this? Thank you!
You have to flatten the array to write it. First we create a reproducible example of your data:
# Reproducible stand-in for the question's data: a five-dimensional integer
# array whose extents are given by the label vectors below.
dnames <- list(
  LETTERS[1:2],
  LETTERS[3:5],
  LETTERS[6:9],
  LETTERS[10:14],
  LETTERS[15:20]
)
# One distinct integer per cell, filled in column-major order.
x <- seq_len(prod(lengths(dnames)))
y <- array(x, dim = lengths(dnames), dimnames = dnames)
str(y)
# int [1:2, 1:3, 1:4, 1:5, 1:6] 1 2 3 4 5 6 7 8 9 10 ...
# - attr(*, "dimnames")=List of 5
# ..$ : chr [1:2] "A" "B"
# ..$ : chr [1:3] "C" "D" "E"
# ..$ : chr [1:4] "F" "G" "H" "I"
# ..$ : chr [1:5] "J" "K" "L" "M" ...
# ..$ : chr [1:6] "O" "P" "Q" "R" ...
attributes(y)
# $dim
# [1] 2 3 4 5 6
#
# $dimnames
# $dimnames[[1]]
# [1] "A" "B"
#
# $dimnames[[2]]
# [1] "C" "D" "E"
#
# $dimnames[[3]]
# [1] "F" "G" "H" "I"
#
# $dimnames[[4]]
# [1] "J" "K" "L" "M" "N"
#
# $dimnames[[5]]
# [1] "O" "P" "Q" "R" "S" "T"
Now we flatten the array and write it to a file:
# Long format: one row per array cell, with one factor column per dimension
# (Var1..Var5) and the cell value in Freq.
z <- as.data.frame(as.table(y))
str(z)
# 'data.frame': 720 obs. of 6 variables:
# $ Var1: Factor w/ 2 levels "A","B": 1 2 1 2 1 2 1 2 1 2 ...
# $ Var2: Factor w/ 3 levels "C","D","E": 1 1 2 2 3 3 1 1 2 2 ...
# $ Var3: Factor w/ 4 levels "F","G","H","I": 1 1 1 1 1 1 2 2 2 2 ...
# $ Var4: Factor w/ 5 levels "J","K","L","M",..: 1 1 1 1 1 1 1 1 1 1 ...
# $ Var5: Factor w/ 6 levels "O","P","Q","R",..: 1 1 1 1 1 1 1 1 1 1 ...
# $ Freq: int 1 2 3 4 5 6 7 8 9 10 ...
# Write the long table without row names so the file holds only the six columns.
write.csv(z, file = "dfz.csv", row.names = FALSE)
Finally we read the file and convert it back to an array:
# Read the long table back, keeping the label columns as plain values for now.
a <- read.csv("dfz.csv", as.is = TRUE)
# Every column except Freq identifies one array dimension. Fix each column's
# factor levels in order of first appearance: the file was written with the
# labels in their original order, whereas default factor creation would sort
# them (e.g. "Apr" before "Jan") and silently permute the rebuilt array.
key_cols <- setdiff(names(a), "Freq")
a[key_cols] <- lapply(
  a[key_cols],
  function(col) factor(col, levels = unique(col))
)
# Cross-tabulate Freq over all key columns; each cell occurs once, so the
# "sum" per cell is just the stored value.
b <- xtabs(Freq ~ ., a)
# Strip the xtabs decorations so that b is a plain array again.
class(b) <- "array"
attr(b, "call") <- NULL
names(dimnames(b)) <- NULL
str(b)
# int [1:2, 1:3, 1:4, 1:5, 1:6] 1 2 3 4 5 6 7 8 9 10 ...
# - attr(*, "dimnames")=List of 5
# ..$ : chr [1:2] "A" "B"
# ..$ : chr [1:3] "C" "D" "E"
# ..$ : chr [1:4] "F" "G" "H" "I"
# ..$ : chr [1:5] "J" "K" "L" "M" ...
# ..$ : chr [1:6] "O" "P" "Q" "R" ...
I have a list.
I want to compare elements pairwise and then merge the elements if they meet a criterion.
sample list:
[[1]]
[1] "466" "1758" "975"
[[2]]
[1] "1528" "185" "975"
[[3]]
[1] "1528" "185" "975"
[[4]]
[1] "2892" "758" "971"
[[5]]
[1] "1321" "274" "969"
[[6]]
[1] "1321" "274" "969"
[[7]]
[1] "115" "1321" "969"
[[8]]
[1] "1321" "441" "969"
[[9]]
[1] "504" "61" "948"
[[10]]
[1] "504" "61" "948"
Suppose the criterion is simply: if element 1 is equal to element 2, then merge them and remove the second element from the list.
Expected output:
[[1]]
[1] "466" "1758" "975"
[[2]]
[1] "1528" "185" "975" "1528" "185" "975"
[[3]]
[1] "2892" "758" "971"
[[4]]
[1] "1321" "274" "969" "1321" "274" "969"
[[5]]
[1] "115" "1321" "969"
[[6]]
[1] "1321" "441" "969"
[[7]]
[1] "504" "61" "948" "504" "61" "948"
Thanks
We could create a logical index with duplicated, extract the elements from the list and concatenate with Map, update the list and extract only those elements based on the index
# TRUE for every element that repeats an earlier one (the later copies).
i1 <- duplicated(lst1)
# TRUE for every element that is repeated further on (the earlier copies).
i2 <- duplicated(lst1, fromLast = TRUE)
# Work on a copy so that lst1 itself is left untouched.
lst2 <- lst1
# Pair the later copies with the earlier copies, position by position, and
# store each concatenated pair in the later copy's slot.
# NOTE(review): the positional pairing lines up for the sample data, where
# each repeated element occurs exactly twice and the two copies are adjacent;
# confirm before relying on it for other arrangements.
lst2[i1] <- Map(c, lst1[i1], lst1[i2])
# Drop the earlier copies; the unique elements and the merged slots remain.
lst2[!i2]
#[[1]]
#[1] "466" "1758" "975"
#[[2]]
#[1] "1528" "185" "975" "1528" "185" "975"
#[[3]]
#[1] "2892" "758" "971"
#[[4]]
#[1] "1321" "274" "969" "1321" "274" "969"
#[[5]]
#[1] "115" "1321" "969"
#[[6]]
#[1] "1321" "441" "969"
#[[7]]
#[1] "504" "61" "948" "504" "61" "948"
Or using split
# Group the repeated elements by content and concatenate each group.
# split() needs one grouping value per element, so each vector is collapsed
# into a single string key; passing the list itself as the grouping would be
# read as a set of crossed factors and group by recycled position instead.
# Levels follow first appearance so the groups stay in list order.
# (Assumes the values themselves contain no commas.)
dup_key <- vapply(lst1[i1 | i2], paste, character(1), collapse = ",")
lst2[i1] <- lapply(
  split(lst1[i1 | i2], factor(dup_key, levels = unique(dup_key))),
  unlist,
  use.names = FALSE
)
data
# Sample input: ten character vectors of length three, some of them repeated.
lst1 <- list(
  c("466", "1758", "975"),
  c("1528", "185", "975"),
  c("1528", "185", "975"),
  c("2892", "758", "971"),
  c("1321", "274", "969"),
  c("1321", "274", "969"),
  c("115", "1321", "969"),
  c("1321", "441", "969"),
  c("504", "61", "948"),
  c("504", "61", "948")
)
I have a 5-level factor that looks like the following:
tmp
[1] NA
[2] 1,2,3,6,11,12,13,18,20,21,22,26,29,33,40,43,46
[3] NA
[4] NA
[5] 5,9,16,24,35,36,42
[6] 4,7,10,14,15,17,19,23,25,27,28,30,31,32,34,37,38,41,44,45,47,48,49,50
[7] 8,39
5 Levels: 1,2,3,6,11,12,13,18,20,21,22,26,29,33,40,43,46 ...
I want to access the items within each level except NA. So I use the levels() function, which gives me:
> levels(tmp)
[1] "1,2,3,6,11,12,13,18,20,21,22,26,29,33,40,43,46"
[2] "4,7,10,14,15,17,19,23,25,27,28,30,31,32,34,37,38,41,44,45,47,48,49,50"
[3] "5,9,16,24,35,36,42"
[4] "8,39"
[5] "NA"
Then I would like to access the elements in each level, and store them as numbers. However, for example,
>as.numeric(cat(levels(tmp)[3]))
5,9,16,24,35,36,42numeric(0)
Can you help me remove the commas within the numbers and the numeric(0) at the very end? I would like to have a numeric vector 5, 9, 16, 24, 35, 36, 42 so that I can use its values as indices to access a data frame. Thanks!
You need to use a combination of unlist, strsplit and unique.
First, recreate your data:
# Recreate the question's factor from inline text. Each line has no
# whitespace, so it is read as a single field in column V1, and the "NA"
# lines become missing values.
# stringsAsFactors = TRUE is spelled out because read.table() keeps strings
# as character by default on R >= 4.0.0, in which case levels(dat) is NULL.
dat <- read.table(text="
NA
1,2,3,6,11,12,13,18,20,21,22,26,29,33,40,43,46
NA
NA
5,9,16,24,35,36,42
4,7,10,14,15,17,19,23,25,27,28,30,31,32,34,37,38,41,44,45,47,48,49,50
8,39", stringsAsFactors = TRUE)$V1
Next, find all the unique levels, after using strsplit:
# Split every level at the commas, pool the pieces and list each distinct
# piece once. The pieces stay character, so the sort order is alphabetical.
sort(unique(unlist(lapply(levels(dat), strsplit, split = ","))))
[1] "1" "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "2" "20" "21" "22" "23" "24" "25" "26"
[20] "27" "28" "29" "3" "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" "4" "40" "41" "42" "43"
[39] "44" "45" "46" "47" "48" "49" "5" "50" "6" "7" "8" "9"
Does this do what you want?
# Split each level at the commas and convert the pieces to numbers, giving one
# numeric vector per level. The literal "NA" level is left out first, because
# only the real index sets are wanted.
levels_split <- strsplit(setdiff(levels(tmp), "NA"), ",", fixed = TRUE)
lapply(levels_split, as.numeric)
Using Andrie's dat
# Parse the numbers of all levels into one numeric vector.
val <- scan(text = levels(dat), sep = ",")
#Read 50 items
# Within each level the numbers increase, so a drop in value marks the start
# of the next level; cumsum() turns those starts into running group ids.
# TRUE rather than T: T is an ordinary variable that can be reassigned.
split(val, cumsum(c(TRUE, diff(val) < 0)))
#$`1`
#[1] 1 2 3 6 11 12 13 18 20 21 22 26 29 33 40 43 46
#$`2`
#[1] 4 7 10 14 15 17 19 23 25 27 28 30 31 32 34 37 38 41 44 45 47 48 49 50
#$`3`
#[1] 5 9 16 24 35 36 42
#$`4`
#[1] 8 39