Scraping front page Coinmarketcap into a dataframe - r

Hi, what I want is to get the front page of Coinmarketcap into a dataframe. This is what I have so far, but the data looks unorganized and I don't know how to turn it into a neat df.
library(jsonlite)
library ( tidyverse)
library( rvest )
# lets get what is marketcap today.
json_data <- read_html(c ( 'https://coinmarketcap.com/')) %>%
html_node("#__NEXT_DATA__") %>%
html_text() %>%
fromJSON()
json_data$props$initialState$cryptocurrency$listingLatest$data
What I end up getting is a long list that I cannot make sense of. I know the data is in there, because the list looks something like this, but I don't know how to parse it.
[121] "quotes.2.percentChange60d" "quotes.2.percentChange7d" "quotes.2.percentChange90d" "quotes.2.price"
[125] "quotes.2.selfReportedMarketCap" "quotes.2.turnover" "quotes.2.volume24h" "quotes.2.volume30d"
[129] "quotes.2.volume7d" "quotes.2.ytdPriceChangePercentage" "rank" "selfReportedCirculatingSupply"
[133] "slug" "symbol" "totalSupply" "tvl"
[[1]]$id
[1] "COMPRESSED_KEYS_ARR"
[[1]]$excludeProps
[1] "auditInfoList"
[[2]]
[1] "68789.6259389221" "65.5260009765625" "18908943" "1" "2013-04-28T00:00:00.000Z"
[6] "TRUE" "FALSE" "50755.7211665326" "1" "1"
[11] "FALSE" "2021-12-23T19:20:02.000Z" "48065.8375264037" "8093" "21000000"
[16] "Bitcoin" "40.4175" "1065349214847.34" "2021-12-23T19:21:02.000Z" "18897342.6115399"
[21] "18897342.6115399" "BTC" "0" "0" "0"
[26] "0" "0" "0" "1" "0"
[31] "0.02793205" "527841.47774037" "21776428.8780472" "3626419.86588612" "72.706"
[36] "40.4175" "1065349214847.34" "2021-12-23T19:21:02.000Z" "232885004.198773" "232885004.198773"
[41] "ETH" "-0.189131" "0.653349" "-11.42415087" "-16.02722155"
[46] "3.129837" "19.93155879" "12.31613021" "0" "0.02793205"
[51] "6504955.07684694" "268365972.663341" "44690876.5456617" "72.706" "40.4175"
[56] "1065349214847.34" "2021-12-23T19:20:02.000Z" "959267979935.385" "959267979935.385" "USD"
[61] "0.53649283" "3.98091259" "-11.42415087" "-16.02722155" "5.84148872"
[66] "19.93155879" "50730.9149927304" "0" "0.02793205" "26794319100.1314"
[71] "1105416320667.99" "184084531389.181" "72.706" "40.4175" "1065349214847.34"
[76] "2021-12-23T19:21:02.000Z" "18897342.6115399" "18897342.6115399" "BTC" "0"
[81] "0" "0" "0" "0" "0"
[86] "1" "0" "0.02793205" "527841.47774037" "21776428.8780472"
[91] "3626419.86588612" "72.706" "40.4175" "1065349214847.34" "2021-12-23T19:21:02.000Z"
[96] "232885004.198773" "232885004.198773" "ETH" "-0.189131" "0.653349"
[101] "-11.42415087" "-16.02722155" "3.129837" "19.93155879" "12.31613021"
[106] "0" "0.02793205" "6504955.07684694" "268365972.663341" "44690876.5456617"
[111] "72.706" "40.4175" "1065349214847.34" "2021-12-23T19:20:02.000Z" "959267979935.385"
[116] "959267979935.385" "USD" "0.53649283" "3.98091259" "-11.42415087"
[121] "-16.02722155" "5.84148872" "19.93155879" "50730.9149927304" "0"
[126] "0.02793205" "26794319100.1314" "1105416320667.99" "184084531389.181" "72.706"
[131] "1" "0" "bitcoin" "BTC" "18908943"
[136] NA NA
[[3]]
[1] "4891.70469755141" "0.420897006988525" "118860687.6865" "2" "2015-08-07T00:00:00.000Z"
[6] "TRUE" "FALSE" "4119.08504574469" "1027" "1"
[11] "FALSE" "2021-12-23T19:20:02.000Z" "3897.23447281111" "4509" NA
[16] "Ethereum" "20.6197" "489234090606.33" "2021-12-23T19:21:02.000Z" "9637790.92058901"
[21] "9637790.92058901" "BTC" "0.277187" "-0.842643" "-4.49917037"
What I eventually want is a data frame like the one I can retrieve for the historical data:
json_data <- read_html("https://coinmarketcap.com/historical/20150621/") %>%
html_node("#__NEXT_DATA__") %>%
html_text() %>%
fromJSON()
df_data <- json_data$props$initialState$cryptocurrency$listingHistorical$data
> head ( df_data )
id name symbol slug num_market_pairs date_added tags max_supply circulating_supply total_supply platform.id
1 1 Bitcoin BTC bitcoin NA 2013-04-28T00:00:00.000Z mineable 21000000 14298800 14298800 NA
2 52 XRP XRP xrp NA 2013-08-04T00:00:00.000Z 100000000000 31908551587 99998976018 NA
3 2 Litecoin LTC litecoin NA 2013-04-28T00:00:00.000Z mineable 84000000 40119404 40119404 NA
4 74 Dogecoin DOGE dogecoin NA 2013-12-15T00:00:00.000Z mineable NA 99890370337 99890370337 NA
5 463 BitShares BTS bitshares NA 2014-07-21T00:00:00.000Z 3600570502 2511953117 2511953117 NA
6 512 Stellar XLM stellar NA 2014-08-05T00:00:00.000Z NA 4837354256 100804167862 NA

Use html_table:
library(jsonlite)
library ( tidyverse)
library( rvest )
# Fetch the CoinMarketCap front page and parse every <table> element on it;
# the result is a list with one data frame per table found.
json_data <- read_html("https://coinmarketcap.com/") %>%
  html_nodes("table") %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  html_table(fill = TRUE)
It returns a table.
> json_data
[[1]]
# A tibble: 100 x 11
`` `#` Name Price `24h %` `7d %` `Market Cap` `Volume(24h)`
<lgl> <int> <chr> <chr> <chr> <chr> <chr> <chr>
1 NA 1 Bitcoin1~ $50,77~ 3.61% 5.53% $960.18B$960,~ $28,207,384,9685~
2 NA 2 Ethereum~ $4,104~ 2.18% 1.88% $487.89B$487,~ $17,920,397,7984~
3 NA 3 Binance ~ $548.65 1.94% 2.67% $91.52B$91,51~ $1,860,150,3053,~
4 NA 4 Tether4U~ $1.00 0.04% 0.01% $77.38B$77,38~ $68,556,169,0906~
5 NA 5 Solana5S~ $189.82 4.75% 3.83% $58.55B$58,55~ $2,144,421,38811~
6 NA 6 Cardano6~ $1.47 8.69% 15.79% $49.08B$49,07~ $1,964,583,1431,~
7 NA 7 XRP7XRP $1.01 4.26% 23.58% $47.82B$47,81~ $4,211,885,8344,~
8 NA 8 USD Coin~ $1.00 0.05% 0.05% $42.57B$42,57~ $4,039,920,4424,~
9 NA 9 Terra9LU~ $92.66 3.16% 37.30% $34.02B$34,02~ $4,141,070,96044~
10 NA 10 Avalanch~ $122.16 1.27% 17.38% $29.71B$29,70~ $1,291,116,76510~
# ... with 90 more rows, and 3 more variables: Circulating Supply <chr>,
# Last 7 Days <lgl>, <lgl>

Related

Replace string based on partial match - tidyverse

I have a character column. In some observations, ph was added to the end of the string. I want any string in the column that contains ph to be replaced with "NA". Here is what I have tried, along with the error it gives.
Column
wind_speed_m_s <- c("1.7", "0.7", "0", "0.6", "0.4", "1.2", "1.9", "1.3", "2.0, gust to 3.7",
"0.5", "1.8", "1.4", "3.4", "2.8", "1.6", "2", NA, "0.9", "0.8",
"1", "1.1", "2.6", "2.4", "1.1ph", "1.7 kt", "2.1", "1.5", "0ph",
"3", ".4 /s", "0.3", "2.3", "0.2", "3.3", "3.9ph", "1.5ph", "1ph",
"2ph", "1.7ph", "0.8 ph", "1.5 ph", "2.2", "1.9 k/hr", "2.5",
"NA", "0.4/s", "1/s")
date <- data_raw %>%
mutate(wind_speed_m_s = str_replace(wind_speed_m_s, pattern = str_detect("ph"), "NA"))
Error in `mutate()`:
! Problem while computing `wind_speed_m_s = str_replace(wind_speed_m_s, pattern = str_detect("ph"), "NA")`.
Caused by error in `type()`:
! argument "pattern" is missing, with no default
Backtrace:
1. ... %>% ...
9. stringr::str_detect("ph")
10. stringr:::type(pattern)
We may use str_detect within case_when. The OP's code called str_detect with only a single argument (the pattern) and without the data to search in.
library(dplyr)
library(stringr)
# Keep the values that do not contain "ph"; anything else is left unmatched
# by case_when() and therefore becomes NA.
data_raw %>%
  mutate(
    wind_speed_m_s = case_when(
      !str_detect(wind_speed_m_s, "ph") ~ wind_speed_m_s
    )
  )
-output
wind_speed_m_s
1 1.7
2 0.7
3 0
4 0.6
5 0.4
6 1.2
7 1.9
8 1.3
9 2.0, gust to 3.7
10 0.5
11 1.8
12 1.4
13 3.4
14 2.8
15 1.6
16 2
17 <NA>
18 0.9
19 0.8
20 1
21 1.1
22 2.6
23 2.4
24 <NA>
25 1.7 kt
26 2.1
27 1.5
28 <NA>
29 3
30 .4 /s
31 0.3
32 2.3
33 0.2
34 3.3
35 <NA>
36 <NA>
37 <NA>
38 <NA>
39 <NA>
40 <NA>
41 <NA>
42 2.2
43 1.9 k/hr
44 2.5
45 NA
46 0.4/s
47 1/s
You can use sub:
sub(".*ph$", NA, wind_speed_m_s)
[1] "1.7" "0.7" "0"
[4] "0.6" "0.4" "1.2"
[7] "1.9" "1.3" "2.0, gust to 3.7"
[10] "0.5" "1.8" "1.4"
[13] "3.4" "2.8" "1.6"
[16] "2" NA "0.9"
[19] "0.8" "1" "1.1"
[22] "2.6" "2.4" NA
[25] "1.7 kt" "2.1" "1.5"
[28] NA "3" ".4 /s"
[31] "0.3" "2.3" "0.2"
[34] "3.3" NA NA
[37] NA NA NA
[40] NA NA "2.2"
[43] "1.9 k/hr" "2.5" "NA"
[46] "0.4/s" "1/s"
Also can do:
is.na(wind_speed_m_s) <- grepl("ph$", wind_speed_m_s)
Note that $ is needed to anchor the match to the end of the string, in case there is another ph in the middle of a string. If you need to match ph anywhere in the string, whether at the end or in the middle, just remove the $.
We can use grep to identify where the pattern is and use the result as an index for replacement.
> wind_speed_m_s[grep("ph", wind_speed_m_s)] <- NA
> wind_speed_m_s
[1] "1.7" "0.7" "0" "0.6" "0.4" "1.2"
[7] "1.9" "1.3" "2.0, gust to 3.7" "0.5" "1.8" "1.4"
[13] "3.4" "2.8" "1.6" "2" NA "0.9"
[19] "0.8" "1" "1.1" "2.6" "2.4" NA
[25] "1.7 kt" "2.1" "1.5" NA "3" ".4 /s"
[31] "0.3" "2.3" "0.2" "3.3" NA NA
[37] NA NA NA NA NA "2.2"
[43] "1.9 k/hr" "2.5" "NA" "0.4/s" "1/s"

How to obtain values from a matrix using stored numbers as indexes in R

I am really new to R and I can't find a way of subsetting matrix rows given a list of indexes.
I have a dataframe called 'demo' with 855 rows and 3 columns that looks like this:
## Subject AGE DX
## 1 011_S_0002_bl 74.3 0
## 2 011_S_0003_bl 81.3 1
## 3 011_S_0005_bl 73.7 0
## 4 022_S_0007_bl 75.4 1
## 5 011_S_0008_bl 84.5 0
## 6 011_S_0010_bl 73.9 1
From this, I want to extract the indexes for all the rows that match DX == 1. So I do:
rownames(demo[demo$DX == 1,])
Which returns:
## [1] "2" "4" "6" "14" "20" "31" "33" "34" "36" "39" "40" "41"
## [13] "46" "47" "53" "54" "55" "58" "64" "67" "69" "70" "72" "81"
## [25] "84" "87" "88" "92" "96" "98" "100" "101" "106" "108" "109" "112"
....
Now I have a matrix called T_hat with 855 rows and 1 column that looks like this:
## [,1]
## [1,] 5.812925
## [2,] 10.477721
## [3,] 1.519726
## [4,] -0.221328
## [5,] 1.784920
What I want is to use the numbers in 'al' to subset the values with the corresponding numbers in the indexes and to get something like this:
## [,1]
## [2,] 10.477721
## [4,] -0.221328
...and so on.
I've tried all these options:
T_hat_a <- T_hat[rownames(demo[demo$DX == 1,]),1]
T_hat_b <- T_hat[is.numeric(rownames(demo[demo$DX == 1,])),1]
T_hat_c <- T_hat[rownames(T_hat) %in% rownames(demo[demo$DX == 1,]),1]
T_hat_d <- T_hat[rownames(T_hat) %in% is.numeric(rownames(demo[demo$DX == 1,])),1]
But none returns what I expect.
T_hat_a = ERROR "no 'dimnames' attributes for array
T_hat_b = numeric(0)
T_hat_c = numeric(0)
T_hat_d = numeric(0)
I've also tried to convert my matrix to a df, but only the T_hat_a option returns a result, but it is not at all as desired, since it returns different values...

write.csv returns a blank file in R

I have a data set that is in a .Rdata format - something I haven't worked with before. I would like to export the data to a csv or related file for use in Python. I've used "write.csv", "write.table", and a few others and while they all seem like they are writing to the file, when I open it it's completely blank. I've also tried converting the data to a dataframe before exporting with no luck so far.
After importing the file in R, the data is labeled as a Large array (1499904 elements, 11.5 Mb) with the following attributes:
> attributes(data.station)
$`dim`
[1] 12 31 288 7 2
$dimnames
$dimnames[[1]]
[1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
$dimnames[[2]]
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "20" "21"
[22] "22" "23" "24" "25" "26" "27" "28" "29" "30" "31"
$dimnames[[3]]
[1] "" "00:05:00" "00:10:00" "00:15:00" "00:20:00" "00:25:00" "00:30:00" "00:35:00" "00:40:00"
[10] "00:45:00" "00:50:00" "00:55:00" "01:00:00" "01:05:00" "01:10:00" "01:15:00" "01:20:00" "01:25:00"
[19] "01:30:00" "01:35:00" "01:40:00" "01:45:00" "01:50:00" "01:55:00" "02:00:00" "02:05:00" "02:10:00"
[28] "02:15:00" "02:20:00" "02:25:00" "02:30:00" "02:35:00" "02:40:00" "02:45:00" "02:50:00" "02:55:00"
[37] "03:00:00" "03:05:00" "03:10:00" "03:15:00" "03:20:00" "03:25:00" "03:30:00" "03:35:00" "03:40:00"
[46] "03:45:00" "03:50:00" "03:55:00" "04:00:00" "04:05:00" "04:10:00" "04:15:00" "04:20:00" "04:25:00"
[55] "04:30:00" "04:35:00" "04:40:00" "04:45:00" "04:50:00" "04:55:00" "05:00:00" "05:05:00" "05:10:00"
[64] "05:15:00" "05:20:00" "05:25:00" "05:30:00" "05:35:00" "05:40:00" "05:45:00" "05:50:00" "05:55:00"
[73] "06:00:00" "06:05:00" "06:10:00" "06:15:00" "06:20:00" "06:25:00" "06:30:00" "06:35:00" "06:40:00"
[82] "06:45:00" "06:50:00" "06:55:00" "07:00:00" "07:05:00" "07:10:00" "07:15:00" "07:20:00" "07:25:00"
[91] "07:30:00" "07:35:00" "07:40:00" "07:45:00" "07:50:00" "07:55:00" "08:00:00" "08:05:00" "08:10:00"
[100] "08:15:00" "08:20:00" "08:25:00" "08:30:00" "08:35:00" "08:40:00" "08:45:00" "08:50:00" "08:55:00"
[109] "09:00:00" "09:05:00" "09:10:00" "09:15:00" "09:20:00" "09:25:00" "09:30:00" "09:35:00" "09:40:00"
[118] "09:45:00" "09:50:00" "09:55:00" "10:00:00" "10:05:00" "10:10:00" "10:15:00" "10:20:00" "10:25:00"
[127] "10:30:00" "10:35:00" "10:40:00" "10:45:00" "10:50:00" "10:55:00" "11:00:00" "11:05:00" "11:10:00"
[136] "11:15:00" "11:20:00" "11:25:00" "11:30:00" "11:35:00" "11:40:00" "11:45:00" "11:50:00" "11:55:00"
[145] "12:00:00" "12:05:00" "12:10:00" "12:15:00" "12:20:00" "12:25:00" "12:30:00" "12:35:00" "12:40:00"
[154] "12:45:00" "12:50:00" "12:55:00" "13:00:00" "13:05:00" "13:10:00" "13:15:00" "13:20:00" "13:25:00"
[163] "13:30:00" "13:35:00" "13:40:00" "13:45:00" "13:50:00" "13:55:00" "14:00:00" "14:05:00" "14:10:00"
[172] "14:15:00" "14:20:00" "14:25:00" "14:30:00" "14:35:00" "14:40:00" "14:45:00" "14:50:00" "14:55:00"
[181] "15:00:00" "15:05:00" "15:10:00" "15:15:00" "15:20:00" "15:25:00" "15:30:00" "15:35:00" "15:40:00"
[190] "15:45:00" "15:50:00" "15:55:00" "16:00:00" "16:05:00" "16:10:00" "16:15:00" "16:20:00" "16:25:00"
[199] "16:30:00" "16:35:00" "16:40:00" "16:45:00" "16:50:00" "16:55:00" "17:00:00" "17:05:00" "17:10:00"
[208] "17:15:00" "17:20:00" "17:25:00" "17:30:00" "17:35:00" "17:40:00" "17:45:00" "17:50:00" "17:55:00"
[217] "18:00:00" "18:05:00" "18:10:00" "18:15:00" "18:20:00" "18:25:00" "18:30:00" "18:35:00" "18:40:00"
[226] "18:45:00" "18:50:00" "18:55:00" "19:00:00" "19:05:00" "19:10:00" "19:15:00" "19:20:00" "19:25:00"
[235] "19:30:00" "19:35:00" "19:40:00" "19:45:00" "19:50:00" "19:55:00" "20:00:00" "20:05:00" "20:10:00"
[244] "20:15:00" "20:20:00" "20:25:00" "20:30:00" "20:35:00" "20:40:00" "20:45:00" "20:50:00" "20:55:00"
[253] "21:00:00" "21:05:00" "21:10:00" "21:15:00" "21:20:00" "21:25:00" "21:30:00" "21:35:00" "21:40:00"
[262] "21:45:00" "21:50:00" "21:55:00" "22:00:00" "22:05:00" "22:10:00" "22:15:00" "22:20:00" "22:25:00"
[271] "22:30:00" "22:35:00" "22:40:00" "22:45:00" "22:50:00" "22:55:00" "23:00:00" "23:05:00" "23:10:00"
[280] "23:15:00" "23:20:00" "23:25:00" "23:30:00" "23:35:00" "23:40:00" "23:45:00" "23:50:00" "23:55:00"
$dimnames[[4]]
[1] "tempinf" "tempf" "humidityin" "humidity" "solarradiation" "hourlyrainin"
[7] "windspeedmph"
$dimnames[[5]]
[1] "2020" "2021"
Any advice on how to handle this? Thank you!
You have to flatten the array to write it. First we create a reproducible example of your data:
# Small 5-dimensional integer array with named dimensions, standing in for
# the real data.
x <- seq_len(2 * 3 * 4 * 5 * 6)
dnames <- list(
  LETTERS[1:2],
  LETTERS[3:5],
  LETTERS[6:9],
  LETTERS[10:14],
  LETTERS[15:20]
)
y <- array(x, dim = c(2, 3, 4, 5, 6), dimnames = dnames)
str(y)
# int [1:2, 1:3, 1:4, 1:5, 1:6] 1 2 3 4 5 6 7 8 9 10 ...
# - attr(*, "dimnames")=List of 5
# ..$ : chr [1:2] "A" "B"
# ..$ : chr [1:3] "C" "D" "E"
# ..$ : chr [1:4] "F" "G" "H" "I"
# ..$ : chr [1:5] "J" "K" "L" "M" ...
# ..$ : chr [1:6] "O" "P" "Q" "R" ...
attributes(y)
# $dim
# [1] 2 3 4 5 6
#
# $dimnames
# $dimnames[[1]]
# [1] "A" "B"
#
# $dimnames[[2]]
# [1] "C" "D" "E"
#
# $dimnames[[3]]
# [1] "F" "G" "H" "I"
#
# $dimnames[[4]]
# [1] "J" "K" "L" "M" "N"
#
# $dimnames[[5]]
# [1] "O" "P" "Q" "R" "S" "T"
Now we flatten the array and write it to a file:
z <- as.data.frame.table(y)
str(z)
# 'data.frame': 720 obs. of 6 variables:
# $ Var1: Factor w/ 2 levels "A","B": 1 2 1 2 1 2 1 2 1 2 ...
# $ Var2: Factor w/ 3 levels "C","D","E": 1 1 2 2 3 3 1 1 2 2 ...
# $ Var3: Factor w/ 4 levels "F","G","H","I": 1 1 1 1 1 1 2 2 2 2 ...
# $ Var4: Factor w/ 5 levels "J","K","L","M",..: 1 1 1 1 1 1 1 1 1 1 ...
# $ Var5: Factor w/ 6 levels "O","P","Q","R",..: 1 1 1 1 1 1 1 1 1 1 ...
# $ Freq: int 1 2 3 4 5 6 7 8 9 10 ...
write.csv(z, file="dfz.csv", row.names=FALSE)
Finally we read the file and convert it back to an array:
# Read the CSV back; as.is=FALSE converts the character columns to factors.
# NOTE(review): factor levels come back in sorted order, so dimnames that are
# not already sorted (e.g. month names) would likely be reordered - confirm.
a <- read.csv("dfz.csv", as.is=FALSE)
# Cross-tabulate Freq by all remaining columns, giving one cell per CSV row.
b <- xtabs(Freq~., a)
# Drop the xtabs/table class and the extras xtabs() attaches, so that b is a
# plain array with unnamed dimnames like the original y.
class(b) <- "array"
attr(b, "call") <- NULL
names(dimnames(b)) <- NULL
str(b)
# int [1:2, 1:3, 1:4, 1:5, 1:6] 1 2 3 4 5 6 7 8 9 10 ...
# - attr(*, "dimnames")=List of 5
# ..$ : chr [1:2] "A" "B"
# ..$ : chr [1:3] "C" "D" "E"
# ..$ : chr [1:4] "F" "G" "H" "I"
# ..$ : chr [1:5] "J" "K" "L" "M" ...
# ..$ : chr [1:6] "O" "P" "Q" "R" ...

Calculate individual list totals and output as a vector

I have a list with 1000 elements, each of which is a vector of 100 values. I want to sum the values within each element, but each time every element gives the same value as output. How can this be done?
[[1]]
[1] ....
...
[[1000]]
[1] 41.796588400 1.822177817 0.516105021 16.554318711 22.441116192 11.557223237
[7] 11.610201393 14.126722844 11.165417165 17.024791387 97.744736046 1.053429931
[13] 5.409970556 10.534262466 2.402112926 61.989253054 89.141315737 7.831002594
[19] 0.229311742 1.167366732 74.131595409 26.837412033 0.315262754 3.662595556
[25] 7.621307733 6.599907692 2.436551709 50.371429645 0.046652228 84.050028030
[31] 2.547629448 8.308966616 9.566100355 1.324906725 35.296845475 80.754003596
[37] 53.073032197 0.506524295 0.478822391 14.147898302 0.292336489 45.329947475
[43] 25.455486564 20.790057839 12.622231025 38.933121408 41.196719977 3.762513880
[49] 88.326438565 0.006009079 18.974940292 18.964924610 4.299943187 0.266114761
[55] 16.597228049 1.030058767 15.304970202 12.220887655 2.229263654 18.506392124
[61] 8.455070746 0.000839928 0.621677398 16.936509072 10.599982129 5.542332913
[67] 0.773795046 20.199178278 33.488631341 4.624800890 0.069347211 11.352912859
[73] 20.614961806 2.986133970 1.185518764 33.563723467 15.468933119 2.360548396
[79] 8.237662458 50.279689216 1.307944799 17.654806254 42.129699374 2.352254185
[85] 1.069597812 12.714936626 4.677094902 0.085737588 11.653287453 15.610804195
[91] 5.489030702 0.202041121 2.849800157 5.284956342 0.128010723 5.731836865
[97] 3.635845442 11.560654785 0.800697847 0.719558593
is it not as simple as:
lapply(x, sum)
? Here is what I get:
> x <- list(rep(1,100), rep(2,100), rep(3,100))
> lapply(x,length)
[[1]]
[1] 100
[[2]]
[1] 100
[[3]]
[1] 100
> lapply(x,head)
[[1]]
[1] 1 1 1 1 1 1
[[2]]
[1] 2 2 2 2 2 2
[[3]]
[1] 3 3 3 3 3 3
> lapply(x,sum)
[[1]]
[1] 100
[[2]]
[1] 200
[[3]]
[1] 300

reformatting data frame with List in R

Hello, I am trying to reshape a data.frame in R such that each row is repeated once for every value in the corresponding entry of a list: the first row is repeated with the values from the first list entry, the second row with the values from the second entry, and so on.
The list is called wrk, dfx is the data frame I want to reshape, and listOut is what I want to end up with.
Thank you very much for your help.
> wrk
[[1]]
[1] "41" "42" "44" "45" "97" "99" "100" "101" "102"
[10] "103" "105" "123" "124" "126" "127" "130" "132" "135"
[19] "136" "137" "138" "139" "140" "141" "158" "159" "160"
[28] "161" "162" "163" "221" "223" "224" ""
[[2]]
[1] "41" "42" "44" "45" "98" "99" "100" "101" "102"
[10] "103" "105" "123" "124" "126" "127" "130" "132" "135"
[19] "136" "137" "138" "139" "140" "141" "158" "159" "160"
[28] "161" "162" "163" "221" "223" "224" ""
>dfx
projectScore highestRankingGroup
1 0.8852 1
2 0.8845 2
>listOut
projectScore highestRankingGroup wrk
1 0.8852 1 41
2 0.8852 1 42
3 0.8852 1 44
4 0.8852 1 45
5 0.8852 1 97
6 0.8852 1 99
7 0.8852 1 100
8 0.8852 1 101
...
35 0.8845 2 41
36 0.8845 2 42
37 0.8845 2 44
38 0.8845 2 45
39 0.8845 2 98
40 0.8845 2 99
41 0.8845 2 100
How about replicating the rows of dfx and cbind-ing them with the unlisted wrk:
# Repeat each dfx row once per value in the matching wrk element, then attach
# the flattened values. lengths() always returns an integer vector, whereas
# sapply(wrk, length) returns list() when wrk is empty.
listOut <- cbind(
  dfx[rep(seq_along(wrk), lengths(wrk)), ],
  wrk = unlist(wrk)
)
How about:
If wrk contains simple vectors like in your example:
> # lengths() is the type-stable replacement for sapply(wrk, length).
> szs <- lengths(wrk)
> fulldfr <- do.call(c, wrk)
> listOut <- cbind(dfx[rep(seq_along(szs), szs), ], fulldfr)
If wrk contains dataframes:
> # vapply() guarantees an integer vector of row counts, even for empty wrk.
> szs <- vapply(wrk, nrow, integer(1))
> fulldfr <- do.call(rbind, wrk)
> listOut <- cbind(dfx[rep(seq_along(szs), szs), ], fulldfr)
How about:
# Cross the rows of dfx with the wrk values. Passing the two dfx columns to
# expand.grid() separately would also pair each score with every group.
merge(dfx, data.frame(wrk = wrk[[1]]), by = NULL)
Edit:
Maybe you can elaborate a bit more, because this does seem to work:
# Worked example: two dfx rows, each to be repeated for all 33 wrk values.
a <- c(
  "41", "42", "44", "45", "97", "99", "100", "101", "102", "103", "105",
  "123", "124", "126", "127", "130", "132", "135", "136", "137", "138",
  "139", "140", "141", "158", "159", "160", "161", "162", "163", "221",
  "223", "224"
)
wrk <- list(a, a)
dfx <- data.frame(
  projectScore = c(0.8852, 0.8845),
  highestRankingGroup = c(1, 2)
)
# Cross the *rows* of dfx with the wrk values (by = NULL is a Cartesian
# product). Feeding both dfx columns to expand.grid() separately would pair
# every score with every group and return 2 * 2 * 33 rows instead of 2 * 33.
# wrk becomes a factor in order of appearance so that sorting keeps the
# original sequence ("41" before "100") rather than sorting as text.
listOut <- merge(
  dfx,
  data.frame(wrk = factor(wrk[[1]], levels = unique(wrk[[1]]))),
  by = NULL
)
listOut[
  order(-listOut$projectScore, listOut$highestRankingGroup, listOut$wrk),
]

Resources