I want to recreate in R the following table :
I have been provided with only these three parameters:
initial_deposit = 15000
contract_value = 250
closing_stock_indices = c(1000, 1002, 994, 998, 997)
My effort in R is this:
cl =c(1000,1002,994,998,997) # The closing stock indices.
re = c(0,diff(cl))
t = time(cl)
mtm = re*250 # The contract value of 250.
mb = 15000+mtm # The initial deposit of 15000.
vm = ifelse(mb>0,0,mtm)
d = data.frame(t,cl,re,mtm,mb,vm);d
but I cannot do the last two columns.Any help ?
You may do the following
library(tidyverse)
initial_deposit = 15000
contract_value = 250
closing_stock_indices = c(1000, 1002, 994, 998, 997)
(seq_along(closing_stock_indices) - 1) %>%
as.data.frame() %>%
setNames('Day') %>%
mutate(Closing_SI = closing_stock_indices,
Daily_change = c(0, diff(Closing_SI)),
Marking_to_market = contract_value * Daily_change,
Margin_balance = accumulate(Marking_to_market[-1], .init = initial_deposit,
~ if (.x >= initial_deposit) .x + .y else initial_deposit + .y),
Variation_Margin = -1 * pmin(Margin_balance - initial_deposit, 0),
REquired_Deposit = c(initial_deposit, Variation_Margin[-n()]))
Day Closing_SI Daily_change Marking_to_market Margin_balance Variation_Margin REquired_Deposit
1 0 1000 0 0 15000 0 15000
2 1 1002 2 500 15500 0 0
3 2 994 -8 -2000 13500 1500 0
4 3 998 4 1000 16000 0 1500
5 4 997 -1 -250 15750 0 0
Check on another vector
initial_deposit = 15000
contract_value = 250
closing_stock_indices = c(1000, 1002, 994, 998, 997, 990, 1000)
(seq_along(closing_stock_indices) - 1) %>%
as.data.frame() %>%
setNames('Day') %>%
mutate(Closing_SI = closing_stock_indices,
Daily_change = c(0, diff(Closing_SI)),
Marking_to_market = contract_value * Daily_change,
Margin_balance = accumulate(Marking_to_market[-1], .init = initial_deposit,
~ if (.x >= initial_deposit) .x + .y else initial_deposit + .y),
Variation_Margin = -1 * pmin(Margin_balance - initial_deposit, 0),
REquired_Deposit = c(initial_deposit, Variation_Margin[-n()]))
Day Closing_SI Daily_change Marking_to_market Margin_balance Variation_Margin REquired_Deposit
1 0 1000 0 0 15000 0 15000
2 1 1002 2 500 15500 0 0
3 2 994 -8 -2000 13500 1500 0
4 3 998 4 1000 16000 0 1500
5 4 997 -1 -250 15750 0 0
6 5 990 -7 -1750 14000 1000 0
7 6 1000 10 2500 17500 0 1000
Related
I have tried to use UpsetR to visualize the input file which can be found here
> library("UpSetR")
> orthogroups_df<- read.table("orthogroups.GeneCount.tsv", header=T, stringsAsFactors = F)
> #All species
> selected_species <- colnames(orthogroups_df)[2:(ncol(orthogroups_df) -1)]
> selected_species
[1] "Atha" "Cann" "NQLD" "Natt" "Ngla" "Nlab" "Nsyl" "Ntab" "Ntom" "Slyc" "Stub" "Vvin"
> head(orthogroups_df)
Orthogroup Atha Cann NQLD Natt Ngla Nlab Nsyl Ntab Ntom Slyc Stub Vvin Total
1 OG0000000 0 0 965 0 0 3 0 0 0 0 0 0 968
2 OG0000001 0 1 3 0 0 448 0 0 0 0 0 0 452
3 OG0000002 0 1 313 0 0 120 1 0 1 0 0 0 436
4 OG0000003 0 93 15 21 46 16 33 63 36 25 39 26 413
5 OG0000004 1 42 2 34 109 6 8 154 11 9 4 0 380
6 OG0000005 0 2 61 1 34 44 91 70 43 20 1 0 367
> ncol(orthogroups_df)
[1] 14
> orthogroups_df[orthogroups_df > 0] <- 1
> upset(orthogroups_df,
+ nsets = ncol(orthogroups_df),
+ sets = rev(c(selected_species)),
+ queries = list(list(query = intersects, params = list("NQLD", "Nlab", "Nsyl"), color = "#238c45", active = T),
+ list(query = intersects, params = list("NQLD", "Nlab"), color = "#ffd977", active = T)))
Error in `$<-.data.frame`(`*tmp*`, "freq", value = 45L) :
replacement has 1 row, data has 0
How is it possible to fix the above error?
We need to set the number of intersects - nintersects - to a higher number so that sets in query params can be shown.
By default nintersects is set to 40, and list("NQLD", "Nlab", "Nsyl") appears after 40 at 90th position, so we need a bigger number, here I tried with 90:
upset(orthogroups_df,
nsets = ncol(orthogroups_df),
sets = rev(c(selected_species)),
nintersects = 90,
queries = list(
list(query = intersects,
params = list("NQLD", "Nlab", "Nsyl"),
color = "red",
active = TRUE),
list(query = intersects,
params = list("NQLD", "Nlab"),
color = "blue",
active = TRUE)))
I want to reproduce the following table from Hull's book Options & Derivatives.
I believe that accumulate function from dplyr might be suitable but I cannot reproduce it.
my effort is the following.
library(tidyverse)
initial_deposit = 12000
contract_value = 9000
closing_stock_indices = c(1250,1241,1238.3,1244.6,1241.3,1240.1, 1236.2, 1229.9,
1230.8,1225.4,1228.1,1211,1211,1214.3,1216.1,1223,1226.9)
(seq_along(closing_stock_indices) - 1) %>%
as.data.frame() %>%
setNames('Day') %>%
mutate(Closing_SI = closing_stock_indices,
Daily_change = c(0, diff(Closing_SI*200)),
Cum_gain = cumsum( Daily_change),
Margin_balance = accumulate(Cum_gain[-1], .init = initial_deposit,
~ if (.x >= initial_deposit).x + .y else initial_deposit + .y),
Variation_Margin = -1 * pmin(Margin_balance - initial_deposit, 0),
margin_call = c(0, Variation_Margin[-n()]))
or
tibble(closing_stock_indices)%>%
dplyr::mutate(Day = row_number())%>%
mutate(Close =closing_stock_indices )%>%
dplyr::mutate(y = as.numeric( Close - (dplyr::lag(Close, 1))))%>%
dplyr::select(-closing_stock_indices)%>%
dplyr::mutate(y = replace_na(y,0),
Cum_gain = 200*cumsum(y))%>%
dplyr::mutate(Margin_balance = initial_deposit+Cum_gain)
Is there any other alternative using rowwise() from dplyr?
I would appreciate any help.
You need to accumulate over the daily change not the cumulative change, otherwise you are cumulating twice. I have used a base lambda function instead of the purrr one for expressiveness:
library(tidyverse)
initial_deposit = 12000
contract_value = 9000
closing_stock_indices = c(1250,1241,1238.3,1244.6,1241.3,1240.1, 1236.2, 1229.9,
1230.8,1225.4,1228.1,1211,1211,1214.3,1216.1,1223,1226.9)
(seq_along(closing_stock_indices) - 1) %>%
as.data.frame() %>%
setNames('Day') %>%
mutate(
Closing_SI = closing_stock_indices,
Daily_change = c(0, diff(Closing_SI*200)),
Cum_gain = cumsum(Daily_change),
Margin_balance = accumulate(
Daily_change[-1],
.init = initial_deposit,
\(bal, gain) if (bal > contract_value) bal + gain
else initial_deposit + gain
),
margin_call = ifelse(Margin_balance < contract_value,
initial_deposit - Margin_balance,
0)
)
#> Day Closing_SI Daily_change Cum_gain Margin_balance margin_call
#> 1 0 1250.0 0 0 12000 0
#> 2 1 1241.0 -1800 -1800 10200 0
#> 3 2 1238.3 -540 -2340 9660 0
#> 4 3 1244.6 1260 -1080 10920 0
#> 5 4 1241.3 -660 -1740 10260 0
#> 6 5 1240.1 -240 -1980 10020 0
#> 7 6 1236.2 -780 -2760 9240 0
#> 8 7 1229.9 -1260 -4020 7980 4020
#> 9 8 1230.8 180 -3840 12180 0
#> 10 9 1225.4 -1080 -4920 11100 0
#> 11 10 1228.1 540 -4380 11640 0
#> 12 11 1211.0 -3420 -7800 8220 3780
#> 13 12 1211.0 0 -7800 12000 0
#> 14 13 1214.3 660 -7140 12660 0
#> 15 14 1216.1 360 -6780 13020 0
#> 16 15 1223.0 1380 -5400 14400 0
#> 17 16 1226.9 780 -4620 15180 0
I have this code
bucket <- seq(0, 100000, by = 5000)
dt <-
data.frame(sold_amount = bucket) %>%
mutate(bucket = cut(bucket, breaks = bucket, include.lowest = T, dig.lab = 10))
If I execute it, bucket [0, 5000] is duplicated, with include.lowest = T bucket for amount 0 is na How can i get bins [0,5000] for sold amount 0 and (5000,10000] for sold amount 5000?
Maybe this?
cut(bucket, breaks = c(bucket,Inf), include.lowest = T, right = FALSE, dig.lab = 10)
such that
> dt <-
+ data.frame(sold_amount = bucket) %>%
+ mutate(bucket = cut(bucket, breaks = c(bucket, Inf), include.lowest = T, right = FALSE, dig.lab = .... [TRUNCATED]
> dt
sold_amount bucket
1 0 [0,5000)
2 5000 [5000,10000)
3 10000 [10000,15000)
4 15000 [15000,20000)
5 20000 [20000,25000)
6 25000 [25000,30000)
7 30000 [30000,35000)
8 35000 [35000,40000)
9 40000 [40000,45000)
10 45000 [45000,50000)
11 50000 [50000,55000)
12 55000 [55000,60000)
13 60000 [60000,65000)
14 65000 [65000,70000)
15 70000 [70000,75000)
16 75000 [75000,80000)
17 80000 [80000,85000)
18 85000 [85000,90000)
19 90000 [90000,95000)
20 95000 [95000,100000)
21 100000 [100000,Inf]
A pragmatic approach would be to just filter out the offending line:
library(tidyverse)
bucket <- seq(0, 100000, by = 5000)
dt <-
data.frame(sold_amount = bucket) %>%
mutate(bucket = cut(bucket, breaks = bucket, include.lowest = T, dig.lab = 10)) %>%
dplyr::filter(sold_amount != 0)
> dt
sold_amount bucket
1 5000 [0,5000]
2 10000 (5000,10000]
3 15000 (10000,15000]
4 20000 (15000,20000]
5 25000 (20000,25000]
6 30000 (25000,30000]
7 35000 (30000,35000]
8 40000 (35000,40000]
9 45000 (40000,45000]
10 50000 (45000,50000]
11 55000 (50000,55000]
12 60000 (55000,60000]
13 65000 (60000,65000]
14 70000 (65000,70000]
15 75000 (70000,75000]
16 80000 (75000,80000]
17 85000 (80000,85000]
18 90000 (85000,90000]
19 95000 (90000,95000]
20 100000 (95000,100000]
Maybe just remove the first row
dt <-
data.frame(sold_amount = bucket) %>%
mutate(bucket = cut(bucket, breaks = bucket, include.lowest = T, dig.lab = 10))%>%
.[-1,]
dt
An approach with my santoku package:
library(santoku)
dt$bucket <- chop_width(dt$sold_amount, 5000, labels = lbl_intervals("%d"))
dt
sold_amount bucket
1 0 [0, 5000)
2 5000 [5000, 10000)
3 10000 [10000, 15000)
4 15000 [15000, 20000)
5 20000 [20000, 25000)
6 25000 [25000, 30000)
7 30000 [30000, 35000)
8 35000 [35000, 40000)
9 40000 [40000, 45000)
10 45000 [45000, 50000)
11 50000 [50000, 55000)
12 55000 [55000, 60000)
13 60000 [60000, 65000)
14 65000 [65000, 70000)
15 70000 [70000, 75000)
16 75000 [75000, 80000)
17 80000 [80000, 85000)
18 85000 [85000, 90000)
19 90000 [90000, 95000)
20 95000 [95000, 100000)
21 100000 {100000}
I am trying to make map using R ggplot2 dplyr & ggswissmap
My problem is the following, some points have the exact same coordinates ( long/V1;lat/V2 ) and despite my different tries I always get the same graphics with only one point instead of several.
Of course I tried the jitter function, any help would be appreciated or a link to a tutorial.
library(ggswissmaps)
library(dplyr)
dd<-shp_df[[5]]
df<-ggplot() +
geom_polygon(data=dd, aes( x = long, y = lat, group = group),color="white") +
theme_white_f()
df <- ggplot() +geom_polygon( data = df.merge ,
aes(x = long, y = lat, group = group),
color="white", fill="grey92" )
pp<-df+ geom_point(data=merge_data,aes(x=V1, y=V2,colour=FB), size=3, alpha=0.8) +
theme_white_f()
pp
pp+geom_jitter()
Points in center of each region
cnames <- aggregate(cbind(df.merge$long, df.merge$lat) ~ df.merge$V2, data=df.merge,
FUN=function(x)mean(range(x)))
cnames$Kanton<-cnames$`df.merge$V2`
merge_data<-right_join(cnames,tbl_canton_separate,by=c("Kanton"="Cantons"))
head(df.merge)
long lat order hole piece group id KTNR GRNR AREA_HA X_MIN X_MAX Y_MIN Y_MAX
1 692429 281173 1 FALSE 1 0.1 0 1 4 172895 669244 716900 223896 283343
2 692993 280860 2 FALSE 1 0.1 0 1 4 172895 669244 716900 223896 283343
3 693163 280421 3 FALSE 1 0.1 0 1 4 172895 669244 716900 223896 283343
4 693048 280201 4 FALSE 1 0.1 0 1 4 172895 669244 716900 223896 283343
5 693243 279410 5 FALSE 1 0.1 0 1 4 172895 669244 716900 223896 283343
6 693606 278826 6 FALSE 1 0.1 0 1 4 172895 669244 716900 223896 283343
X_CNTR Y_CNTR Z_MIN Z_MAX Z_AVG Z_MED V1 V2 V3
1 691800 252000 331 1290 533 504 1 ZH Zürich
2 691800 252000 331 1290 533 504 1 ZH Zürich
3 691800 252000 331 1290 533 504 1 ZH Zürich
4 691800 252000 331 1290 533 504 1 ZH Zürich
5 691800 252000 331 1290 533 504 1 ZH Zürich
6 691800 252000 331 1290 533 504 1 ZH Zürich
head(merge_data[,c(1:5,8)])
df.merge$V2 V1 V2 Kanton Numéro FB
1 AG 648725 247936 AG PGV01.002 II
2 AG 648725 247936 AG PGV01.005 I
3 AG 648725 247936 AG PGV01.044 II
4 AG 648725 247936 AG PGV01.047 II
5 AG 648725 247936 AG PGV01.071 IV
6 AG 648725 247936 AG PGV02.015 IV
Increase the spread of the points: Yes it works !
df + geom_jitter(data = merge_data, aes(x=V1, y=V2, colour = FB),
size=2, width = 10000, height = 10000, alpha = 0.5) + theme_white_f()
I want to categorize rivers dataset into “tiny” (<500), “short” (<1500), “medium” (<3000) and “long”
(>=3000). I want to plot a pie chart that visualizes frequency of these four categories.
I tried:
rivers[rivers >= 3000] = 'long'
rivers[rivers >= 1500 & rivers < 3000] = 'meidum'
rivers[rivers >= 500 & rivers < 1500]='short'
rivers[rivers < 500] = 'tiny'
It seems the third command has no effect on data and they are the same as before!
table(rivers)
rivers
500 505 524 525 529 538 540 545 560 570 600 605
2 1 1 2 1 1 1 1 1 1 3 1
610 618 620 625 630 652 671 680 696 710 720 730
1 1 1 1 1 1 1 1 1 1 2 1
735 760 780 800 840 850 870 890 900 906 981 long
2 1 1 1 1 1 1 1 2 1 1 1
meidum tiny
36 62
What is wrong with my commands, and is it the right way to draw a pie chart for them?
The cut function and easily perform this task:
#random data
rivers<-runif(20, 0, 5000)
#break into desired groups and label
answer<-cut(rivers, breaks=c(0, 500, 1500, 3000, Inf),
labels=c("tiny", "short", "medium", "long"), right=FALSE)
table(answer)
# tiny short medium long
# 1 10 7 2
You are running into this problem because you are trying to assign character values to an integer vector. If you work with a character vector instead, it should work:
> rivers_size <- as.character(rivers)
> rivers_size[rivers >= 3000] = 'long'
> rivers_size[rivers >= 1500 & rivers < 3000] = 'meidum'
> rivers_size[rivers >= 500 & rivers < 1500]='short'
> rivers_size[rivers < 500] = 'tiny'
> table(rivers_size)
rivers_size
long meidum short tiny
1 5 53 82
> pie(table(rivers_size))
Alternatively, the same thing can be accomplished using cut (as #Dave2e shows):
rivers <- cut(datasets::rivers,
breaks = c(0, 500, 1500, 3000, Inf),
labels = c("tiny", "short", "medium", "long"),
right = FALSE)
pie(table(rivers))
Here is another alternative using dplyr::case_when. It is more verbose than using cut but it is also easier generalize.
library("tidyverse")
set.seed(1234) # for reproducibility
# `case_when` vectorizes multiple `if-else` statements.
rivers <- sample.int(5000, size = 1000, replace = TRUE)
rivers <- case_when(
rivers >= 3000 ~ "long",
rivers >= 1500 ~ "medium",
rivers >= 500 ~ "short",
TRUE ~ "tiny"
)
table(rivers)
#> rivers
#> long medium short tiny
#> 406 303 199 92
Created on 2019-04-10 by the reprex package (v0.2.1)