# encoder <- 과거의 데이터를 바탕으로 미래를 예측 (t-1의 인과성 학습)
# decoder <- 미래의 데이터를 바탕으로 과거를 예측 (t+1의 인과성 학습)
class TimeAutoEncoder(nn.Module):
def __init__(self):
super(TimeAutoEncoder, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv1d(in_channels = 48, out_channels = 1876, kernel_size = 3, stride = 1, padding = 0, dilation = 1),
nn.BatchNorm1d(1876),
nn.ReLU(),
)
self.conv2 = nn.Sequential(
nn.Conv1d(in_channels = 1876, out_channels = 938, kernel_size = 3, stride = 1, padding = 0, dilation = 2),
nn.BatchNorm1d(938),
nn.ReLU(),
)
self.conv3 = nn.Sequential(
nn.Conv1d(in_channels = 938, out_channels = 512, kernel_size = 3, stride = 1, padding = 0, dilation = 4),
nn.BatchNorm1d(512),
nn.ReLU(),
)
self.conv4 = nn.Sequential(
nn.Conv1d(in_channels = 512, out_channels = 256, kernel_size = 3, stride = 1, padding = 0, dilation = 8),
nn.BatchNorm1d(256),
nn.ReLU(),
)
self.conv5 = nn.Sequential(
nn.Conv1d(in_channels = 256, out_channels = 128, kernel_size = 3, stride = 1, padding = 0, dilation = 16),
nn.BatchNorm1d(128),
nn.ReLU(),
)
self.conv6 = nn.Sequential(
nn.Conv1d(in_channels = 128, out_channels = 64, kernel_size = 3, stride = 1, padding = 0, dilation = 32),
nn.BatchNorm1d(64),
nn.ReLU(),
)
self.conv7 = nn.Sequential(
nn.Conv1d(in_channels = 64, out_channels = 32, kernel_size = 3, stride = 1, padding = 0, dilation = 64),
nn.BatchNorm1d(32),
nn.ReLU(),
)
self.conv8 = nn.Sequential(
nn.Conv1d(in_channels = 32, out_channels = 16, kernel_size = 3, stride = 1, padding = 0, dilation = 128),
nn.BatchNorm1d(16),
nn.ReLU(),
)
self.conv9 = nn.Sequential(
nn.Conv1d(in_channels = 16, out_channels = 8, kernel_size = 3, stride = 1, padding = 0, dilation = 256),
nn.BatchNorm1d(8),
nn.ReLU(),
)
self.encoder_fc = nn.Sequential(
nn.Linear(8 * 1876, 128),
nn.BatchNorm1d(128),
nn.Tanh(),
)
self.decoder_fc = nn.Sequential(
nn.Linear(128, 8 * 1876),
nn.ReLU(),
)
self.t_conv1 = nn.Sequential(
# nn.ConvTranspose1d(in_channels = 8, out_channels = 16, kernel_size = 3, stride = 1, dilation=62),
nn.Conv1d(in_channels = 8, out_channels = 16, kernel_size = 3, stride = 1, padding = 0, dilation = 256),
nn.BatchNorm1d(16),
nn.ReLU(),
)
self.t_conv2 = nn.Sequential(
# nn.ConvTranspose1d(in_channels = 16, out_channels = 32, kernel_size = 3, stride = 1, dilation = 30),
nn.Conv1d(in_channels = 16, out_channels = 32, kernel_size = 3, stride = 1, padding = 0, dilation = 128),
nn.BatchNorm1d(32),
nn.ReLU(),
)
self.t_conv3 = nn.Sequential(
# nn.ConvTranspose1d(in_channels = 32, out_channels = 64, kernel_size = 3, stride = 1, dilation=14),
nn.Conv1d(in_channels = 32, out_channels = 64, kernel_size = 3, stride = 1, padding = 0, dilation = 64),
nn.BatchNorm1d(64),
nn.ReLU(),
)
self.t_conv4 = nn.Sequential(
# nn.ConvTranspose1d(in_channels = 64, out_channels = 128, kernel_size = 3, stride = 1, dilation = 6),
nn.Conv1d(in_channels = 64, out_channels = 128, kernel_size = 3, stride = 1, padding = 0, dilation = 32),
nn.BatchNorm1d(128),
nn.ReLU(),
)
self.t_conv5 = nn.Sequential(
# nn.ConvTranspose1d(in_channels = 128, out_channels = 256, kernel_size = 3, stride = 1, dilation=2),
nn.Conv1d(in_channels = 128, out_channels = 256, kernel_size = 3, stride = 1, padding = 0, dilation = 16),
nn.BatchNorm1d(256),
nn.ReLU(),
)
self.t_conv6 = nn.Sequential(
# nn.ConvTranspose1d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, dilation = 1),
nn.Conv1d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, padding = 0, dilation = 8),
nn.BatchNorm1d(512),
nn.ReLU(),
)
self.t_conv7 = nn.Sequential(
# nn.ConvTranspose1d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, dilation = 1),
nn.Conv1d(in_channels = 512, out_channels = 938, kernel_size = 3, stride = 1, padding = 0, dilation = 4),
nn.BatchNorm1d(938),
nn.ReLU(),
)
self.t_conv8 = nn.Sequential(
# nn.ConvTranspose1d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, dilation = 1),
nn.Conv1d(in_channels = 938, out_channels = 1876, kernel_size = 3, stride = 1, padding = 0, dilation = 2),
nn.BatchNorm1d(1876),
nn.ReLU(),
)
self.t_conv9 = nn.Sequential(
# nn.ConvTranspose1d(in_channels = 512, out_channels = 48, kernel_size = 3, stride = 1, dilation= 1),
nn.Conv1d(in_channels = 1876, out_channels = 48, kernel_size = 3, stride = 1, padding = 0, dilation = 1)
)
def forward(self, mel_spec):
x = F.pad(mel_spec, pad = (2, 0, 0, 0))
x = self.conv1(x)
# print(x.shape)
x = F.pad(x, pad = (4, 0, 0, 0))
x = self.conv2(x)
# print(x.shape)
x = F.pad(x, pad = (8, 0, 0, 0))
x = self.conv3(x)
# print(x.shape)
x = F.pad(x, pad = (16, 0, 0, 0))
x = self.conv4(x)
# print(x.shape)
x = F.pad(x, pad = (32, 0, 0, 0))
x = self.conv5(x)
# print(x.shape)
x = F.pad(x, pad = (64, 0, 0, 0))
x = self.conv6(x)
# print(x.shape)
x = F.pad(x, pad = (128, 0, 0, 0))
x = self.conv7(x)
x = F.pad(x, pad = (256, 0, 0, 0))
x = self.conv8(x)
x = F.pad(x, pad = (512, 0, 0, 0))
x = self.conv9(x)
# print(x.shape)
encode = self.encoder_fc(x.view(-1, 8 * 1876))
# print('decode')
x = self.decoder_fc(encode)
x = x.view(-1, 8, 1876)
x = torch.swapaxes(torch.fliplr(torch.swapaxes(x, 1, 2)), 1, 2)
x = F.pad(x, pad = (512, 0, 0, 0))
x = self.t_conv1(x)
x = F.pad(x, pad = (256, 0, 0, 0))
x = self.t_conv2(x)
x = F.pad(x, pad = (128, 0, 0, 0))
x = self.t_conv3(x)
# print(x.shape)
x = F.pad(x, pad = (64, 0, 0, 0))
x = self.t_conv4(x)
# print(x.shape)
x = F.pad(x, pad = (32, 0, 0, 0))
x = self.t_conv5(x)
# print(x.shape)
x = F.pad(x, pad = (16, 0, 0, 0))
x = self.t_conv6(x)
# print(x.shape)
x = F.pad(x, pad = (8, 0, 0, 0))
x = self.t_conv7(x)
# print(x.shape)
x = F.pad(x, pad = (4, 0, 0, 0))
x = self.t_conv8(x)
# print(x.shape)
x = F.pad(x, pad = (2, 0, 0, 0))
x = self.t_conv9(x)
# print(x.shape)
x = torch.swapaxes(torch.fliplr(torch.swapaxes(x, 1, 2)), 1, 2)
return encode, x
import time
min_loss = 987654321
for epoch in range(1, epochs + 1):
start = time.time()
train_loss = train(model = model, train_loader = train_batch_li)
val_loss = val(model = model, train_loader = val_batch_li)
end = time.time()
print(f'EPOCH:{epoch}, Train Loss:{train_loss}, Val Loss:{val_loss}, 학습 시간: {end - start}')
if val_loss < min_loss:
min_loss = val_loss
torch.save(model.state_dict(), model_dir + f'TimeAutoEncoder_val.pt')
print('모델 저장')
After running the above code, the error is like the following
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/var/folders/br/5b5sgky977xdm4cgcc_mvds00000gn/T/ipykernel_19816/1986897196.py in <module>
5 for epoch in range(1, epochs + 1):
6 start = time.time()
----> 7 train_loss = train(model = model, train_loader = train_batch_li)
8 val_loss = val(model = model, train_loader = val_batch_li)
9 end = time.time()
/var/folders/br/5b5sgky977xdm4cgcc_mvds00000gn/T/ipykernel_19816/1176484610.py in train(model, train_loader)
20 optimizer.zero_grad()
21
---> 22 encode, output = model(mel)
23
24 loss = criterion(output, mel)
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/var/folders/br/5b5sgky977xdm4cgcc_mvds00000gn/T/ipykernel_19816/2004772610.py in forward(self, mel_spec)
135 # print(x.shape)
136 x = F.pad(x, pad = (4, 0, 0, 0))
--> 137 x = self.conv2(x)
138 # print(x.shape)
139 x = F.pad(x, pad = (8, 0, 0, 0))
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/container.py in forward(self, input)
202 def forward(self, input):
203 for module in self:
--> 204 input = module(input)
205 return input
206
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/batchnorm.py in forward(self, input)
169 used for normalization (i.e. in eval mode when buffers are not None).
170 """
--> 171 return F.batch_norm(
172 input,
173 # If buffers are not to be tracked, ensure that they won't be updated
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
2448 _verify_batch_size(input.size())
2449
-> 2450 return torch.batch_norm(
2451 input, weight, bias, running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled
2452 )
RuntimeError: running_mean should contain 1876 elements not 938
What would be the problem? I tried to make a change to the code related to the error message. But it doesn't work.
The error says that running_mean should contain 1876 elements not 938. As I searched on the other questions on stack overflow, I've found out that the I've to change number 1876 to 938. But I still don't know understand what this means.
Related
############
EDIT
############
I used this info_mat to compute evolution rates.
date1 <- rbind("February", "March", "April", "May", "June", "July", "August", "September", "October", "November")
sum1.visit_bush. <- rbind("0", "0" ,"1" , "-0.75" ,"2","0" ,"0.333333333333333" , "1.25" , "0", "-1")
sum1.counts_bush. <- rbind("0" ,"0.115290451813933", "-0.557273997206146", "0.146270002253775" , "0.100865119937082", "0.512412930880514", "0.435049598488427", "-0.0831961816984858", "0.824791311372408", "-0.156025577963601" )
sum1.hcounts_bush. <- rbind("0", "0.0387010676156584", "-0.625695931477516", "0.47254004576659", "-0.233100233100233", "0.99290780141844" , "-0.032536858159634" , "0.349973725696269" , "0.660957571039315", "-0.341223341926412")
evolution1 <- data.frame(date1, sum1.visit_bush., sum1.counts_bush., sum1.hcounts_bush.)
I then proceed as you suggested
df_month_cand <- evolution1 %>% select(c("date", paste0(c("sum.visit_", "sum.counts_", "sum.hcounts_"), "bush.")))
df_month_cand_plot <- melt(df_month_cand, id.vars = "date", variable.name = "Type", value.name = "y")
FunctionPlot <- function(cand, evolution) {
df_month_cand <- evolution %>% select(c("date1", paste0(c("sum1.visit_", "sum1.counts_", "sum1.hcounts_"), cand)))
df_month_cand_plot <- melt(df_month_cand, id.vars = "date1", variable.name = "Type", value.name = "y")
p <- ggplot(df_month_cand_plot, aes(x = date1, y = y, color = Type)) + geom_point() + geom_line(aes(group=Type)) +
labs(
title = paste0("Evolution of visits and coverage
per month for ", cand) ,
subtitle = "We read: from March to April, whereas the visits of -candidate- increased by -value*100 %-,
the coverage in newspapers decreased by -value*100 %-",
color="Type",
x="Months",
y="Percentage change over months") +
theme(
plot.title = element_text(size=15, face="bold", margin = margin(5, 0, 10, 10), vjust=2, hjust=0.5),
axis.text.x=element_text(angle=50, size=11.5, vjust=0.5),
axis.title.y = element_text(vjust=4),
plot.margin = unit(c(1, 0.3, 0.5, 0.6), "cm"),
legend.position = "bottom",
legend.box.background = element_rect(color="black", size=2),
legend.title = element_text(face = "bold", size=10),
legend.background = element_rect(fill="grey90",
size=0.5, linetype="solid",
colour ="black"),
panel.background = element_rect(fill = "gray90", colour = "gray70", size = 0.5, linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'dashed', colour = "gray75")) +
scale_color_manual(labels = c("Visits", "Main text count", "Headline count"), values = c("tomato3", "deepskyblue3", "green2")) +
scale_x_discrete(limits = c("February", "March", "April", "May", "June", "July", "August", "September", "October", "November")) +
scale_y_discrete()
plot(p)
}
sapply("bush.", FunctionPlot, evolution1)
However, on the output the y axis is completely messed-up.
The values are not sorted from least to greatest.
Why? How to resolve this?
Lastly, to simplify the y axis I'd like to divide is from -1 to 2 with breaks of 0.25
I tried
scale_y_continuous(breaks=seq(-1, 2, 0.25))
But I have the following error code:
Error: Discrete value supplied to continuous scale
Thanks!!!!
You can convert you variable date from character to Date format:
date <- as.Date(date, format = "%Y-%d-%m")
ggplot can print dates at X axes. Now you don't need to create variable months by hand.
I think you should work with data.frame:
df_info <- data.frame(
date = date,
a1 = c(0, 0, 0, 0, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
b1 = c(1, 1, 1, 1, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
hb1 = c(2, 2, 2, 2, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
a2 = c(0, 0, 0, 0, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
b2 = c(1, 1, 1, 1, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
hb2 = c(2, 2, 2, 2, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
a3 = c(0, 0, 0, 0, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
b3 = c(1, 1, 1, 1, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
hb3 = c(2, 2, 2, 2, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
a4 = c(0, 0, 0, 0, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
b4 = c(1, 1, 1, 1, 6421, 41, 5667, 44, 1178, 0, 1070, 1),
hb4 = c(2, 2, 2, 2, 6421, 41, 5667, 44, 1178, 0, 1070, 1)
)
Or if you have big data you can convert matrix to data.frame with converting variables to numeric format (but without names).
df_info <- bind_cols(data.frame(date = date), info_mat[,-1] %>% as.data.frame() %>% lapply(as.numeric)) %>% as.data.frame()
Now we can select columns for first individual:
df <- df_info %>% select(c("date", paste0(c("a", "b", "hb"), 1)))
Next we will create data.frame for plot:
df_plot <- melt(df, id.vars = "date", variable.name = "Type", value.name = "y")
Your function for plot is good you can use it for df_plot. Now let's create function for plotting data for fixed number of individual:
f <- function(num, df_info) {
df <- df_info %>% select(c("date", paste0(c("a", "b", "hb"), num)))
df_plot <- melt(df, id.vars = "date", variable.name = "Type", value.name = "y")
p <- ggplot(df_plot, aes(x = date, y = y, color = `Type`)) + geom_point() +
geom_line() +
labs(
title = "Evolution of a and b and c per months",
subtitle = paste0("plot ", num),
color="Type",
x = "Months",
y = "over months"
)
plot(p)
}
Let's apply our function for each number of individual:
sapply(1:4, f, df_info)
Or
sapply(1:4, function(x) f(x, df_info))
But your data has bad scale. You cannot see the difference between 0 and 1 if you have 6421 on the same plot. but I don't know what you want to do with this data and plots.
I have the dataframe DATA1 as shown for a few rows:
structure(list(S = c(12, 12, 15, 15, 15, 9, 9), UG = c(84, 84,
84, 84, 84, 84, 84), CSi = c(0.487181441487271, 0.623551085193489,
0.505057492620447, 0.704318096382286, 0.575388552145397, 0.400731851672016,
0.490770631112789), N_l = c(1, 3, 1, 3, 5, 1, 3), N_b = c(5,
5, 5, 5, 5, 5, 5), m = c(1.2, 0.85, 1.2, 0.85, 0.65, 1.2, 0.85
), A = c(-12, -12, -15, -15, -15, -9, -9), x.sqr = c(1440, 1440,
2250, 2250, 2250, 810, 810), e_1 = c(21.8, 21.8, 29, 29, 29,
14.6, 14.6), e_2 = c(0, 9.8, 0, 17, 17, 0, 2.6), e_3 = c(0, -2.2,
0, 5, 5, 0, -9.4), e_4 = c(0, 0, 0, 0, -7, 0, 0), e_5 = c(0,
0, 0, 0, -19, 0, 0), K_g = c(6340598.65753794, 6340598.65753794,
6429472.98493414, 6429472.98493414, 6429472.98493414, 6296482.86883766,
6296482.86883766), stiff.girder = c(0.517988322166146, 0.517988322166146,
0.643978136780243, 0.643978136780243, 0.643978136780243, 0.416960174810184,
0.416960174810184), stiff.deck = c(276.422028597005, 276.422028597005,
147.89589537037, 147.89589537037, 147.89589537037, 642.725952664716,
642.725952664716)), row.names = c(10L, 30L, 50L, 70L, 90L, 110L,
130L), class = "data.frame")
I try to run the function proposed with nonlinear regression such as:
Proposed <- function(N_b,N_l,m,A,x.sqr,e_1,e_2,e_3,e_4,e_5,K_g,a,b,c,d) {
e <- data.frame(e_1,e_2,e_3,e_4,e_5,N_l)
CSi <- m * ((N_l/N_b) * ((a*K_g)^b) +
(max(A * apply(e,1,function(v) combn(v[1:5],v["N_l"],sum))) / x.sqr) * ((c*K_g)^d))
return(CSi)
}
library(minpack.lm)
G_1 <- nlsLM(CSi ~ Proposed(N_b,N_l,m,A,x.sqr,e_1,e_2,e_3,e_4,e_5,K_g,a,b,c,d),
data = DATA1,
start = c(a = 0.01, b = 0.01, c = 0.01, d = 0.01))
I get the error:
Error in A * apply(e, 1, function(v) combn(v[1:5], v["N_l"], sum)) :
non-numeric argument to binary operator
I am trying to extract the median values from the following data
df<-structure(list(n = 26L, time = c(64, 77, 142, 148, 167, 175,
181, 218, 286, 294, 323, 362, 375, 414, 427, 442, 455, 460, 505,
543, 544, 548, 598, 604, 771, 951), n.risk = c(26, 25, 24, 23,
22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7,
6, 5, 4, 3, 2, 1), n.event = c(1, 0, 1, 1, 0, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0), n.censor = c(0,
1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
0, 1, 0, 1), surv = c(0.961538461538462, 0.961538461538462, 0.921474358974359,
0.881410256410256, 0.881410256410256, 0.839438339438339, 0.839438339438339,
0.839438339438339, 0.839438339438339, 0.839438339438339, 0.786973443223443,
0.734508547008547, 0.682043650793651, 0.629578754578755, 0.577113858363858,
0.524648962148962, 0.524648962148962, 0.4663546330213, 0.408060303893637,
0.349765974765975, 0.349765974765975, 0.27981277981278, 0.209859584859585,
0.209859584859585, 0.104929792429792, 0.104929792429792), type = "right",
std.err = c(0.0392232270276368, 0.0392232270276368, 0.0578796660439579,
0.0729817807835649, 0.0729817807835649, 0.0877911880959172,
0.0877911880959172, 0.0877911880959172, 0.0877911880959172,
0.0877911880959172, 0.108967698764172, 0.128980092013706,
0.148762796526449, 0.168939711260041, 0.190043109889266,
0.212620066567793, 0.212620066567793, 0.24309706208875, 0.277404622263805,
0.317431643449181, 0.317431643449181, 0.388281918537096,
0.483834870173886, 0.483834870173886, 0.856794130229766,
0.856794130229766), upper = c(1, 1, 1, 1, 1, 0.997049673308717,
0.997049673308717, 0.997049673308717, 0.997049673308717,
0.997049673308717, 0.974346771572688, 0.945768634864856,
0.912933812389795, 0.876701615980298, 0.837580372384821,
0.795886882462859, 0.795886882462859, 0.751001648029994,
0.70283210436471, 0.651592180391947, 0.651592180391947, 0.598926755204663,
0.541713673163476, 0.541713673163476, 0.56260462703826, 0.56260462703826
), lower = c(0.890389006776242, 0.890389006776242, 0.822651689473135,
0.763934098528765, 0.763934098528765, 0.706741845048289,
0.706741845048289, 0.706741845048289, 0.706741845048289,
0.706741845048289, 0.635633245173389, 0.570438462156972,
0.509547937949868, 0.45211438075625, 0.397645905392106, 0.345848812876783,
0.345848812876783, 0.289595428067216, 0.236917480831754,
0.187749701094333, 0.187749701094333, 0.130725820922461,
0.0812994900059442, 0.0812994900059442, 0.019570157816371,
0.019570157816371), conf.type = "log", conf.int = 0.95, call = survfit(formula = Surv(as.numeric(as.character(all_clin$new_death))[ind_clin],
all_clin$death_event[ind_clin]) ~ event_rna[ind_gene,
ind_tum])), .Names = c("n", "time", "n.risk", "n.event",
"n.censor", "surv", "type", "std.err", "upper", "lower", "conf.type",
"conf.int", "call"), class = "survfit")
I try to get it like below
x1 <- ifelse (is.na(as.numeric(summary(s)$table[,'median'][1])),'NA',as.numeric(summary(s)$table[,'median'][1]))
x2 <- as.numeric(summary(s)$table[,'median'][2])
if(x1 != 'NA' & x2 != 'NA'){
lines(c(0,x1),c(0.5,0.5),col='blue')
lines(c(x1,x1),c(0,0.5),col='black')
lines(c(x2,x2),c(0,0.5),col='red')
}
I get the following error for both comments
Error in summary(s)$table[, "median"] : incorrect number of dimensions
Which function in R ggplot2 should I use to get this graph?
Here, the number "2" and "6" is about the number of cells that have different Division angle.
This might be a better answer. It's possible to do it using base graphics too but may need more work. Here's an example.
d <- structure(list(Angle = c(0, 10, 20, 30, 40, 50, 60, 70, 80, 90,
100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220,
230, 240, 260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 250
), Frequency = c(0, 0, 0.001, 2, 4, 18.03, 11, 12, 5, 7, 10,
13, 2, 0.003, 0.01, 0, 1, 0.05, 2, 3, 3.7, 6, 0, 0, 0, 0, 0.2,
0.006, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("Angle", "Frequency"
), row.names = c(NA, 36L), class = "data.frame")
d$radian = d$Angle*pi/180
d$x = d$Frequency*cos(d$radian)
d$y = d$Frequency*sin(d$radian)
m = max(d$x, d$y)
graphics.off()
windows(width = 6, height = 6)
plot(x = m,
y = m,
xlim = c(-m,m),
ylim = c(-m,m),
type = "n",
asp = 1,
axes = FALSE,
xlab = "",
ylab = "")
par(xpd = TRUE)
symbols (x =c(0,0,0,0,0), y = c(0,0,0,0,0), circles=c(3,6,9,12,15), fg = "grey",
add = TRUE, inches = FALSE, lty = 2)
lines(x = c(-15,15), y = c(0,0), lty = 2)
lines(x = c(0,0), y = c(-15,15), lty = 2)
text(x = c(0,0,-15,15), y = c(-15,15,0,0),
labels = c("180","0","270","90"), pos = c(1,3,2,4))
for (i in 1:nrow(d)){
lines(x = c(0,d$x[i]), y = c(0,d$y[i]), lwd = 3)
}
NOTE: I screwed up when labeling angles
I am a newbie in R. I am using autofitVariogram to daily rainfall data of 50 stations.The sample data is provided below.Some of stations have missing values represented by "NaN" values.
My question is regarding the variogramfit. The variogram covers only a distance of 60,000m. Why are the points in bins beyond 60Km not plotted. I had seen from spatial correlation plot maximum distance from lon-lat information is >200Km.
The summary of latitide and longitude information is provided below.
summary(lonlat)
lon lat
Min. :74.78 Min. :15.77
1st Qu.:75.14 1st Qu.:16.04
Median :75.56 Median :16.33
Mean :75.54 Mean :16.37
3rd Qu.:75.94 3rd Qu.:16.66
Max. :76.31 Max. :17.23
$ Sample data given below:
dput(rain[140:145,])
structure(list(Col0 = c(0, 0, 1, 9, 6.5, 0), Col1 = c(1.5, 36,
21, 44, 4, 0), Col2 = c(0, 0, 24.5, 21.5, 7.5, 1), Col3 = c(0,
1, 45, 3, 0, 0), Col4 = c(2, 0, 5, 54.5, 13.5, 0), Col5 = c(0.5,
2, 0, 3.5, 13.5, 0), Col6 = c(0.5, 0, 0, 59, 15.5, 0), Col7 = c(0,
0, 2.5, 1, 0, 0), Col8 = c(0, 6, 24, 2, 5.5, 0), Col9 = c(0,
3, 6, 1, 0, 7), Col10 = c(0.5, 1, 64, 20, 1, 0.5), Col11 = c(NaN,
NaN, NaN, NaN, NaN, NaN), Col12 = c(0, 11, 75, 19, 15.5, 0),
Col13 = c(0, 4, 57.5, 50.5, 8.5, 0), Col14 = c(1.5, 0.5,
127, 33.5, 34.5, 0), Col15 = c(0, 7, 0.5, 13, 1, 0), Col16 = c(0,
0.5, 81.5, 15, 49, 0), Col17 = c(0, 0, 4.5, 17, 5.5, 1),
Col18 = c(0, 3, 2.5, 0.5, 0, 0), Col19 = c(NaN, NaN, NaN,
NaN, NaN, NaN), Col20 = c(0, 0, 0, 0, 7, 0), Col21 = c(0,
1, 0, 5, 3.5, 0), Col22 = c(0, 0, 11.5, 28, 3.5, 0), Col23 = c(0,
0, 48.5, 0, 24.5, 0), Col24 = c(0, 0, 0, 10, 0.5, 14), Col25 = c(NaN,
NaN, NaN, NaN, NaN, NaN), Col26 = c(0, 7.5, 16, 28.5, 20.5,
0), Col27 = c(1.5, 0.5, 38, 28.5, 50, 0), Col28 = c(NaN,
NaN, NaN, NaN, NaN, NaN), Col29 = c(NaN, NaN, NaN, NaN, NaN,
NaN), Col30 = c(2.5, 0, 0, 80.5, 28, 13.5), Col31 = c(1,
0, 17, 85.5, 3.5, 0), Col32 = c(0, 0.5, 8, 101, 20, 4), Col33 = c(NaN,
NaN, NaN, NaN, NaN, NaN), Col34 = c(4, 3, 17, 122, 2, 2),
Col35 = c(0, 15.5, 14.5, 20, 3.5, 0), Col36 = c(0, 6.5, 8.5,
21, 7, 0), Col37 = c(0, 0, 1.5, 14.5, 0, 1.5), Col38 = c(0,
28, 30, 4, 0, 73), Col39 = c(28.5, 0, 4.5, 9.5, 1, 0), Col40 = c(1.5,
11.5, 32.5, 55, 0, 1), Col41 = c(0, 14.5, 0, 19, 12.5, 47.5
), Col42 = c(0, 28, 29, 17, 0.5, 20.5), Col43 = c(NaN, NaN,
NaN, NaN, NaN, NaN), Col44 = c(0, 19, 3.5, 42, 0, 0), Col45 = c(0,
0, 85, 15.5, 1, 0), Col46 = c(0, 0.5, 8, 24, 0.5, 0), Col47 = c(0,
1.5, 7, 12, 8.5, 0), Col48 = c(0, 0, 0, 43.5, 0, 1.5), Col49 = c(0,
13.5, 1, 16, 1, 1)), .Names = c("Col0", "Col1", "Col2", "Col3",
"Col4", "Col5", "Col6", "Col7", "Col8", "Col9", "Col10", "Col11",
"Col12", "Col13", "Col14", "Col15", "Col16", "Col17", "Col18",
"Col19", "Col20", "Col21", "Col22", "Col23", "Col24", "Col25",
"Col26", "Col27", "Col28", "Col29", "Col30", "Col31", "Col32",
"Col33", "Col34", "Col35", "Col36", "Col37", "Col38", "Col39",
"Col40", "Col41", "Col42", "Col43", "Col44", "Col45", "Col46",
"Col47", "Col48", "Col49"), row.names = 143:148, class = "data.frame")
# Import the required libraries
library(rgdal)
library(maptools)
library(gstat)
library(sp)
library(automap)
library(XLConnect)
# Read the station data from xls file
stnrain = readWorksheetFromFile(path_fileName,"Sheet1", region = "D1:BA187", header = FALSE)
N = nrow(stnrain)
rain = stnrain[4:N,]
lat = as.numeric(t(stnrain[2,]))
lon = as.numeric(t(stnrain[3,]))
lonlat = cbind(lon,lat)
#Transform from GCS to UTM protection
sp = SpatialPoints(lonlat,proj4string = CRS("+proj=longlat"))
sp_utm = spTransform(sp, CRS("+proj=utm +zone=43N +datum=WGS84"))
krige_value = list() #prepare a list for storing the autokrige output
krige_stderr = list()
nRows = nrow(rain)
for (i in 1:nRows)
{
irain = rain[i,]
miss_indx = (irain == "NaN")
irain = irain[!miss_indx]
irain = as.numeric(irain)
isallZeros = (max(irain) == 0) # To take care of the cases of dry day(irain =0)
irain = as.data.frame(irain)
M = nrow(irain)
if ((M > 5) & (!isallZeros)) # To avoid cases of NaN across many stations
{
print(i)
foo_utm = sp_utm[!indx]# Removing the locations with NaN values
data = data.frame(foo_utm,irain)
names(data) = c("Easting","Northing","rain")
coordinates(data) = c("Easting","Northing")
variogram = autofitVariogram(rain~1,data,model = "Sph",fix.values=c(0,NA,NA))
p = plot(variogram, main="Semi-variogram (Spherical Model)",xlab="Distance(m)",ylab="Semi-Variance(mm2)", sub=paste("Range: ",variogram$var_model$range[2], "Day",i))
print(p)
png(p)
dev.off()
}
else
{
krige_value[[i]] = list(rep(0, L))
krige_stderr[[i]] = list(rep(0, L))
}
}
}
Q2) How can i save the variogram fit png file in a loop. I understand that dev.off() should be used after each saving the figure, which i had done, but I am not able to save the the figure.
Any help would be appreciated.
Thanks,
Any suggestions would be appreciated?
In regard to your first question, the sample variogram is built using points up to a maximum distance of around 1/3 of the diagonal of the area of interest. The assumption here is that points farther away form that are not related, and because they are not in the sample variogram or variogram model they are plotted. This is just a choice, and might not be the correct choice, but when I wrote autofitVariogram it seemed to work well for my data. The variogram model you show confirms this, the range is smaller than 60 km.
For saving your png's I have two suggestions. First, call the plot command inside the png() dev.off pair, so not:
print(p)
png()
dev.off()
but:
png()
print(p)
dev.off()
In addition, I would create meaningful names for the png files.
To create sets of variogram plots, I would use ggplot2. This uses geom_line and facet_wrap. ggplot2 cannot deal directly with gstat/automap variogram models, luckily you can create distance semivariance data using the function variogramLine from gstat. See for example figure 3.1, and the plots in appendix A of this report I wrote. This answer I wrote earlier does also include an example of using ggplot2 for spatial data, this time to plot a grid map.