# encoder <- predicts the future from past data (learns the causal structure at t-1)
# decoder <- predicts the past from future data (learns the causal structure at t+1)
class TimeAutoEncoder(nn.Module):
    def __init__(self):
        super(TimeAutoEncoder, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels = 48, out_channels = 1876, kernel_size = 3, stride = 1, padding = 0, dilation = 1),
            nn.BatchNorm1d(1876),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels = 1876, out_channels = 938, kernel_size = 3, stride = 1, padding = 0, dilation = 2),
            nn.BatchNorm1d(938),
            nn.ReLU(),
        )
        self.conv3 = nn.Sequential(
            nn.Conv1d(in_channels = 938, out_channels = 512, kernel_size = 3, stride = 1, padding = 0, dilation = 4),
            nn.BatchNorm1d(512),
            nn.ReLU(),
        )
        self.conv4 = nn.Sequential(
            nn.Conv1d(in_channels = 512, out_channels = 256, kernel_size = 3, stride = 1, padding = 0, dilation = 8),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        self.conv5 = nn.Sequential(
            nn.Conv1d(in_channels = 256, out_channels = 128, kernel_size = 3, stride = 1, padding = 0, dilation = 16),
            nn.BatchNorm1d(128),
            nn.ReLU(),
        )
        self.conv6 = nn.Sequential(
            nn.Conv1d(in_channels = 128, out_channels = 64, kernel_size = 3, stride = 1, padding = 0, dilation = 32),
            nn.BatchNorm1d(64),
            nn.ReLU(),
        )
        self.conv7 = nn.Sequential(
            nn.Conv1d(in_channels = 64, out_channels = 32, kernel_size = 3, stride = 1, padding = 0, dilation = 64),
            nn.BatchNorm1d(32),
            nn.ReLU(),
        )
        self.conv8 = nn.Sequential(
            nn.Conv1d(in_channels = 32, out_channels = 16, kernel_size = 3, stride = 1, padding = 0, dilation = 128),
            nn.BatchNorm1d(16),
            nn.ReLU(),
        )
        self.conv9 = nn.Sequential(
            nn.Conv1d(in_channels = 16, out_channels = 8, kernel_size = 3, stride = 1, padding = 0, dilation = 256),
            nn.BatchNorm1d(8),
            nn.ReLU(),
        )
        self.encoder_fc = nn.Sequential(
            nn.Linear(8 * 1876, 128),
            nn.BatchNorm1d(128),
            nn.Tanh(),
        )
        self.decoder_fc = nn.Sequential(
            nn.Linear(128, 8 * 1876),
            nn.ReLU(),
        )
        self.t_conv1 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 8, out_channels = 16, kernel_size = 3, stride = 1, dilation=62),
            nn.Conv1d(in_channels = 8, out_channels = 16, kernel_size = 3, stride = 1, padding = 0, dilation = 256),
            nn.BatchNorm1d(16),
            nn.ReLU(),
        )
        self.t_conv2 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 16, out_channels = 32, kernel_size = 3, stride = 1, dilation = 30),
            nn.Conv1d(in_channels = 16, out_channels = 32, kernel_size = 3, stride = 1, padding = 0, dilation = 128),
            nn.BatchNorm1d(32),
            nn.ReLU(),
        )
        self.t_conv3 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 32, out_channels = 64, kernel_size = 3, stride = 1, dilation=14),
            nn.Conv1d(in_channels = 32, out_channels = 64, kernel_size = 3, stride = 1, padding = 0, dilation = 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
        )
        self.t_conv4 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 64, out_channels = 128, kernel_size = 3, stride = 1, dilation = 6),
            nn.Conv1d(in_channels = 64, out_channels = 128, kernel_size = 3, stride = 1, padding = 0, dilation = 32),
            nn.BatchNorm1d(128),
            nn.ReLU(),
        )
        self.t_conv5 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 128, out_channels = 256, kernel_size = 3, stride = 1, dilation=2),
            nn.Conv1d(in_channels = 128, out_channels = 256, kernel_size = 3, stride = 1, padding = 0, dilation = 16),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        self.t_conv6 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, dilation = 1),
            nn.Conv1d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, padding = 0, dilation = 8),
            nn.BatchNorm1d(512),
            nn.ReLU(),
        )
        self.t_conv7 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, dilation = 1),
            nn.Conv1d(in_channels = 512, out_channels = 938, kernel_size = 3, stride = 1, padding = 0, dilation = 4),
            nn.BatchNorm1d(938),
            nn.ReLU(),
        )
        self.t_conv8 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, dilation = 1),
            nn.Conv1d(in_channels = 938, out_channels = 1876, kernel_size = 3, stride = 1, padding = 0, dilation = 2),
            nn.BatchNorm1d(1876),
            nn.ReLU(),
        )
        self.t_conv9 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 512, out_channels = 48, kernel_size = 3, stride = 1, dilation= 1),
            nn.Conv1d(in_channels = 1876, out_channels = 48, kernel_size = 3, stride = 1, padding = 0, dilation = 1)
        )
    def forward(self, mel_spec):
        # each block is left-padded by dilation * (kernel_size - 1), so the
        # dilated convolutions stay causal and the sequence length is preserved
        x = F.pad(mel_spec, pad = (2, 0, 0, 0))
        x = self.conv1(x)
        # print(x.shape)
        x = F.pad(x, pad = (4, 0, 0, 0))
        x = self.conv2(x)
        # print(x.shape)
        x = F.pad(x, pad = (8, 0, 0, 0))
        x = self.conv3(x)
        # print(x.shape)
        x = F.pad(x, pad = (16, 0, 0, 0))
        x = self.conv4(x)
        # print(x.shape)
        x = F.pad(x, pad = (32, 0, 0, 0))
        x = self.conv5(x)
        # print(x.shape)
        x = F.pad(x, pad = (64, 0, 0, 0))
        x = self.conv6(x)
        # print(x.shape)
        x = F.pad(x, pad = (128, 0, 0, 0))
        x = self.conv7(x)
        x = F.pad(x, pad = (256, 0, 0, 0))
        x = self.conv8(x)
        x = F.pad(x, pad = (512, 0, 0, 0))
        x = self.conv9(x)
        # print(x.shape)
        encode = self.encoder_fc(x.view(-1, 8 * 1876))
        # print('decode')
        x = self.decoder_fc(encode)
        x = x.view(-1, 8, 1876)
        # reverse the time axis so the decoder stack runs anti-causally
        x = torch.swapaxes(torch.fliplr(torch.swapaxes(x, 1, 2)), 1, 2)
        x = F.pad(x, pad = (512, 0, 0, 0))
        x = self.t_conv1(x)
        x = F.pad(x, pad = (256, 0, 0, 0))
        x = self.t_conv2(x)
        x = F.pad(x, pad = (128, 0, 0, 0))
        x = self.t_conv3(x)
        # print(x.shape)
        x = F.pad(x, pad = (64, 0, 0, 0))
        x = self.t_conv4(x)
        # print(x.shape)
        x = F.pad(x, pad = (32, 0, 0, 0))
        x = self.t_conv5(x)
        # print(x.shape)
        x = F.pad(x, pad = (16, 0, 0, 0))
        x = self.t_conv6(x)
        # print(x.shape)
        x = F.pad(x, pad = (8, 0, 0, 0))
        x = self.t_conv7(x)
        # print(x.shape)
        x = F.pad(x, pad = (4, 0, 0, 0))
        x = self.t_conv8(x)
        # print(x.shape)
        x = F.pad(x, pad = (2, 0, 0, 0))
        x = self.t_conv9(x)
        # print(x.shape)
        # flip back so the reconstruction is in the original time order
        x = torch.swapaxes(torch.fliplr(torch.swapaxes(x, 1, 2)), 1, 2)
        return encode, x
import time

min_loss = 987654321
for epoch in range(1, epochs + 1):
    start = time.time()
    train_loss = train(model = model, train_loader = train_batch_li)
    val_loss = val(model = model, train_loader = val_batch_li)
    end = time.time()
    print(f'EPOCH:{epoch}, Train Loss:{train_loss}, Val Loss:{val_loss}, training time: {end - start}')
    if val_loss < min_loss:
        min_loss = val_loss
        torch.save(model.state_dict(), model_dir + f'TimeAutoEncoder_val.pt')
        print('model saved')
After running the above code, I get the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/var/folders/br/5b5sgky977xdm4cgcc_mvds00000gn/T/ipykernel_19816/1986897196.py in <module>
5 for epoch in range(1, epochs + 1):
6 start = time.time()
----> 7 train_loss = train(model = model, train_loader = train_batch_li)
8 val_loss = val(model = model, train_loader = val_batch_li)
9 end = time.time()
/var/folders/br/5b5sgky977xdm4cgcc_mvds00000gn/T/ipykernel_19816/1176484610.py in train(model, train_loader)
20 optimizer.zero_grad()
21
---> 22 encode, output = model(mel)
23
24 loss = criterion(output, mel)
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/var/folders/br/5b5sgky977xdm4cgcc_mvds00000gn/T/ipykernel_19816/2004772610.py in forward(self, mel_spec)
135 # print(x.shape)
136 x = F.pad(x, pad = (4, 0, 0, 0))
--> 137 x = self.conv2(x)
138 # print(x.shape)
139 x = F.pad(x, pad = (8, 0, 0, 0))
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/container.py in forward(self, input)
202 def forward(self, input):
203 for module in self:
--> 204 input = module(input)
205 return input
206
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/batchnorm.py in forward(self, input)
169 used for normalization (i.e. in eval mode when buffers are not None).
170 """
--> 171 return F.batch_norm(
172 input,
173 # If buffers are not to be tracked, ensure that they won't be updated
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
2448 _verify_batch_size(input.size())
2449
-> 2450 return torch.batch_norm(
2451 input, weight, bias, running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled
2452 )
RuntimeError: running_mean should contain 1876 elements not 938
What could be the problem? I tried changing the code in line with the error message, but it doesn't work.
The error says that running_mean should contain 1876 elements, not 938. From other questions on Stack Overflow, I found that I have to change 1876 to 938, but I still don't understand what that means.
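For context, BatchNorm1d keeps one running_mean entry per channel, so its num_features must match the channel dimension of its input, i.e. the out_channels of the Conv1d in front of it. A minimal sketch reproducing the exact mismatch:

import torch
import torch.nn as nn

x = torch.randn(4, 1876, 100)  # (batch, channels, length)
nn.BatchNorm1d(1876)(x)        # fine: num_features matches the 1876 channels
nn.BatchNorm1d(938)(x)         # RuntimeError: running_mean should contain 1876 elements not 938

Since the conv2 block as posted does pair Conv1d(..., out_channels = 938) with BatchNorm1d(938), one possibility is that the model instance being trained was built from an older version of the class; re-running the class cell and re-instantiating model before training would rule that out.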
I know that with Flux.jl I can do julia> Flux.params(model) to get the parameters, but the output does not tell me how many parameters the model contains in total. Is there a function to check this, or a programmatic way to calculate it?
As @mcabbott mentions in the comments, you can pass the whole model to the params function to get the total count (sum(length, params(model))), or loop through each layer as follows:
julia> model = Chain(
resnet[1:end-2],
Dense(2048, 1000),
Dense(1000, 256),
Dense(256, 2), # we get 2048 features out, and we have 2 classes
)
Chain(Chain(Conv((7, 7), 3=>64), MaxPool((3, 3), pad=1, stride=2), Metalhead.ResidualBlock((Conv((1, 1), 64=>64), Conv((3, 3), 64=>64), Conv((1, 1), 64=>256)), (BatchNorm(64), BatchNorm(64), BatchNorm(256)), Chain(Conv((1, 1), 64=>256), BatchNorm(256))), Metalhead.ResidualBlock((Conv((1, 1), 256=>64), Conv((3, 3), 64=>64), Conv((1, 1), 64=>256)), (BatchNorm(64), BatchNorm(64), BatchNorm(256)), identity), Metalhead.ResidualBlock((Conv((1, 1), 256=>64), Conv((3, 3), 64=>64), Conv((1, 1), 64=>256)), (BatchNorm(64), BatchNorm(64), BatchNorm(256)), identity), Metalhead.ResidualBlock((Conv((1, 1), 256=>128), Conv((3, 3), 128=>128), Conv((1, 1), 128=>512)), (BatchNorm(128), BatchNorm(128), BatchNorm(512)), Chain(Conv((1, 1), 256=>512), BatchNorm(512))), Metalhead.ResidualBlock((Conv((1, 1), 512=>128), Conv((3, 3), 128=>128), Conv((1, 1), 128=>512)), (BatchNorm(128), BatchNorm(128), BatchNorm(512)), identity), Metalhead.ResidualBlock((Conv((1, 1), 512=>128), Conv((3, 3), 128=>128), Conv((1, 1), 128=>512)), (BatchNorm(128), BatchNorm(128), BatchNorm(512)), identity), Metalhead.ResidualBlock((Conv((1, 1), 512=>128), Conv((3, 3), 128=>128), Conv((1, 1), 128=>512)), (BatchNorm(128), BatchNorm(128), BatchNorm(512)), identity), Metalhead.ResidualBlock((Conv((1, 1), 512=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), Chain(Conv((1, 1), 512=>1024), BatchNorm(1024))), Metalhead.ResidualBlock((Conv((1, 1), 1024=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), identity), Metalhead.ResidualBlock((Conv((1, 1), 1024=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), identity), Metalhead.ResidualBlock((Conv((1, 1), 1024=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), identity), Metalhead.ResidualBlock((Conv((1, 1), 1024=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), identity), Metalhead.ResidualBlock((Conv((1, 1), 1024=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), identity), Metalhead.ResidualBlock((Conv((1, 1), 1024=>512), Conv((3, 3), 512=>512), Conv((1, 1), 512=>2048)), (BatchNorm(512), BatchNorm(512), BatchNorm(2048)), Chain(Conv((1, 1), 1024=>2048), BatchNorm(2048))), Metalhead.ResidualBlock((Conv((1, 1), 2048=>512), Conv((3, 3), 512=>512), Conv((1, 1), 512=>2048)), (BatchNorm(512), BatchNorm(512), BatchNorm(2048)), identity), Metalhead.ResidualBlock((Conv((1, 1), 2048=>512), Conv((3, 3), 512=>512), Conv((1, 1), 512=>2048)), (BatchNorm(512), BatchNorm(512), BatchNorm(2048)), identity), MeanPool((7, 7)), #103), Dense(2048, 1000), Dense(1000, 256), Dense(256, 2))
julia> paramCount = 0
0
julia> for layer in model
           paramCount += sum(length, params(layer))
       end
julia> paramCount
25840234
In this example, I am just incrementing the count but you could append the count from each layer into an array for example to keep track of each layer's count individually.
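If you want each layer's count individually rather than a running total, a small sketch (the init = 0 keyword, available since Julia 1.6, keeps layers without trainable parameters, such as pooling layers, from erroring on an empty sum):

julia> layer_counts = [sum(length, params(layer); init = 0) for layer in model]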
I am trying to benchmark the performance of the Flux code mentioned below:
#model
using Flux
vgg19() = Chain(
Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)),
Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)),
MaxPool((2,2)),
Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)),
Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)),
MaxPool((2,2)),
Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)),
Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
MaxPool((2,2)),
Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)),
Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
MaxPool((2,2)),
Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(512),
MaxPool((2,2)),
flatten,
Dense(512, 4096, relu),
Dropout(0.5),
Dense(4096, 4096, relu),
Dropout(0.5),
Dense(4096, 10),
softmax
)
#data
using MLDatasets: CIFAR10
using Flux: onehotbatch
# Data comes pre-normalized in Julia
trainX, trainY = CIFAR10.traindata(Float32)
testX, testY = CIFAR10.testdata(Float32)
# One hot encode labels
trainY = onehotbatch(trainY, 0:9)
testY = onehotbatch(testY, 0:9)
#training
using Flux: crossentropy, @epochs
using Flux.Data: DataLoader
model = vgg19()
opt = Momentum(.001, .9)
loss(x, y) = crossentropy(model(x), y)
data = DataLoader(trainX, trainY, batchsize=64)
@epochs 100 Flux.train!(loss, params(model), data, opt)
I have tried using the tick() and tock() functions to measure the time, but that only gives a basic wall-clock reading and is not precise enough for an intensive comparison.
Numerous developers in the community have recommended the BenchmarkTools.jl package for benchmarking. But when I tried to benchmark a ScikitLearn model in the REPL, it produced a warning:
WARNING: redefinition of constant LogisticRegression. This may fail, cause incorrect answers, or produce other errors.
Similarly, I tried to benchmark the above-mentioned code in the REPL using @btime, but it throws this error:
julia> using BenchmarkTools
julia> @btime include("C:/Users/user/code.jl")
[ Info: Epoch 1
WARNING: both Flux and BenchmarkTools export "params"; uses of it in module Main must be qualified
ERROR: LoadError: UndefVarError: params not defined
May I know what is the best way to perform a detailed benchmark of the code?
Thanks in advance.
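Update: the UndefVarError appears to be a direct result of the name clash in the warning: once BenchmarkTools is loaded alongside Flux, the bare name params is ambiguous in Main and has to be qualified. A minimal sketch, reusing the model, loss, data, and opt defined above (the $ interpolation is the BenchmarkTools convention for avoiding global-variable overhead):

julia> ps = Flux.params(model)  # qualified, so the clash is resolved
julia> @btime Flux.train!($loss, $ps, $data, $opt)

Two caveats: train! mutates the model, so each timing run keeps training it; and @btime include(...) re-evaluates every top-level definition on each run, which is also where the constant-redefinition warning comes from. Wrapping the training step in a function and benchmarking that single call avoids both problems.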
I am trying to analyse a recurring event dataset and I am struggling to fit the model.
A subset of my data:
outdat <- structure(list(yr = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), jday = c(141, 151, 156, 157, 157, 158, 164, 168, 168, 168, 168, 168, 168, 170, 171, 171, 177, 177, 177, 178, 179, 181, 182, 182, 182, 182, 182, 184, 186, 188, 188, 188, 189, 191, 197, 197, 197, 198, 198, 199, 209, 211, 217, 223, 230, 161, 187, 196, 196, 196, 197, 197, 201, 204, 204, 204, 208, 209, 211, 212, 215, 215, 219, 221, 222, 225, 229, 229, 245, 252, 256, 159, 160, 166, 172, 174, 174, 178, 178, 178, 178, 178, 179, 182, 185, 185, 186, 186, 187, 187, 187, 187, 187, 187, 188, 188, 192, 195, 195, 195, 195, 195, 196, 196, 196, 200, 200, 200, 200, 202, 203, 204, 207, 207, 207, 207, 207, 207, 207, 208, 212, 212, 226), out = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), row.names = c(NA, -123L), class = c("tbl_df", "tbl", "data.frame"))
The data show occurrences of a crop disease in a country, and I am trying to fit the Cox PH model. The survival package manual recommends defining the term cluster() in the formula.
cm <- coxph(Surv(time =jday, event = out ) ~ cluster(yr), data = outdat)
Error in reformulate(temp[1 - tcl]) :
'termlabels' must be a character vector of length at least one
If I enter yr as an ordinary covariate instead of a grouping variable, the model is fitted:
cm <- coxph(Surv(time =jday, event = out ) ~ yr, data = outdat)
Note that I have tried changing the class of outdat$yr to character/factor.
A cluster() term adjusts the standard errors of the hazards, but since it's the only term on the right hand side of the formula, you have no covariates to adjust. You need at least one covariate. And I wouldn't include the cluster term unless you had a specific reason to.
See here for more info
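For illustration, a minimal sketch that satisfies the "at least one covariate" requirement using only the subset above; yr is reused here as both the covariate and the cluster purely so the call runs, which is not statistically meaningful (in practice the covariate would be a real predictor):

library(survival)
cm <- coxph(Surv(time = jday, event = out) ~ yr + cluster(yr), data = outdat)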
I am trying to use networkD3::forceNetwork to create a chart of employers and colleges from which employers hire employees.
Right now, I have something like this:
forceNetwork(Links= Links, Nodes= netDf ,
Source = 'collegeName', Target = 'organizationName', Value='count',
NodeID = 'collegeName', Group = 'organizationName')
But the output doesn't look as expected. What I would like to have is:
One bubble for each college
One bubble for each employer
Colleges connected to employer, with number of employers (count) mapped to the width of the connection lines.
Colleges are never connected to each other, and the same holds for employers.
This is the dataset I am using netDf:
structure(list(collegeName = c("college1", "college1", "college2",
"college3", "college3", "college3", "college4", "college5", "college5",
"college6", "college6", "college6", "college7", "college7", "college7",
"college8", "college9", "college10", "college10", "college11"
), organizationName = c("employer2", "employer3", "employer2",
"employer1", "employer2", "employer3", "employer2", "employer2",
"employer3", "employer1", "employer2", "employer3", "employer1",
"employer2", "employer3", "employer2", "employer2", "employer2",
"employer3", "employer2"), count = c(858, 176, 461, 201, 2266,
495, 430, 1992, 290, 127, 1754, 549, 136, 2839, 686, 638, 275,
1388, 387, 188), group = c(2, 3, 2, 1, 2, 3, 2, 2, 3, 1, 2, 3,
1, 2, 3, 2, 2, 2, 3, 2)), .Names = c("collegeName", "organizationName",
"count", "group"), row.names = c(NA, -20L), class = "data.frame")
And this is the Links dataset:
structure(list(collegeName = c(0, 0, 1, 2, 2, 2, 3, 4, 4, 5,
5, 5, 6, 6, 6, 7, 8, 9, 9, 10), organizationName = c(1, 2, 1,
0, 1, 2, 1, 1, 2, 0, 1, 2, 0, 1, 2, 1, 1, 1, 2, 1), count = c(858,
176, 461, 201, 2266, 495, 430, 1992, 290, 127, 1754, 549, 136,
2839, 686, 638, 275, 1388, 387, 188), group = c(2, 3, 2, 1, 2,
3, 2, 2, 3, 1, 2, 3, 1, 2, 3, 2, 2, 2, 3, 2)), .Names = c("collegeName",
"organizationName", "count", "group"), row.names = c(NA, -20L
), class = "data.frame")
Also, would it be possible to map a fourth variable to the bubble size? Say, for instance, that I want to map count to the size of the bubbles pertaining to the employers; how can I do that?
I think your Links and Nodes data frames do not meet the requirements as specified in ?forceNetwork. Instead, you could do:
library(networkD3)
set.seed(1)
# one node per unique college/employer name
nodes <- data.frame(Label = unique(c(netDf[,1], netDf[,2])))
# group by name prefix ("col" vs "emp") so the two kinds get distinct colours
nodes$Group <- as.factor(substr(nodes$Label, 1, 3))
# attach each employer's total count; colleges get NA here, replaced by 1 below
nodes <- merge(
    nodes,
    aggregate(count~organizationName, netDf, sum),
    by.x="Label", by.y="organizationName",
    all.x=TRUE
)
nodes$count[is.na(nodes$count)] <- 1
# forceNetwork expects 0-based node indices in the Links data frame
links <- transform(netDf,
    Source = match(netDf$collegeName, nodes$Label)-1,
    Target = match(netDf$organizationName, nodes$Label)-1
)
forceNetwork(
Links = transform(links, count = count/min(count)),
Nodes = nodes,
Source = 'Source',
Target = 'Target',
Value='count',
NodeID = 'Label',
Group = "Group",
Nodesize = "count",
legend = TRUE,
opacity = 1,
radiusCalculation = JS("Math.log(d.nodesize)+6")
)
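As for mapping a fourth variable to bubble size: that is what Nodesize = "count" does here. The aggregate()/merge() steps attach each employer's total count to its node (colleges fall back to 1), and the radiusCalculation JavaScript expression log-scales that value into the drawn radius, so any numeric column on the Nodes data frame could be mapped the same way.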