# encoder: predicts the future from past data (learns the causal structure at t-1)
# decoder: predicts the past from future data (learns the causal structure at t+1)
class TimeAutoEncoder(nn.Module):
    def __init__(self):
        super(TimeAutoEncoder, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels = 48, out_channels = 1876, kernel_size = 3, stride = 1, padding = 0, dilation = 1),
            nn.BatchNorm1d(1876),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels = 1876, out_channels = 938, kernel_size = 3, stride = 1, padding = 0, dilation = 2),
            nn.BatchNorm1d(938),
            nn.ReLU(),
        )
        self.conv3 = nn.Sequential(
            nn.Conv1d(in_channels = 938, out_channels = 512, kernel_size = 3, stride = 1, padding = 0, dilation = 4),
            nn.BatchNorm1d(512),
            nn.ReLU(),
        )
        self.conv4 = nn.Sequential(
            nn.Conv1d(in_channels = 512, out_channels = 256, kernel_size = 3, stride = 1, padding = 0, dilation = 8),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        self.conv5 = nn.Sequential(
            nn.Conv1d(in_channels = 256, out_channels = 128, kernel_size = 3, stride = 1, padding = 0, dilation = 16),
            nn.BatchNorm1d(128),
            nn.ReLU(),
        )
        self.conv6 = nn.Sequential(
            nn.Conv1d(in_channels = 128, out_channels = 64, kernel_size = 3, stride = 1, padding = 0, dilation = 32),
            nn.BatchNorm1d(64),
            nn.ReLU(),
        )
        self.conv7 = nn.Sequential(
            nn.Conv1d(in_channels = 64, out_channels = 32, kernel_size = 3, stride = 1, padding = 0, dilation = 64),
            nn.BatchNorm1d(32),
            nn.ReLU(),
        )
        self.conv8 = nn.Sequential(
            nn.Conv1d(in_channels = 32, out_channels = 16, kernel_size = 3, stride = 1, padding = 0, dilation = 128),
            nn.BatchNorm1d(16),
            nn.ReLU(),
        )
        self.conv9 = nn.Sequential(
            nn.Conv1d(in_channels = 16, out_channels = 8, kernel_size = 3, stride = 1, padding = 0, dilation = 256),
            nn.BatchNorm1d(8),
            nn.ReLU(),
        )
        self.encoder_fc = nn.Sequential(
            nn.Linear(8 * 1876, 128),
            nn.BatchNorm1d(128),
            nn.Tanh(),
        )
        self.decoder_fc = nn.Sequential(
            nn.Linear(128, 8 * 1876),
            nn.ReLU(),
        )
        self.t_conv1 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 8, out_channels = 16, kernel_size = 3, stride = 1, dilation = 62),
            nn.Conv1d(in_channels = 8, out_channels = 16, kernel_size = 3, stride = 1, padding = 0, dilation = 256),
            nn.BatchNorm1d(16),
            nn.ReLU(),
        )
        self.t_conv2 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 16, out_channels = 32, kernel_size = 3, stride = 1, dilation = 30),
            nn.Conv1d(in_channels = 16, out_channels = 32, kernel_size = 3, stride = 1, padding = 0, dilation = 128),
            nn.BatchNorm1d(32),
            nn.ReLU(),
        )
        self.t_conv3 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 32, out_channels = 64, kernel_size = 3, stride = 1, dilation = 14),
            nn.Conv1d(in_channels = 32, out_channels = 64, kernel_size = 3, stride = 1, padding = 0, dilation = 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
        )
        self.t_conv4 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 64, out_channels = 128, kernel_size = 3, stride = 1, dilation = 6),
            nn.Conv1d(in_channels = 64, out_channels = 128, kernel_size = 3, stride = 1, padding = 0, dilation = 32),
            nn.BatchNorm1d(128),
            nn.ReLU(),
        )
        self.t_conv5 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 128, out_channels = 256, kernel_size = 3, stride = 1, dilation = 2),
            nn.Conv1d(in_channels = 128, out_channels = 256, kernel_size = 3, stride = 1, padding = 0, dilation = 16),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        self.t_conv6 = nn.Sequential(
            # nn.ConvTranspose1d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, dilation = 1),
            nn.Conv1d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, padding = 0, dilation = 8),
            nn.BatchNorm1d(512),
            nn.ReLU(),
        )
        self.t_conv7 = nn.Sequential(
            nn.Conv1d(in_channels = 512, out_channels = 938, kernel_size = 3, stride = 1, padding = 0, dilation = 4),
            nn.BatchNorm1d(938),
            nn.ReLU(),
        )
        self.t_conv8 = nn.Sequential(
            nn.Conv1d(in_channels = 938, out_channels = 1876, kernel_size = 3, stride = 1, padding = 0, dilation = 2),
            nn.BatchNorm1d(1876),
            nn.ReLU(),
        )
        self.t_conv9 = nn.Sequential(
            nn.Conv1d(in_channels = 1876, out_channels = 48, kernel_size = 3, stride = 1, padding = 0, dilation = 1)
        )
    def forward(self, mel_spec):
        # encoder: left-pad by 2 * dilation before each conv (causal padding keeps the length at 1876)
        x = F.pad(mel_spec, pad = (2, 0, 0, 0))
        x = self.conv1(x)
        x = F.pad(x, pad = (4, 0, 0, 0))
        x = self.conv2(x)
        x = F.pad(x, pad = (8, 0, 0, 0))
        x = self.conv3(x)
        x = F.pad(x, pad = (16, 0, 0, 0))
        x = self.conv4(x)
        x = F.pad(x, pad = (32, 0, 0, 0))
        x = self.conv5(x)
        x = F.pad(x, pad = (64, 0, 0, 0))
        x = self.conv6(x)
        x = F.pad(x, pad = (128, 0, 0, 0))
        x = self.conv7(x)
        x = F.pad(x, pad = (256, 0, 0, 0))
        x = self.conv8(x)
        x = F.pad(x, pad = (512, 0, 0, 0))
        x = self.conv9(x)
        encode = self.encoder_fc(x.view(-1, 8 * 1876))
        # decoder: reverse the time axis, run the mirrored conv stack, then flip back
        x = self.decoder_fc(encode)
        x = x.view(-1, 8, 1876)
        x = torch.swapaxes(torch.fliplr(torch.swapaxes(x, 1, 2)), 1, 2)
        x = F.pad(x, pad = (512, 0, 0, 0))
        x = self.t_conv1(x)
        x = F.pad(x, pad = (256, 0, 0, 0))
        x = self.t_conv2(x)
        x = F.pad(x, pad = (128, 0, 0, 0))
        x = self.t_conv3(x)
        x = F.pad(x, pad = (64, 0, 0, 0))
        x = self.t_conv4(x)
        x = F.pad(x, pad = (32, 0, 0, 0))
        x = self.t_conv5(x)
        x = F.pad(x, pad = (16, 0, 0, 0))
        x = self.t_conv6(x)
        x = F.pad(x, pad = (8, 0, 0, 0))
        x = self.t_conv7(x)
        x = F.pad(x, pad = (4, 0, 0, 0))
        x = self.t_conv8(x)
        x = F.pad(x, pad = (2, 0, 0, 0))
        x = self.t_conv9(x)
        x = torch.swapaxes(torch.fliplr(torch.swapaxes(x, 1, 2)), 1, 2)
        return encode, x
import time

min_loss = 987654321  # large sentinel for the best validation loss seen so far
for epoch in range(1, epochs + 1):
    start = time.time()
    train_loss = train(model = model, train_loader = train_batch_li)
    val_loss = val(model = model, train_loader = val_batch_li)
    end = time.time()
    print(f'EPOCH:{epoch}, Train Loss:{train_loss}, Val Loss:{val_loss}, training time: {end - start}')
    if val_loss < min_loss:
        min_loss = val_loss
        torch.save(model.state_dict(), model_dir + f'TimeAutoEncoder_val.pt')
        print('model saved')
After running the above code, I get the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/var/folders/br/5b5sgky977xdm4cgcc_mvds00000gn/T/ipykernel_19816/1986897196.py in <module>
5 for epoch in range(1, epochs + 1):
6 start = time.time()
----> 7 train_loss = train(model = model, train_loader = train_batch_li)
8 val_loss = val(model = model, train_loader = val_batch_li)
9 end = time.time()
/var/folders/br/5b5sgky977xdm4cgcc_mvds00000gn/T/ipykernel_19816/1176484610.py in train(model, train_loader)
20 optimizer.zero_grad()
21
---> 22 encode, output = model(mel)
23
24 loss = criterion(output, mel)
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/var/folders/br/5b5sgky977xdm4cgcc_mvds00000gn/T/ipykernel_19816/2004772610.py in forward(self, mel_spec)
135 # print(x.shape)
136 x = F.pad(x, pad = (4, 0, 0, 0))
--> 137 x = self.conv2(x)
138 # print(x.shape)
139 x = F.pad(x, pad = (8, 0, 0, 0))
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/container.py in forward(self, input)
202 def forward(self, input):
203 for module in self:
--> 204 input = module(input)
205 return input
206
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/batchnorm.py in forward(self, input)
169 used for normalization (i.e. in eval mode when buffers are not None).
170 """
--> 171 return F.batch_norm(
172 input,
173 # If buffers are not to be tracked, ensure that they won't be updated
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
2448 _verify_batch_size(input.size())
2449
-> 2450 return torch.batch_norm(
2451 input, weight, bias, running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled
2452 )
RuntimeError: running_mean should contain 1876 elements not 938
What could the problem be? I tried changing the code based on the error message, but it doesn't work.
The error says that running_mean should contain 1876 elements, not 938. From similar questions on Stack Overflow, I found that I apparently have to change 1876 to 938, but I still don't understand what this means.
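For reference, what the check means can be reproduced in isolation (a minimal sketch; the shapes here are invented for illustration): BatchNorm1d(C) stores one running_mean entry per channel and compares it against the channel dimension of whatever tensor it receives.

import torch
import torch.nn as nn

bn = nn.BatchNorm1d(938)        # running_mean has 938 entries
x = torch.randn(4, 1876, 100)   # (batch, channels, length); hypothetical shape
bn(x)                           # RuntimeError: running_mean should contain 1876 elements not 938

Note that the class as posted is internally consistent (conv2 feeds the 938-channel output of its Conv1d into BatchNorm1d(938)), so one possibility worth checking is that the model object in the notebook was instantiated from an older version of the class; re-running model = TimeAutoEncoder() after editing the definition would rule that out.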
I am planning a supervised machine learning project where I use data from a longitudinal study (panel study). The goal is to use the 2004 and 2009 predictors to predict the 2014 outcomes. I have now done a first round of data preprocessing, and the data frame looks like the following in a highly abbreviated form:
data_ml <- structure(
  list(
    ID = c(201, 203, 602, 901, 1202, 1501, 1601, 1602, 1603,
           201, 203, 602, 901, 1202, 1501, 1601, 1602, 1603,
           201, 203, 602, 901, 1202, 1501, 1601, 1602, 1603),
    Studyyear = c(2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004,
                  2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009,
                  2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014),
    Gender = c(2, 1, 2, 2, 2, 1, 1, 2, 1,
               2, 1, 2, 2, 2, 1, 1, 2, 1,
               2, 1, 2, 2, 2, 1, 1, 2, 1),
    Predictor1 = c(6, 5, 4, 6, 4, 6, 4, 3, 3,
                   6, 5, 4, 6, 4, 6, 4, 3, 3,
                   6, 5, 4, 6, 4, 6, 4, 3, 3),
    Predictor2 = c(2, 2, 1, 1, 2, 2, 1, 2, 2,
                   2, 2, 1, 1, 2, 2, 1, 2, 2,
                   2, 2, 1, 1, 2, 2, 1, 2, 2),
    Predictor3 = c(0, 6, 1, 6, 0, 0, 4, 2, 3,
                   0, 6, 1, 6, 0, 0, 4, 1, 1,
                   1, 6, 1, 6, 0, 0, 4, 1, 1),
    Outcome1 = c(0, 1, 1, 0, 0, 0, 0, 0, 1,
                 0, 1, 1, 0, 0, 1, 1, 1, 0,
                 1, 1, 0, 0, 1, 0, 0, 1, 1),
    Outcome2 = c(0, 0, 1, 0, 0, 1, 0, 0, 1,
                 0, 0, 1, 0, 0, 0, 1, 1, 0,
                 1, 1, 1, 0, 1, 0, 1, 1, 0)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -27L)
)
Until now, my prediction projects did not include the time dimension (see "Studyyear" in data_ml), so I could just create a task and then make the prediction with the mlr package as follows:
library(mlr)
task <- makeClassifTask(data = data_ml, target = 'Outcome1', positive = '1')
measures = list(acc, auc, tpr, tnr, f1)
resampling_MC <- makeResampleDesc(method = 'Subsample', iters = 500)
learner_logreg <- makeLearner('classif.logreg', predict.type = 'prob')
benchmark_MC <- benchmark(learners = learner_logreg, tasks = task, resamplings = resampling_MC, measures = measures)
Is it still possible to work with the "mlr" package with such a data frame as above and include the time dimension?
Yes, you can do this with the mlr3forecasting package; see the example here. The package isn't on CRAN yet and is still experimental, so you'll have to install it from GitHub (instructions are on the package website), and things may not work as expected.
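If you would rather stay with the released mlr in the meantime, one common workaround (a sketch, separate from the mlr3forecasting route above; the column names follow from data_ml) is to pivot the panel to wide format, so each ID becomes a single row with year-suffixed predictor columns and the 2014 outcome as the target:

library(dplyr)
library(tidyr)

# one row per ID, with columns like Predictor1_2004, Predictor1_2009, Outcome1_2014
data_wide <- data_ml %>%
  pivot_wider(
    id_cols = ID,
    names_from = Studyyear,
    values_from = c(Gender, Predictor1, Predictor2, Predictor3, Outcome1, Outcome2)
  ) %>%
  select(ID, ends_with("_2004"), ends_with("_2009"), Outcome1_2014) %>%
  mutate(Outcome1_2014 = factor(Outcome1_2014))  # makeClassifTask needs a factor target

# the usual mlr workflow then applies unchanged
task <- makeClassifTask(data = as.data.frame(data_wide[-1]), target = "Outcome1_2014", positive = "1")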
I know that with Flux.jl I can call julia> Flux.params(model) to get the parameters, but the output does not tell me how many parameters the model contains in total. Is there a function to check this, or a programmatic way to calculate it?
As @mcabbott mentions in the comments, you can pass the whole model to the params function to get the total count (sum(length, params(model))), or loop through each layer as follows:
julia> model = Chain(
           resnet[1:end-2],
           Dense(2048, 1000),
           Dense(1000, 256),
           Dense(256, 2),  # we get 2048 features out, and we have 2 classes
       )
Chain(Chain(Conv((7, 7), 3=>64), MaxPool((3, 3), pad=1, stride=2), Metalhead.ResidualBlock((Conv((1, 1), 64=>64), Conv((3, 3), 64=>64), Conv((1, 1), 64=>256)), (BatchNorm(64), BatchNorm(64), BatchNorm(256)), Chain(Conv((1, 1), 64=>256), BatchNorm(256))), Metalhead.ResidualBlock((Conv((1, 1), 256=>64), Conv((3, 3), 64=>64), Conv((1, 1), 64=>256)), (BatchNorm(64), BatchNorm(64), BatchNorm(256)), identity), Metalhead.ResidualBlock((Conv((1, 1), 256=>64), Conv((3, 3), 64=>64), Conv((1, 1), 64=>256)), (BatchNorm(64), BatchNorm(64), BatchNorm(256)), identity), Metalhead.ResidualBlock((Conv((1, 1), 256=>128), Conv((3, 3), 128=>128), Conv((1, 1), 128=>512)), (BatchNorm(128), BatchNorm(128), BatchNorm(512)), Chain(Conv((1, 1), 256=>512), BatchNorm(512))), Metalhead.ResidualBlock((Conv((1, 1), 512=>128), Conv((3, 3), 128=>128), Conv((1, 1), 128=>512)), (BatchNorm(128), BatchNorm(128), BatchNorm(512)), identity), Metalhead.ResidualBlock((Conv((1, 1), 512=>128), Conv((3, 3), 128=>128), Conv((1, 1), 128=>512)), (BatchNorm(128), BatchNorm(128), BatchNorm(512)), identity), Metalhead.ResidualBlock((Conv((1, 1), 512=>128), Conv((3, 3), 128=>128), Conv((1, 1), 128=>512)), (BatchNorm(128), BatchNorm(128), BatchNorm(512)), identity), Metalhead.ResidualBlock((Conv((1, 1), 512=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), Chain(Conv((1, 1), 512=>1024), BatchNorm(1024))), Metalhead.ResidualBlock((Conv((1, 1), 1024=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), identity), Metalhead.ResidualBlock((Conv((1, 1), 1024=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), identity), Metalhead.ResidualBlock((Conv((1, 1), 1024=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), identity), Metalhead.ResidualBlock((Conv((1, 1), 1024=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), identity), Metalhead.ResidualBlock((Conv((1, 1), 1024=>256), Conv((3, 3), 256=>256), Conv((1, 1), 256=>1024)), (BatchNorm(256), BatchNorm(256), BatchNorm(1024)), identity), Metalhead.ResidualBlock((Conv((1, 1), 1024=>512), Conv((3, 3), 512=>512), Conv((1, 1), 512=>2048)), (BatchNorm(512), BatchNorm(512), BatchNorm(2048)), Chain(Conv((1, 1), 1024=>2048), BatchNorm(2048))), Metalhead.ResidualBlock((Conv((1, 1), 2048=>512), Conv((3, 3), 512=>512), Conv((1, 1), 512=>2048)), (BatchNorm(512), BatchNorm(512), BatchNorm(2048)), identity), Metalhead.ResidualBlock((Conv((1, 1), 2048=>512), Conv((3, 3), 512=>512), Conv((1, 1), 512=>2048)), (BatchNorm(512), BatchNorm(512), BatchNorm(2048)), identity), MeanPool((7, 7)), #103), Dense(2048, 1000), Dense(1000, 256), Dense(256, 2))
julia> paramCount = 0
0

julia> for layer in model
           paramCount += sum(length, params(layer))
       end

julia> paramCount
25840234
In this example, I am just incrementing the count but you could append the count from each layer into an array for example to keep track of each layer's count individually.
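For instance, a one-line variant (a sketch; the init keyword requires Julia >= 1.6) that keeps the per-layer counts in an array:

layer_counts = [sum(length, params(layer); init = 0) for layer in model]  # init = 0 guards layers with no parameters
total = sum(layer_counts)  # 25840234, matching the loop above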
The code below is taken from the Flux model-zoo. I am trying to run the VGG19 tutorial in Julia using the Flux library.
Code:
# model
using Flux
vgg19() = Chain(
    Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)),
    Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)),
    MaxPool((2,2)),
    Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)),
    Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)),
    MaxPool((2,2)),
    Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)),
    Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
    Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
    MaxPool((2,2)),
    Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)),
    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
    MaxPool((2,2)),
    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
    BatchNorm(512),
    MaxPool((2,2)),
    flatten,
    Dense(512, 4096, relu),
    Dropout(0.5),
    Dense(4096, 4096, relu),
    Dropout(0.5),
    Dense(4096, 10),
    softmax
)
# data
using MLDatasets: CIFAR10
using Flux: onehotbatch
# Data comes pre-normalized in Julia
trainX, trainY = CIFAR10.traindata(Float64)
testX, testY = CIFAR10.testdata(Float64)
# One hot encode labels
trainY = onehotbatch(trainY, 0:9)
testY = onehotbatch(testY, 0:9)
# training
using Flux: crossentropy, @epochs
using Flux.Data: DataLoader
model = vgg19()
opt = Momentum(.001, .9)
loss(x, y) = crossentropy(model(x), y)
data = DataLoader(trainX, trainY, batchsize=64)
@epochs 100 Flux.train!(loss, params(model), data, opt)
When I execute this file on IJulia, the following error is thrown:
MethodError: no method matching ∇maxpool(::Array{Float32,4}, ::Array{Float64,4}, ::Array{Float64,4}, ::PoolDims{2,(2, 2),(2, 2),(0, 0, 0, 0),(1, 1)})
Closest candidates are:
∇maxpool(::AbstractArray{T,N}, !Matched::AbstractArray{T,N}, !Matched::AbstractArray{T,N}, ::PoolDims; kwargs...) where {T, N}
Please suggest a solution for this error and, if possible, provide a brief explanation or reference.
Thanks in advance!
As mentioned by @mcabbott, the issue was related to the element type of the input data. It can be fixed by changing the type from Float64 to Float32 for the parameters below under the # data section.
trainX, trainY = CIFAR10.traindata(Float32)
testX, testY = CIFAR10.testdata(Float32)
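The underlying cause is that Flux initializes layer weights as Float32, so a Float64 input produces mixed element types by the time the pullback for MaxPool runs, and no ∇maxpool method matches that combination. If reloading the dataset is inconvenient, casting each batch on its way into the model should work as well (a sketch):

loss(x, y) = crossentropy(model(Float32.(x)), y)  # cast the batch to match the Float32 weights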
I am trying to use networkD3::forceNetwork to create a chart of employers and colleges from which employers hire employees.
Right now, I have something like this:
forceNetwork(Links = Links, Nodes = netDf,
             Source = 'collegeName', Target = 'organizationName', Value = 'count',
             NodeID = 'collegeName', Group = 'organizationName')
But the output doesn't look as expected. What I would like to have is:
One bubble for each college
One bubble for each employer
Colleges connected to employers, with the number of hires (count) mapped to the width of the connection lines.
Colleges are never connected to each other, and the same holds for employers.
This is the dataset I am using, netDf:
structure(list(collegeName = c("college1", "college1", "college2",
"college3", "college3", "college3", "college4", "college5", "college5",
"college6", "college6", "college6", "college7", "college7", "college7",
"college8", "college9", "college10", "college10", "college11"
), organizationName = c("employer2", "employer3", "employer2",
"employer1", "employer2", "employer3", "employer2", "employer2",
"employer3", "employer1", "employer2", "employer3", "employer1",
"employer2", "employer3", "employer2", "employer2", "employer2",
"employer3", "employer2"), count = c(858, 176, 461, 201, 2266,
495, 430, 1992, 290, 127, 1754, 549, 136, 2839, 686, 638, 275,
1388, 387, 188), group = c(2, 3, 2, 1, 2, 3, 2, 2, 3, 1, 2, 3,
1, 2, 3, 2, 2, 2, 3, 2)), .Names = c("collegeName", "organizationName",
"count", "group"), row.names = c(NA, -20L), class = "data.frame")
And this is the Links dataset:
structure(list(collegeName = c(0, 0, 1, 2, 2, 2, 3, 4, 4, 5,
5, 5, 6, 6, 6, 7, 8, 9, 9, 10), organizationName = c(1, 2, 1,
0, 1, 2, 1, 1, 2, 0, 1, 2, 0, 1, 2, 1, 1, 1, 2, 1), count = c(858,
176, 461, 201, 2266, 495, 430, 1992, 290, 127, 1754, 549, 136,
2839, 686, 638, 275, 1388, 387, 188), group = c(2, 3, 2, 1, 2,
3, 2, 2, 3, 1, 2, 3, 1, 2, 3, 2, 2, 2, 3, 2)), .Names = c("collegeName",
"organizationName", "count", "group"), row.names = c(NA, -20L
), class = "data.frame")
Also, would it be possible to map a fourth variable to the bubble size? Say, for instance, that I want to map count to the size of the employer bubbles; how can I do that?
I think your Links and Nodes data frames do not meet the requirements specified in ?forceNetwork. Instead, you could do:
library(networkD3)
set.seed(1)
# one node per unique college/employer name
nodes <- data.frame(Label = unique(c(netDf[,1], netDf[,2])))
# group nodes by name prefix ("col" vs "emp")
nodes$Group <- as.factor(substr(nodes$Label, 1, 3))
# attach the total hires per employer (used for bubble size below)
nodes <- merge(
  nodes,
  aggregate(count~organizationName, netDf, sum),
  by.x="Label", by.y="organizationName",
  all.x=TRUE
)
# colleges have no hire total, so give them a placeholder size
nodes$count[is.na(nodes$count)] <- 1
# forceNetwork expects zero-based indices into the Nodes data frame
links <- transform(netDf,
  Source = match(netDf$collegeName, nodes$Label)-1,
  Target = match(netDf$organizationName, nodes$Label)-1
)
forceNetwork(
  Links = transform(links, count = count/min(count)),
  Nodes = nodes,
  Source = 'Source',
  Target = 'Target',
  Value = 'count',
  NodeID = 'Label',
  Group = "Group",
  Nodesize = "count",
  legend = TRUE,
  opacity = 1,
  radiusCalculation = JS("Math.log(d.nodesize)+6")
)
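A few notes on the mapping above: dividing the links' count by its minimum keeps the line widths in a drawable range; Nodesize = "count" answers the fourth-variable question by sizing each employer bubble by its total hires (colleges get a placeholder of 1); and radiusCalculation log-scales the radius so the largest employers don't swamp the layout.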