Airflow dag tasks get stuck in running or no status - airflow

I created a DAG that contains a few sub-DAGs, each of which runs simple bash commands. Almost from the beginning, many of the tasks get stuck in the "running" or "no status" state and don't move on. After some time I can see that more and more DAG instances get stuck, and I'm left with only one instance really running.
What can I do to make sure this doesn't happen?
Here is my dag:
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.subdag_operator import SubDagOperator
from datetime import datetime, timedelta
# Arguments applied to every task of the parent DAG and of all its sub-DAGs.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 1, 1),
    'email': ['airflow@airflow.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

# Shell command run by every leaf task (kept as one literal so the path is
# not broken by a line wrap).
TEST_JAR_COMMAND = (
    'java -jar /home/presidio/dev-projects/presidio-core/'
    'presidio-workflows/tests/resources/jars/test.jar'
)
# Number of identical bash tasks placed in each leaf sub-DAG.
BASH_TASKS_PER_SUBDAG = 10


def _build_bash_subdag(dag_id):
    """Return a DAG called ``dag_id`` holding ``bash_0`` .. ``bash_9`` tasks."""
    subdag = DAG(dag_id, default_args=default_args)
    for index in range(BASH_TASKS_PER_SUBDAG):
        BashOperator(
            task_id='bash_{}'.format(index),
            bash_command=TEST_JAR_COMMAND,
            dag=subdag)
    return subdag


# Parent DAG, scheduled hourly.
# NOTE(review): the sub-DAGs below are created without an explicit
# schedule_interval, unlike the parent -- confirm this is intended.
dag = DAG('performance', default_args=default_args,
          schedule_interval='@hourly', max_active_runs=30)

# First stage: adapter sub-DAG attached directly to the parent.
adapterSubDag = _build_bash_subdag('performance.adapterSubDagOperator')
adapterSubdagOperator = SubDagOperator(
    subdag=adapterSubDag,
    task_id='adapterSubDagOperator',
    dag=dag
)

# Second stage: a container sub-DAG that itself holds three nested sub-DAGs.
presidioSubDag = DAG('performance.presidioSubDagOperator',
                     default_args=default_args)
presidioSubdagOperator = SubDagOperator(
    subdag=presidioSubDag,
    task_id='presidioSubDagOperator',
    dag=dag
)

inputSubDag = _build_bash_subdag(
    'performance.presidioSubDagOperator.inputSubDagOperator')
inputSubdagOperator = SubDagOperator(
    subdag=inputSubDag,
    task_id='inputSubDagOperator',
    dag=presidioSubDag
)

adeSubDag = _build_bash_subdag(
    'performance.presidioSubDagOperator.adeSubDagOperator')
adeSubdagOperator = SubDagOperator(
    subdag=adeSubDag,
    task_id='adeSubDagOperator',
    dag=presidioSubDag
)

outputSubDag = _build_bash_subdag(
    'performance.presidioSubDagOperator.outputSubDagOperator')
outputSubdagOperator = SubDagOperator(
    subdag=outputSubDag,
    task_id='outputSubDagOperator',
    dag=presidioSubDag
)

# Ordering inside the nested sub-DAG, then between the two top-level stages.
inputSubdagOperator >> adeSubdagOperator >> outputSubdagOperator
adapterSubdagOperator >> presidioSubdagOperator

Related

Pytorch geometric label prediction using GNN

I'm currently trying to find a way to get a single label prediction from my GNN. I'd like to create a list of ground truths compared with the labels the model predicts.
My code is as follows, run in Google Colab:
Should I create a validation step or is there another way about this? I'm very new to this.
# Build the Mutagenicity dataset rooted at "." and shuffle it.
dataset = TUDataset(root=".", name="Mutagenicity", transform=AddSymbols())
dataset = dataset.shuffle()
from torch_geometric.data import DataLoader

# Draw a single mini-batch of 32 graphs and inspect it.
loader = DataLoader(dataset, batch_size=32, shuffle=True)
data_batch = next(iter(loader))
print(data_batch)
print("Number of graphs in the batch:", data_batch.num_graphs)
import torch
from torch_scatter import scatter
from torch_geometric.utils import to_dense_adj
class GraphConvolution(torch.nn.Module):
    """Graph convolution computing ``x @ W1 + (A @ x) @ W2``.

    ``A`` is the dense adjacency matrix built from ``edge_index`` with
    ``to_dense_adj``.
    """

    def __init__(self, num_input_features, num_output_features):
        """Create the two weight matrices, initialised as ``randn * 0.1``."""
        super().__init__()
        self.W1 = torch.nn.Parameter(
            torch.randn(num_input_features, num_output_features) * 0.1)
        self.W2 = torch.nn.Parameter(
            torch.randn(num_input_features, num_output_features) * 0.1)

    def forward(self, x, edge_index):
        """Combine each node's own features with its neighbours' features.

        Args:
            x: node feature matrix.
            edge_index: edge list that ``to_dense_adj`` turns into a dense
                adjacency matrix.

        Returns:
            ``x @ W1`` plus the adjacency-aggregated features times ``W2``.
        """
        adj = to_dense_adj(edge_index)
        # Matrix products: the `@` operators had been garbled into `#`,
        # which turned the rest of each line into a comment.
        neighbors_aggregation = adj @ x
        out = x @ self.W1 + neighbors_aggregation @ self.W2
        return out
from torch.nn import Linear, LogSoftmax
import torch.nn.functional as F
from torch_geometric.nn import GraphConv, global_add_pool
class GNN(torch.nn.Module):
    """Graph classifier: five GraphConv layers, sum pooling, two linear layers.

    The forward pass ends in a LogSoftmax over the last dimension.
    """

    def __init__(self, num_classes, hidden_dim, node_features_dim,
                 edge_features_dim=None):
        """Build the layers; ``edge_features_dim`` is accepted but not used."""
        super().__init__()
        self.hidden_dim = hidden_dim

        # Message-passing stack: the first layer maps the input features to
        # hidden_dim, the remaining four keep that width.
        self.conv1 = GraphConv(node_features_dim, hidden_dim)
        self.conv2 = GraphConv(hidden_dim, hidden_dim)
        self.conv3 = GraphConv(hidden_dim, hidden_dim)
        self.conv4 = GraphConv(hidden_dim, hidden_dim)
        self.conv5 = GraphConv(hidden_dim, hidden_dim)

        # Classification head applied to the pooled graph representation.
        self.fc1 = Linear(hidden_dim, hidden_dim)
        self.fc2 = Linear(hidden_dim, num_classes)
        self.readout = LogSoftmax(dim=-1)

    def forward(self, x, edge_index, batch):
        """Return the LogSoftmax output of the classifier for each graph."""
        conv_layers = (self.conv1, self.conv2, self.conv3,
                       self.conv4, self.conv5)
        for conv in conv_layers:
            x = F.relu(conv(x, edge_index))

        # Sum node embeddings per graph, as assigned by `batch`.
        pooled = global_add_pool(x, batch)
        hidden = F.relu(self.fc1(pooled))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.readout(self.fc2(hidden))
from torch_geometric.loader import DataLoader
from tqdm.auto import tqdm
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

# Split points: the first half is for training, up to 70% for validation,
# and the remainder for testing.
num_graphs = len(dataset)
idx_train_end = int(num_graphs * .5)
idx_valid_end = int(num_graphs * .7)

BATCH_SIZE = 128
# The test loader uses a batch size equal to the size of the whole test set.
BATCH_SIZE_TEST = num_graphs - idx_valid_end

train_split = dataset[:idx_train_end]
valid_split = dataset[idx_train_end:idx_valid_end]
test_split = dataset[idx_valid_end:]

loader_train = DataLoader(train_split, batch_size=BATCH_SIZE, shuffle=True)
loader_valid = DataLoader(valid_split, batch_size=BATCH_SIZE, shuffle=True)
loader_test = DataLoader(test_split, batch_size=BATCH_SIZE_TEST, shuffle=False)
from src import compute_accuracy
def train_model(loader_train, loader_valid, model, optimizer, loss_function, N_EPOCHS):
# Train `model` for N_EPOCHS passes over loader_train, logging the loss and the
# train/validation accuracy of each epoch into local lists.
# NOTE(review): the indentation of this snippet was lost, so the nesting
# described in the comments below is inferred -- confirm against the original.
# NOTE(review): no return statement is visible, so the logged lists are not
# handed back to the caller.
# Prepare empty lists for logging
train_losses = []
train_accs = []
val_accs = []
for epoch in tqdm(range(N_EPOCHS)):
# NOTE(review): epoch_loss is never updated or read in the visible code.
epoch_loss = 0
for batch in tqdm(loader_train, leave=False):
# `device` is a module-level name, not a parameter of this function.
batch.to(device)
out = model(batch.x, batch.edge_index, batch.batch)
loss = loss_function(out, batch.y.flatten())
# Standard optimisation step: reset gradients, backpropagate, update weights.
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Accuracy on both loaders, via the compute_accuracy helper imported from src.
acc_train = compute_accuracy(model, loader_train)
acc_valid = compute_accuracy(model, loader_valid)
with torch.no_grad():
train_accs.append(acc_train)
val_accs.append(acc_valid)
# Stores the loss object of the most recent batch, not an epoch average.
train_losses.append(loss)
print(f"Epoch: {epoch}, Loss: {loss}")

Airflow returns list index out of range

When I run a clear on all Downstream-Recursive-Failed-Future tasks in Airflow 1.10.10
it returns a list index out of range error.
Attached
DAG definition
# Default task arguments handed to the DAG constructor that follows.
default_args = {
    'owner': 'I',
    'depends_on_past': False,
    'start_date': datetime(2020, 6, 1),
    # Address restored from the garbled 'dxxx#yy.com ' (stray '#', trailing space).
    'email': ['dxxx@yy.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
    # Callback invoked when a task fails; slack_alert is defined elsewhere.
    'on_failure_callback': slack_alert
}
with DAG(
dag_id='aaa',
default_args=default_args,
description='my_desc',
schedule_interval='0 * * * *'
# schedule_interval=None
) as dag:
I do not know if it's a normal issue in Airflow, but before upgrading to Airflow 2.0 I would like to know why this happens.

Plotly: How to add dropdown menu for every subplot?

I need to create two subplots with a dropdown menu and a title for each graph (side-by-side comparison). In addition, I'd like to have a shared y-axis.
As of now, I have only one dropdown menu, which changes both graphs.
The code is as follows (note that df consists of 2 columns and a DatetimeIndex):
import plotly.offline as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot, plot
from plotly import tools
labels = ["Vol", "R"]
fig = tools.make_subplots(rows=1, cols=2)

# Each subplot receives two traces, in the same order as `labels`:
# the 12-period rolling standard deviation ("Vol") and the raw series ("R").
trace1 = go.Scatter(x=df.index,
                    y=df['Stock1'].rolling(window=12).std(),
                    mode='lines'
                    )
trace2 = go.Scatter(x=df.index,
                    y=df['Stock1'],
                    mode='lines'
                    )
fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 1)
trace1 = go.Scatter(x=df.index,
                    y=df['Stock2'].rolling(window=12).std(),
                    mode='lines'
                    )
trace2 = go.Scatter(x=df.index,
                    y=df['Stock2'],
                    mode='lines'
                    )
fig.append_trace(trace1, 1, 2)
fig.append_trace(trace2, 1, 2)

# Create buttons for drop down menu.
# The 'visible' list needs one flag per trace in fig.data (four here), not one
# per label; trace j belongs to labels[j % len(labels)].
buttons = []
for i, label in enumerate(labels):
    visibility = [i == (j % len(labels)) for j in range(len(fig.data))]
    button = dict(
        label=label,
        method='update',
        args=[{'visible': visibility},
              {'title': label}])
    buttons.append(button)

updatemenus = [
    dict(active=-1,
         x=-0.15,
         buttons=buttons
         )
]
fig['layout']['title'] = 'Title'
fig['layout']['showlegend'] = False
fig['layout']['updatemenus'] = updatemenus
iplot(fig, filename='dropdown')
According to empet's helpful answer in the Plotly forum, the important thing to know is that the visible key has length equal to the total number of traces in fig.data.
In your case, you have four traces that correspond to Stock 1 (Vol), Stock 1 (R), Stock 2 (Vol) and Stock 2 (R) which is the order in which you added these traces. So we can create 4 buttons to toggle the visibility feature of each trace and pass them as a list to the updatemenus dictionary.
import numpy as np
import pandas as pd
import plotly.offline as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot, plot
from plotly import tools
## Recreate some random data: one year of daily values for two stocks.
np.random.seed(42)
df = pd.DataFrame(
    data=np.random.randint(0, 100, (365, 2)),
    columns=['Stock1', 'Stock2'],
    index=pd.date_range(start='1/1/2019', end='12/31/2019'),
)
labels = ["Vol", "R"]
fig = tools.make_subplots(rows=1, cols=2)

# One subplot column per stock; each gets the rolling-std trace followed by
# the raw-series trace, so fig.data ends up ordered
# Stock1 Vol, Stock1 R, Stock2 Vol, Stock2 R.
for subplot_col, column in enumerate(['Stock1', 'Stock2'], start=1):
    trace1 = go.Scatter(x=df.index,
                        y=df[column].rolling(window=12).std(),
                        mode='lines')
    trace2 = go.Scatter(x=df.index,
                        y=df[column],
                        mode='lines')
    fig.append_trace(trace1, 1, subplot_col)
    fig.append_trace(trace2, 1, subplot_col)

## The visible flags follow the order in which the traces were appended:
## button k shows trace k only.
button_labels = ["Stock 1, Vol", "Stock 1, R", "Stock 2, Vol", "Stock 2, R"]
button1, button2, button3, button4 = [
    dict(method='update',
         args=[{"visible": [pos == shown
                            for pos in range(len(button_labels))]}],
         label=text)
    for shown, text in enumerate(button_labels)
]

updatemenus = [
    dict(active=-1,
         x=-0.15,
         buttons=[button1, button2, button3, button4])
]

layout_settings = (
    ('title', 'Title'),
    ('showlegend', False),
    ('updatemenus', updatemenus),
)
for layout_key, layout_value in layout_settings:
    fig['layout'][layout_key] = layout_value
iplot(fig, filename='dropdown')

sp_execute_external_script R script 'unable to start png() device'

I am trying to create a plot in SQL Server R using the sp_execute_external_script command, but it fails to create the plot png image:
-- Plot weekly case/death counts from #weekly into a PNG through the R runtime.
-- T-SQL variables and parameters take the '@' prefix; '#weekly' is a
-- temporary table and the '#' line inside the script is an R comment.
DECLARE @stateName nvarchar(50) = 'Michigan'
EXEC sp_execute_external_script
@language = N'R',
@script = N'
covidWeeklyDataSet <- InputDataSet
# set up report file for chart
reportfile <- "C:\\temp\\Covid19-Weekly.png"
png(file = reportfile)
plot(x = covidWeeklyDataSet[, 1], y = covidWeeklyDataSet[, 2],
main = paste(state_name, "Weekly Covid 19 Counts", sep = ""),
col = 3, ylab = "Cases", xlab = "Dates", ylim = c(0, 35000))
par(new = TRUE)
plot(x = covidWeeklyDataSet[, 1], y = covidWeeklyDataSet[, 3],
col = 2, ylab = "Cases", xlab = "Dates", ylim = c(0, 35000))
dev.off()
',
@input_data_1 = N'SELECT [date], cases, deaths FROM #weekly',
@params = N'@state_name nvarchar(20)',
@state_name = @stateName
The error message is as follows:
Msg 39004, Level 16, State 20, Line 13 A 'R' script error occurred
during execution of 'sp_execute_external_script' with HRESULT
0x80004004. Msg 39019, Level 16, State 2, Line 13 An external script
error occurred: Error in png(file = reportfile) : unable to start
png() device Calls: source -> withVisible -> eval -> eval -> png In
addition: Warning messages: 1: In png(file = reportfile) : unable to
open file 'C:\temp\Covid19-Weekly.png' for writing 2: In png(file =
reportfile) : opening device failed
Error in execution. Check the output for more information. Error in
eval(ei, envir) : Error in execution. Check the output for more
information. Calls: runScriptFile -> source -> withVisible -> eval ->
eval -> .Call Execution halted
It also fails as an administrator. Please help.
Grant READ & WRITE permissions for c:\temp to "ALL APPLICATION PACKAGES".
-- Minimal check that the R runtime can write a PNG into c:\temp.
-- Parameter names take the '@' prefix; the '#file.create' line inside the
-- script is an R comment and is left as is.
EXEC sp_execute_external_script
@language = N'R',
@script = N'
#file.create("c:\\temp\\mytest.png")
png(filename = "c:\\temp\\mytest.png",
width = 500, height = 500, units = "px", pointsize = 12,
bg = "white", res = NA)
x <- sample(c("A","B","C","D"), 20, replace=TRUE)
plot(table(x))
dev.off()'

How to embed plot inside wx.SplitterWindow (right panel)?

I'm trying to embed a plot inside the right panel of a splitter window. How can I add a plot inside the splitter window? Here is the link to the dataset:
https://www.dropbox.com/s/ncy6dlpm79p578s/Dataset.zip?dl=0.
The file contains rows and columns of wavelength and reflectance.
import wx
from pylab import *
import asciitable
import matplotlib.pyplot as plt
import os
from wxmplot import ImageMatrixFrame
class RandomObj(object):
    """Minimal holder for a single ``name`` attribute."""

    def __init__(self, name):
        # Keep the given name on the instance.
        self.name = name
class SLI(wx.Frame):
# Frame split vertically: a tree of data-file names on the left and a
# StaticText placeholder on the right. Activating a tree item plots the
# matching file through pylab.
# NOTE(review): indentation was lost in this snippet, so the nesting described
# in the comments below is inferred -- confirm against the original.
# NOTE(review): xrange and f.next() are Python 2 constructs.
def __init__(self):
wx.Frame.__init__(self, None, -1, size=(820, 450))
splitter = wx.SplitterWindow(self, style = wx.SP_BORDER)
leftPanel = wx.Panel(splitter, size=(400,100))
rightPanel = wx.Panel(splitter, size=(400,100))
####Tree Widget#####
self.tree = wx.TreeCtrl(leftPanel)
leftSizer = wx.BoxSizer(wx.VERTICAL)
leftSizer.Add(self.tree, 1, wx.EXPAND | wx.ALIGN_CENTER)
leftPanel.SetSizer(leftSizer)
# Right panel currently holds only an empty text label; no plot widget is
# created here.
rightSizer = wx.BoxSizer(wx.VERTICAL)
self.display = wx.StaticText(rightPanel, -1, '', (10, 10),
style=wx.ALIGN_CENTRE)
rightSizer.Add(self.display, -1, wx.EXPAND)
rightPanel.SetSizer(rightSizer)
splitter.SplitVertically(leftPanel, rightPanel)
##### Splitter ends ####
root = self.tree.AddRoot('Database')
self.tree.AppendItem(root, 'USGS')
# files collects full paths; self.test collects bare file names in the same order.
files = []
self.dname = []
self.test = []
# NOTE(review): '.\USGS' is a non-raw string containing a backslash.
for dirname, dirnames, filenames in os.walk('.\USGS'):
for filename in filenames:
files.append(os.path.join(dirname, filename))
self.test.append(filename)
# Each file is added under the most recently appended child of root.
self.tree.AppendItem(self.tree.GetLastChild(root), filename)
self.dname = files[:]
self.tree.AppendItem(root,'ASTER')
for dirname, dirnames, filenames in os.walk('.\ASTER'):
for filename in filenames:
files.append(os.path.join(dirname, filename))
self.test.append(filename)
self.tree.AppendItem(self.tree.GetLastChild(root), filename)
self.dname = files[:]
self.Bind(wx.EVT_TREE_ITEM_ACTIVATED, self.ASTER, self.tree)
def ASTER(self, event):
# Handler for tree-item activation: locate the activated item's text in
# self.test, then plot the file at the same position in self.dname.
self.item = event.GetItem()
value1 = self.tree.GetItemText(self.item)
# value2 becomes the 1-based position of value1 in self.test;
# value3 is the 1-based counter used while walking self.dname.
value2 = 0
value3 = 1
self.item=None
for k in self.test:
if value1 == k:
value2 +=1
break
else:
value2 +=1
for i in self.dname:
if value3 == value2:
array =[]
# NOTE(review): the file is opened without a matching close() in the visible code.
f=open(i, 'r')
# Skip the first 27 lines before collecting data lines.
for j in xrange(27):
f.next()
for line in f:
array.append(line)
data = asciitable.read(array)
# plot/title/show come from `from pylab import *`, so the figure is drawn
# through pylab rather than in the right panel.
plot(data.col1, data.col2)
title(value1)
show()
break
else:
value3 +=1
# Create the wx application, show the SLI frame and enter the event loop.
app = wx.App(None)
# NOTE(review): `frame` is not referenced again in the visible code.
frame = ImageMatrixFrame()
SLI().Show()
app.MainLoop()
How can I insert the plot window inside the right panel of the splitter?
I am not 100% sure I understand your code - there are some formatting and indentation problems for sure. I also am not familiar with asciitable. But, that said, a wxmplot.PlotPanel or ImagePanel can be embedded in a wxPython Frame that uses a Splitter. An example might look like the code below. I tried to make it short, but also tried to make it complete and using plain wxPython. For a more complete application, you'd probably want to put the reading of the datafiles into a separate class, etc. Anyway, this uses your Dataset folder, and should mostly work to show the concepts:
#!/usr/bin/env python
import os
import wx
from wxmplot import PlotPanel
# see https://gist.github.com/newville/e805a6454c4e4c0e010bf0b3cc796d52
from asciifile import read_ascii
LEFTSTYLE = wx.ALIGN_LEFT|wx.GROW|wx.ALL
def pack(window, sizer, expand=1.1):
    """Attach ``sizer`` to ``window``, fit it, then enlarge the window.

    Args:
        window: object providing GetSize, GetMinSize, SetSizer and SetSize.
        sizer: object providing Fit(window).
        expand: scale factor applied to the larger of the window's size and
            minimum size (both read before the sizer is attached), per
            dimension. Previously ignored in favour of a hard-coded 1.1.
    """
    tsize = window.GetSize()
    msize = window.GetMinSize()
    window.SetSizer(sizer)
    sizer.Fit(window)
    # Scale the larger of (current, minimum) in each dimension, truncated to int.
    nsize = (int(expand * max(msize[0], tsize[0])),
             int(expand * max(msize[1], tsize[1])))
    window.SetSize(nsize)
class SpectraPlotterFrame(wx.Frame):
    """Frame with a file list on the left and plot controls on the right.

    The left pane lists the files of ``data_folder`` that could be read; the
    right pane holds X/Y array choosers, a "Show Plot" button and a wxmplot
    PlotPanel.
    """

    def __init__(self, data_folder):
        """Build the splitter layout and schedule reading of ``data_folder``."""
        wx.Frame.__init__(self, None, size=(800, 450))
        self.SetTitle("Data File Plotter: {:s}".format(data_folder))
        self.data_folder = data_folder
        self.current_filename = None
        # Created here so the event handlers can rely on the attribute even
        # if they fire before read_datafiles has run.
        self.datasets = {}

        splitter = wx.SplitterWindow(self, style=wx.SP_LIVE_UPDATE)
        splitter.SetMinimumPaneSize(200)

        # left side: ListBox of File Names
        l_panel = wx.Panel(splitter)
        l_sizer = wx.BoxSizer(wx.VERTICAL)
        self.filelist = wx.ListBox(l_panel)
        self.filelist.Bind(wx.EVT_LISTBOX, self.onFileChoice)
        l_sizer.Add(self.filelist, 1, LEFTSTYLE, 5)
        pack(l_panel, l_sizer)

        # right side: Panel to choose plot array labels, make plot
        r_panel = wx.Panel(splitter)
        r_sizer = wx.GridBagSizer(3, 3)
        self.xarr = wx.Choice(r_panel, choices=[], size=(175, -1))
        self.yarr = wx.Choice(r_panel, choices=[], size=(175, -1))
        xlabel = wx.StaticText(r_panel, label='X:', style=LEFTSTYLE)
        ylabel = wx.StaticText(r_panel, label='Y:', style=LEFTSTYLE)
        plot_btn = wx.Button(r_panel, label='Show Plot', size=(125, -1))
        plot_btn.Bind(wx.EVT_BUTTON, self.onPlot)
        self.plotpanel = PlotPanel(r_panel, size=(650, 450))

        # Row 0: controls; row 1: the plot spanning six columns.
        r_sizer.Add(xlabel, (0, 0), (1, 1), LEFTSTYLE, 2)
        r_sizer.Add(self.xarr, (0, 1), (1, 1), LEFTSTYLE, 2)
        r_sizer.Add(ylabel, (0, 2), (1, 1), LEFTSTYLE, 2)
        r_sizer.Add(self.yarr, (0, 3), (1, 1), LEFTSTYLE, 2)
        r_sizer.Add(plot_btn, (0, 4), (1, 1), LEFTSTYLE, 2)
        r_sizer.Add(self.plotpanel, (1, 0), (1, 6), LEFTSTYLE, 2)
        pack(r_panel, r_sizer)

        splitter.SplitVertically(l_panel, r_panel, 1)
        sizer = wx.BoxSizer(wx.VERTICAL)
        sizer.Add(splitter, 1, LEFTSTYLE, 5)
        pack(self, sizer)

        # Read the data files after construction has finished.
        wx.CallAfter(self.read_datafiles)
        self.Show()
        self.Raise()

    def read_datafiles(self):
        """Read every file in ``data_folder`` and list those that parse."""
        self.datasets = {}
        dfolder = os.path.abspath(self.data_folder)
        for fname in sorted(os.listdir(self.data_folder)):
            try:
                self.datasets[fname] = read_ascii(os.path.join(dfolder, fname))
            except Exception:
                # Best effort: report and skip the file, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare `except:` would.
                print("Could not read file {:s}".format(fname))
            else:
                # Only list files that have a dataset, so selecting an entry
                # can never raise KeyError in onFileChoice/onPlot.
                self.filelist.Append(fname)

    def onFileChoice(self, event=None):
        """Fill the X/Y choosers with the array labels of the selected file."""
        if event is None:
            # Nothing to read the selection from.
            return
        fname = event.GetString()
        if fname not in self.datasets:
            return
        self.current_filename = fname
        # Default to the first array for X and the second for Y.
        for choice, default in ((self.xarr, 0), (self.yarr, 1)):
            choice.Clear()
            choice.AppendItems(self.datasets[fname].array_labels)
            choice.SetSelection(default)

    def onPlot(self, event=None):
        """Plot the selected Y array against the selected X array."""
        x = self.xarr.GetSelection()
        y = self.yarr.GetSelection()
        xlab = self.xarr.GetStringSelection()
        ylab = self.yarr.GetStringSelection()
        if self.current_filename is not None:
            dset = self.datasets[self.current_filename]
            self.plotpanel.plot(dset.data[x], dset.data[y], xlabel=xlab,
                                ylabel=ylab, label=self.current_filename,
                                show_legend=True)
class SpectraPlotterApp(wx.App):
    """wx application whose top window is a SpectraPlotterFrame."""

    def __init__(self, data_folder='.', **kws):
        """Remember ``data_folder`` and pass the remaining keywords to wx.App."""
        # Stored before the base initialiser runs, as in the original --
        # presumably because wx.App.__init__ triggers OnInit; confirm.
        self.data_folder = data_folder
        super(SpectraPlotterApp, self).__init__(**kws)

    def createApp(self):
        """Create the plotter frame and register it as the top window."""
        self.SetTopWindow(SpectraPlotterFrame(data_folder=self.data_folder))

    def OnInit(self):
        """Build the UI and report successful initialisation."""
        self.createApp()
        return True
# Script entry point: run the plotter on the 'Dataset' folder.
if __name__ == '__main__':
SpectraPlotterApp(data_folder='Dataset').MainLoop()

Resources