How to fix verbose unittest output with subtests? - python-unittest

When using unittest with subtests and verbose=2, the summary of the failed tests at the top are missing 'FAIL\n'.
for example, this test (in Python 3.7):
import unittest
class TestThing(unittest.TestCase):
def test_thing1(self):
for i in range(10):
with self.subTest(i=i):
self.assertLess(i, 10)
def test_thing2(self):
for i in range(10):
with self.subTest(i=i):
self.assertLess(i, 9)
def test_thing3(self):
for i in range(10):
with self.subTest(i=i):
self.assertLess(i, 10)
if __name__ == '__main__':
unittest.main(verbosity=2)
Results in this output:
>python .\blah.py
test_thing1 (__main__.TestThing) ... ok
test_thing2 (__main__.TestThing) ... test_thing3 (__main__.TestThing) ... ok
======================================================================
FAIL: test_thing2 (__main__.TestThing) (i=9)
----------------------------------------------------------------------
Traceback (most recent call last):
File ".\blah.py", line 16, in test_thing2
self.assertLess(i, 9)
AssertionError: 9 not less than 9
----------------------------------------------------------------------
Ran 3 tests in 0.003s
FAILED (failures=1)
If I remove the subtest, then the output becomes:
test_thing1 (__main__.TestThing) ... ok
test_thing2 (__main__.TestThing) ... FAIL
test_thing3 (__main__.TestThing) ... ok
======================================================================
FAIL: test_thing2 (__main__.TestThing)
----------------------------------------------------------------------
Traceback (most recent call last):
File ".\blah.py", line 16, in test_thing2
self.assertLess(i, 9)
AssertionError: 9 not less than 9
----------------------------------------------------------------------
Ran 3 tests in 0.002s
FAILED (failures=1)
I've tried using my own resultclass but it appears that the addFailure doesn't get called for subtests (like it does for regular tests).
class MyTextTestResult(unittest.TextTestResult):
def addFailure(self, test, err):
print('adding a failure')
super().addFailure(test, err)
if self.showAll:
self.stream.writeln("FAIL")
elif self.dots:
self.stream.write('F')
self.stream.flush()
def addSuccess(self, test):
print('adding a success')
super().addSuccess(test)
if self.showAll:
self.stream.writeln("ok")
elif self.dots:
self.stream.write('.')
self.stream.flush()
Does anyone know how to fix the output for subtest results (using verbose=2)?

Looks like if I change the addSubTest in TextTestResult, that does it.
class MyTextTestResult(unittest.TextTestResult):
def addSubTest(self, test, subtest, err):
super().addSubTest(test, subtest, err)
if err is not None:
if not self.stream.getvalue().strip().endswith('FAIL'):
self.stream.writeln("FAIL")
if __name__ == '__main__':
# unittest.main(verbosity=2)
runner = unittest.TextTestRunner(None, resultclass=MyTextTestResult, verbosity=2)
suite = unittest.TestSuite()
suite.addTest(TestThing('test_thing1'))
suite.addTest(TestThing('test_thing2'))
suite.addTest(TestThing('test_thing3'))
runner.run(suite)
test_thing1 (__main__.TestThing) ... ok
test_thing2 (__main__.TestThing) ... FAIL
test_thing3 (__main__.TestThing) ... ok
======================================================================
FAIL: test_thing2 (__main__.TestThing) (i=9)
----------------------------------------------------------------------
Traceback (most recent call last):
File ".\blah.py", line 22, in test_thing2
self.assertLess(i, 9)
AssertionError: 9 not less than 9
----------------------------------------------------------------------
Ran 3 tests in 0.004s
FAILED (failures=2)

It seems that the TextTestRunner doesn't print anything (including the newline) for tests unless all subtests succeed.
Instead of not printing anything, the following TestResult will cause the TextTestRunner to print "some subtests weren't successful" (including a newline):
class MyTestResult(unittest.TextTestResult):
def startTest(self, *args):
self.current_test_has_subtests = False
super().startTest(*args)
def addSubTest(self, *args):
super().addSubTest(*args)
self.current_test_has_subtests = True
def stopTest(self, test):
super().stopTest(test)
if self.current_test_has_subtests and not self.wasSuccessful():
print("some subtests weren't successful")
To use this, change the testRunner in the unittest.main call. For example:
unittest.main(
verbosity=2,
testRunner=unittest.TextTestRunner(verbosity=2, resultclass=MyTestRunner)
)

Related

Geting error Caused by: com.databricks.NotebookExecutionException: FAILED

I am trying to run the below notebook through databricks but getting the below error. I have tried to update the notebook timeout and the retry mechanism but still no luck yet.
NotebookData("/Users/mynotebook",9900, retry=3)
]
res = parallelNotebooks(notebooks, 2)
result = [f.result(timeout=9900) for f in res] # This is a blocking call.
print(result)
Can someone please help me to sort out this issue? Thanks
%python
from concurrent.futures import ThreadPoolExecutor
class NotebookData:
def __init__(self, path, timeout, parameters=None, retry=0):
self.path = path
self.timeout = timeout
self.parameters = parameters
self.retry = retry
def submitNotebook(notebook):
print("Running notebook %s" % notebook.path)
try:
if (notebook.parameters):
return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
else:
return dbutils.notebook.run(notebook.path, notebook.timeout)
except Exception:
if notebook.retry < 1:
raise
print("Retrying notebook %s" % notebook.path)
notebook.retry = notebook.retry - 1
submitNotebook(notebook)
def parallelNotebooks(notebooks, numInParallel):
# This code limits the number of parallel notebooks.
with ThreadPoolExecutor(max_workers=numInParallel) as ec:
return [ec.submit(submitNotebook, notebook) for notebook in notebooks]
notebooks = [
NotebookData("/Users/mynotebook",1200000, retry=0)
]
res = parallelNotebooks(notebooks, 2)
result = [f.result(timeout=1200000) for f in res] # This is a blocking call.
print(result)
Error:
Py4JJavaError Traceback (most recent call last)
<command-1143841910698378> in <module>
32 ]
33 res = parallelNotebooks(notebooks, 2)
---> 34 result = [f.result(timeout=1200000) for f in res] # This is a blocking call.
35 print(result)
<command-1143841910698378> in <listcomp>(.0)
32 ]
33 res = parallelNotebooks(notebooks, 2)
---> 34 result = [f.result(timeout=1200000) for f in res] # This is a blocking call.
35 print(result)
/usr/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
426 raise CancelledError()
427 elif self._state == FINISHED:
--> 428 return self.__get_result()
429
430 self._condition.wait(timeout)
/usr/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
/usr/lib/python3.7/concurrent/futures/thread.py in run(self)
55
56 try:
---> 57 result = self.fn(*self.args, **self.kwargs)
58 except BaseException as exc:
59 self.future.set_exception(exc)
<command-1143841910698378> in submitNotebook(notebook)
12 return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
13 else:
---> 14 return dbutils.notebook.run(notebook.path, notebook.timeout)
15 except Exception:
16 if notebook.retry < 1:
/local_disk0/tmp/1664351986642-0/dbutils.py in run(self, path, timeout_seconds, arguments, _NotebookHandler__databricks_internal_cluster_spec)
136 arguments,
137 __databricks_internal_cluster_spec,
--> 138 self.shell.currentJobGroup)
139
140 def __repr__(self):
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
125 def deco(*a, **kw):
126 try:
--> 127 return f(*a, **kw)
128 except py4j.protocol.Py4JJavaError as e:
129 converted = convert_exception(e.java_exception)
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling o1741._run.
: com.databricks.WorkflowException: com.databricks.NotebookExecutionException: FAILED
at com.databricks.workflow.WorkflowDriver.run(WorkflowDriver.scala:95)
at com.databricks.dbutils_v1.impl.NotebookUtilsImpl.run(NotebookUtilsImpl.scala:122)
at com.databricks.dbutils_v1.impl.NotebookUtilsImpl._run(NotebookUtilsImpl.scala:89)
at sun.reflect.GeneratedMethodAccessor820.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:295)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.databricks.NotebookExecutionException: FAILED
at com.databricks.workflow.WorkflowDriver.run0(WorkflowDriver.scala:141)
at com.databricks.workflow.WorkflowDriver.run(WorkflowDriver.scala:90)
... 12 more

Unable to debug the syntax error and need some asisstance

I am learing Python3.6 and spent a long time looking at this code to debug a syntax error occurring in the allcoate_seat function, position "if row not in rows:" and wanted to share this and find out the reason. I am still building the code and stuck at this position.
Code snippet:
class Flight:
def __init__(self, fnumber, aircraft):
if not fnumber[:2].isalpha():
raise ValueError ("No airline code in {}".format(fnumber))
if not fnumber[:2].isupper():
raise ValueError ("Invalid airline code in {}".format(fnumber))
if not (fnumber[2:].isdigit() and int(fnumber[2:]))<9999:
raise ValueError ("Invalid route in {}".format(fnumber))
self.fnumber=fnumber
self.aircraft=aircraft
rows, seats = aircraft.seating_plan()
self.seating=[None] + [{letter:None for letter in seats} for _ in rows]
def fnumber (self):
return self.fnumber
def airline(self):
return self.fnumber[:2]
def aircraft_model(self):
return self.aircraft.model()
def allcoate_seat(self, seat, passenger):
rows, seat_letters=self._aircraft.seating_plan()
letter=seat[-1]
if letter not in seat_letters:
raise ValueError ('Invalid seat letters {}'.format(letter))
row_text=seat[:-1]
try:
row=int(row_text)
except ValueError:
raise ValueError('Invalid seat row {}'.format(row_text)
if row not in rows:
raise ValueError ('Invalid row number {}'.format(row))
if self._seating[row][letter] is not None:
raise ValueError ('Seat already allocated {}'.format(seat))
self._seating[row][letter]=passenger
class Aircraft:
def __init__(self, registration, model, num_rows, num_seats_per_row):
self._registration=registration
self._model=model
self._num_rows=num_rows
self._num_seats_per_row=num_seats_per_row
def registration(self):
return self._registration
def model(self):
return self._model
def seating_plan(self):
return(range(1,self._num_rows+1),'ABCDEFGHJ'[:self._num_seats_per_row])
f1= Flight('TA098', Aircraft('TA008','AirbusA380',10,5))
print(f1.fnumber, f1.aircraft_model(), f1.airline(), pp(f1.seating))

"Failed to import pydot" throws in kerasR

I use the package keras under R and I would like to know if there was a command like python with plot_model () which allows to display its neuron network
library(keras)
for example I would like to display this neural network under R
model <- keras_model_sequential()
model %>%
layer_dense(units = 5, input_shape = 2) %>%
layer_activation("relu") %>%
layer_dense(units = 1)
I install package kerasR for use the function plot_model(), but i have this error.
> library(kerasR)
> plot_model(model)
Error in py_call_impl(callable, dots$args, dots$keywords) :
ImportError: Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.
Detailed traceback:
File "C:\Users\Idriss\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\vis_utils.py", line 131, in plot_model
dot = model_to_dot(model, show_shapes, show_layer_names, rankdir)
File "C:\Users\Idriss\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\vis_utils.py", line 52, in model_to_dot
_check_pydot()
File "C:\Users\Idriss\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\vis_utils.py", line 27, in _check_pydot
raise ImportError('Failed to import pydot. You must install pydot'
I'm use windows 10 64 bits, i use RStudio with Anaconda
In [4] pydot.Dot.create(pydot.Dot())
Out[4]: b"%!PS-Adobe-3.0\r\n%%Creator: graphviz version 2.38.0 (20140413.2041)\r\n%%Title: G\r\n%%Pages: (atend)\r\n%%BoundingBox: (atend)\r\n%%EndComments\r\nsave\r\n%%BeginProlog\r\n/DotDict 200 dict def\r\nDotDict begin\r\n\r\n/setupLatin1 {\r\nmark\r\n/EncodingVector 256 array def\r\n EncodingVector 0\r\n\r\nISOLatin1Encoding 0 255 getinterval putinterval\r\nEncodingVector 45 /hyphen put\r\n\r\n% Set up ISO Latin 1 character encoding\r\n/starnetISO {\r\n dup dup findfont dup length dict begin\r\n { 1 index /FID ne { def }{ pop pop } ifelse\r\n } forall\r\n /Encoding EncodingVector def\r\n currentdict end definefont\r\n} def\r\n/Times-Roman starnetISO def\r\n/Times-Italic starnetISO def\r\n/Times-Bold starnetISO def\r\n/Times-BoldItalic starnetISO def\r\n/Helvetica starnetISO def\r\n/Helvetica-Oblique starnetISO def\r\n/Helvetica-Bold starnetISO def\r\n/Helvetica-BoldOblique starnetISO def\r\n/Courier starnetISO def\r\n/Courier-Oblique starnetISO def\r\n/Courier-Bold starnetISO def\r\n/Courier-BoldOblique starnetISO def\r\ncleartomark\r\n} bind def\r\n\r\n%%BeginResource: procset graphviz 0 0\r\n/coord-font-family /Times-Roman def\r\n/default-font-family /Times-Roman def\r\n/coordfont coord-font-family findfont 8 scalefont def\r\n\r\n/InvScaleFactor 1.0 def\r\n/set_scale {\r\n dup 1 exch div /InvScaleFactor exch def\r\n scale\r\n} bind def\r\n\r\n% styles\r\n/solid { [] 0 setdash } bind def\r\n/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def\r\n/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def\r\n/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def\r\n/bold { 2 setlinewidth } bind def\r\n/filled { } bind def\r\n/unfilled { } bind def\r\n/rounded { } bind def\r\n/diagonals { } bind def\r\n/tapered { } bind def\r\n\r\n% hooks for setting color \r\n/nodecolor { sethsbcolor } bind def\r\n/edgecolor { sethsbcolor } bind def\r\n/graphcolor { sethsbcolor } bind def\r\n/nopcolor {pop pop pop} bind def\r\n\r\n/beginpage {\t% i j npages\r\n\t/npages exch def\r\n\t/j exch def\r\n\t/i exch def\r\n\t/str 10 string def\r\n\tnpages 1 gt {\r\n\t\tgsave\r\n\t\t\tcoordfont setfont\r\n\t\t\t0 0 moveto\r\n\t\t\t(\\() show i str cvs show (,) show j str cvs show (\\)) show\r\n\t\tgrestore\r\n\t} if\r\n} bind def\r\n\r\n/set_font {\r\n\tfindfont exch\r\n\tscalefont setfont\r\n} def\r\n\r\n% draw text fitted to its expected width\r\n/alignedtext {\t\t\t% width text\r\n\t/text exch def\r\n\t/width exch def\r\n\tgsave\r\n\t\twidth 0 gt {\r\n\t\t\t[] 0 setdash\r\n\t\t\ttext stringwidth pop width exch sub text length div 0 text ashow\r\n\t\t} if\r\n\tgrestore\r\n} def\r\n\r\n/boxprim {\t\t\t\t% xcorner ycorner xsize ysize\r\n\t\t4 2 roll\r\n\t\tmoveto\r\n\t\t2 copy\r\n\t\texch 0 rlineto\r\n\t\t0 exch rlineto\r\n\t\tpop neg 0 rlineto\r\n\t\tclosepath\r\n} bind def\r\n\r\n/ellipse_path {\r\n\t/ry exch def\r\n\t/rx exch def\r\n\t/y exch def\r\n\t/x exch def\r\n\tmatrix currentmatrix\r\n\tnewpath\r\n\tx y translate\r\n\trx ry scale\r\n\t0 0 1 0 360 arc\r\n\tsetmatrix\r\n} bind def\r\n\r\n/endpage { showpage } bind def\r\n/showpage { } def\r\n\r\n/layercolorseq\r\n\t[\t% layer color sequence - darkest to lightest\r\n\t\t[0 0 0]\r\n\t\t[.2 .8 .8]\r\n\t\t[.4 .8 .8]\r\n\t\t[.6 .8 .8]\r\n\t\t[.8 .8 .8]\r\n\t]\r\ndef\r\n\r\n/layerlen layercolorseq length def\r\n\r\n/setlayer {/maxlayer exch def /curlayer exch def\r\n\tlayercolorseq curlayer 1 sub layerlen mod get\r\n\taload pop sethsbcolor\r\n\t/nodecolor {nopcolor} def\r\n\t/edgecolor {nopcolor} def\r\n\t/graphcolor {nopcolor} def\r\n} bind def\r\n\r\n/onlayer { curlayer ne {invis} if } def\r\n\r\n/onlayers {\r\n\t/myupper exch def\r\n\t/mylower exch def\r\n\tcurlayer mylower lt\r\n\tcurlayer myupper gt\r\n\tor\r\n\t{invis} if\r\n} def\r\n\r\n/curlayer 0 def\r\n\r\n%%EndResource\r\n%%EndProlog\r\n%%BeginSetup\r\n14 default-font-family set_font\r\n1 setmiterlimit\r\n% /arrowlength 10 def\r\n% /arrowwidth 5 def\r\n\r\n% make sure pdfmark is harmless for PS-interpreters other than Distiller\r\n/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse\r\n% make '<<' and '>>' safe on PS Level 1 devices\r\n/languagelevel where {pop languagelevel}{1} ifelse\r\n2 lt {\r\n userdict (<<) cvn ([) cvn load put\r\n userdict (>>) cvn ([) cvn load put\r\n} if\r\n\r\n%%EndSetup\r\nsetupLatin1\r\n%%Page: 1 1\r\n%%PageBoundingBox: 36 36 44 44\r\n%%PageOrientation: Portrait\r\n0 0 1 beginpage\r\ngsave\r\n36 36 8 8 boxprim clip newpath\r\n1 1 set_scale 0 rotate 40 40 translate\r\nendpage\r\nshowpage\r\ngrestore\r\n%%PageTrailer\r\n%%EndPage: 1\r\n%%Trailer\r\n%%Pages: 1\r\n%%BoundingBox: 36 36 44 44\r\nend\r\nrestore\r\n%%EOF\r\n"
First thing first, the error:
File "C:\Users\Idriss\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\vis_utils.py", line 52, in model_to_dot
_check_pydot()
If we check the file C:\Users\Idriss\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\vis_utils.py and search for the function _check_pydot():
def _check_pydot():
try:
# Attempt to create an image of a blank graph
# to check the pydot/graphviz installation.
pydot.Dot.create(pydot.Dot())
except Exception:
# pydot raises a generic Exception here,
# so no specific class can be caught.
raise ImportError('Failed to import pydot. You must install pydot'
' and graphviz for `pydotprint` to work.')
This error message is lack of information since it catch ALL exception instead of specific execption and raise hard-coded error ImportError(Failed to import blah blah).
To ensure it import the relevant pydot, we should also check import part in that file (Rerun R and library(kerasR) to test):
import os
print("hole 0")
try:
# pydot-ng is a fork of pydot that is better maintained.
import pydot_ng as pydot
print("hole 1")
except ImportError:
# pydotplus is an improved version of pydot
try:
print("hole 1.2")
import pydotplus as pydot
print("hole 2")
except ImportError:
# Fall back on pydot if necessary.
try:
print("hole 3")
import pydot
except ImportError:
print("hole 4")
pydot = None
print("hole -1: " + str(locals())) #alternative way to debug
...
Tips: The safer way to debug is userepr instead of str.
If you manually run python in interactive mode and do pydot.Dot.create(pydot.Dot()), you will find out the exact exception (below is my Linux sample):
xb#dnxb:~/anaconda3/envs/r-tensorflow/bin$ ./python
Python 3.6.3 |Anaconda, Inc.| (default, Nov 20 2017, 20:41:42)
[GCC 7.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pydot
>>> pydot.Dot()
<pydot.Dot object at 0x7f7d045cdb38>
>>> pydot.Dot
<class 'pydot.Dot'>
>>> pydot.Dot.create(pydot.Dot())
Traceback (most recent call last):
File "/home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/site-packages/pydot.py", line 1878, in create
stderr=subprocess.PIPE, stdout=subprocess.PIPE)
File "/home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/subprocess.py", line 709, in __init__
restore_signals, start_new_session)
File "/home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/subprocess.py", line 1344, in _execute_child
raise child_exception_type(errno_num, err_msg, err_filename)
FileNotFoundError: [Errno 2] No such file or directory: 'dot': 'dot'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/site-packages/pydot.py", line 1883, in create
prog=prog))
Exception: "dot" not found in path.
>>>
Let's print some variables used in the file /home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/site-packages/pydot.py before the line 1878:
try:
print("env: " + str(env))
print("cmdline: " + str(cmdline))
print("tmp_dir: " + str(tmp_dir))
p = subprocess.Popen(
cmdline,
env=env,
cwd=tmp_dir,
shell=False,
stderr=subprocess.PIPE, stdout=subprocess.PIPE)
except OSError as e:
if e.errno == os.errno.ENOENT:
raise Exception(
'"{prog}" not found in path.'.format(
prog=prog))
else:
raise
Restart your python interpreter, rerun the import pydot and pydot.Dot.create(pydot.Dot()), it will shows:
xb#dnxb:~/anaconda3/envs/r-tensorflow/bin$ ./python
Python 3.6.3 |Anaconda, Inc.| (default, Nov 20 2017, 20:41:42)
[GCC 7.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pydot
>>> pydot.Dot.create(pydot.Dot())
env: {'PATH': '/home/xiaobai/anaconda3/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:...<My other PATH>'}
cmdline: ['dot', '-Tps', '/tmp/tmpffo17gx5']
tmp_dir: /tmp
Traceback (most recent call last):
File "/home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/site-packages/pydot.py", line 1881, in create
stderr=subprocess.PIPE, stdout=subprocess.PIPE)
So basically what it does is run the command dot -Tps /tmp/tmpffo17gx5 but failed due to dot command not found.
In Linux, it will suggest run sudo apt install graphviz to install dot if I run the command manually in terminal:
xb#dnxb:~/anaconda3/envs/r-tensorflow/bin$ dot
The program 'dot' is currently not installed. You can install it by typing:
sudo apt install graphviz
xb#dnxb:~/anaconda3/envs/r-tensorflow/bin$ sudo apt install graphviz
...
Run dot -Tps /tmp/tmpffo17gx5 will success now:
xb#dnxb:~/anaconda3/envs/r-tensorflow/bin$ dot -Tps /tmp/tmpffo17gx5
%!PS-Adobe-3.0
%%Creator: graphviz version 2.38.0 (20140413.2041)
%%Title: G
%%Pages: (atend)
%%BoundingBox: (atend)
%%EndComments
save
%%BeginProlog
/DotDict 200 dict def
DotDict begin
...
Restart R session, no more error:
> plot_model(model)
>
This sudo apt install graphviz is for Linux, but I hope this answer help you debug the error in Windows.
You should install the Python libraries:
pip install pydot graphviz
And also you need to download the graphviz binaries, and these are not installed with Python.
On Ubuntu you can install them with apt:
apt-get install -y graphviz libgraphviz-dev
On osX with brew:
brew install graphviz
For Windows and other operating systems, the instructions can be found at http://www.graphviz.org/

PySpark map datetime to DoW

I'm trying to map a column 'eventtimestamp' to its day of week with the following function:
from datetime import datetime
import calendar
from pyspark.sql.functions import UserDefinedFunction as udf
def toWeekDay(x):
v = int(datetime.strptime(str(x),'%Y-%m-%d %H:%M:%S').strftime('%w'))
if v == 0:
v = 6
else:
v = v-1
return calendar.day_name[v]
and for my df trying to create a new column dow with UDF.
udf_toWeekDay = udf(lambda x: toWeekDay(x), StringType())
df = df.withColumn("dow",udf_toWeekDay('eventtimestamp'))
Yet, I'm getting error I do not understand at all. Firstly, it was complaining for inserting datetime.datetime into strptime instead of string. So I parsed to str and now I don't have a clue what's wrong.
Traceback (most recent call last):
File "/tmp/zeppelin_pyspark-9040214714346906648.py", line 267, in <module>
raise Exception(traceback.format_exc())
Exception: Traceback (most recent call last):
File "/tmp/zeppelin_pyspark-9040214714346906648.py", line 260, in <module>
exec(code)
File "<stdin>", line 10, in <module>
File "/usr/lib/spark/python/pyspark/sql/dataframe.py", line 429, in take
return self.limit(num).collect()
File "/usr/lib/spark/python/pyspark/sql/dataframe.py", line 391, in collect
port = self._jdf.collectToPython()
File "/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1133, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/usr/lib/spark/python/pyspark/sql/utils.py", line 63, in deco
return f(*a, **kw)
File "/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o6250.collectToPython.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1107.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1107.0 (TID 63757, ip-172-31-27-113.eu-west-1.compute.internal, executor 819): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
Thanks a lot for clues!
we can use date_format to get dayofweek,
df = df.withColumn("dow",date_format(df['eventtimestamp'],'EEEE'))

piplinedRDD can't convert to dataframe using toDF

I have a pyspark dataframe contains rows of data seperated by comma. I want to split each row and apply LabeledPoints method to it. Then covnert it to dataframe.
Here is my code
import os.path
from pyspark.mllib.regression import LabeledPoint
import numpy as np
file_name = os.path.join('databricks-datasets', 'cs190', 'data-001', 'millionsong.txt')
raw_data_df = sqlContext.read.load(file_name, 'text')
rdd = raw_data_df.rdd.map(lambda line: line.split(',')).map(lambda seq:LabeledPoints(seq[0],seq[1:])).toDF()
It gives the following error message after apply .DF().
---------------------------------------------------------------------------
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 38.0 failed 1 times, most recent failure: Lost task 0.0 in stage 38.0 (TID 44, localhost): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
Py4JJavaError Traceback (most recent call last)
<ipython-input-65-dc4d86a8ee45> in <module>()
----> 1 rdd = raw_data_df.rdd.map(lambda line: line.split(',')).map(lambda seq:LabeledPoints(seq[0],seq[1:])).toDF()
2 print(type(rdd))
3 #print(rdd.take(5))
/databricks/spark/python/pyspark/sql/context.py in toDF(self, schema, sampleRatio)
62 [Row(name=u'Alice', age=1)]
63 """
---> 64 return sqlContext.createDataFrame(self, schema, sampleRatio)
65
66 RDD.toDF = toDF
/databricks/spark/python/pyspark/sql/context.py in createDataFrame(self, data, schema, samplingRatio)
421
422 if isinstance(data, RDD):
--> 423 rdd, schema = self._createFromRDD(data, schema, samplingRatio)
424 else:
425 rdd, schema = self._createFromLocal(data, schema)
/databricks/spark/python/pyspark/sql/context.py in _createFromRDD(self, rdd, schema, samplingRatio)
Answer found:
rdd = raw_data_df.map(lambda row: row['value'].split(',')).map(lambda seq:LabeledPoint(float(seq[0]),seq[1:])).toDF()
Here, I need to specifically reference each line of text using row['value'], even though there is only one feature in the row.

Resources