NameError: name 'dataset_zip' is not defined - jupyter-notebook

enter image description here
NameError Traceback (most recent call last)
in
2 print(tf.version)
3 import zipfile
----> 4 with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
5 zip_ref.extractall(directory_to_extract_to)
6
NameError: name 'dataset_zip' is not defined

Related

Jupyter script with images for standalone

I'm new to Python. I created a very simple script, but I seem to be running into issues with images.
I want to turn this script into a standalone program.
However, when I originally did it without tkinter or ImageTk, the window would open and close immediately, so I tried the version below.
The error messages I get are shown below the code.
> <sub>`import tkinter as tk from PIL import Image, ImageTk
>
> while True:
> name = input("\nWhat is your name? ")
>
> print(f'\nNice to meet you {name}')
>
> age = input("\nHow old are you? ")
> age = int(age)
> if age >= 40:
> # Open the "old" image
> image = Image.open(r'old.jpg')
> else:
> # Open the "young" image
> image = Image.open(r'young.jpg')
>
> # Convert the image to a PhotoImage object
> photo_image = ImageTk.PhotoImage(image)
>
> # Create a Tkinter window
> root = tk.Tk()
>
> # Create a label and set the image as its background
> label = tk.Label(root, image=photo_image)
> label.pack()
>
> # Run the Tkinter event loop
> root.mainloop()
>
> exit_prompt = input("\nEnter 'exit' to close the script, or press Enter to continue: ")
> if exit_prompt == "exit":
> break
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_9860\2051054611.py in <module>
17
18 # Convert the image to a PhotoImage object
---> 19 photo_image = ImageTk.PhotoImage(image)
20
21 # Create a Tkinter window
~\anaconda3\lib\site-packages\PIL\ImageTk.py in __init__(self, image, size, **kw)
138 self.__mode = mode
139 self.__size = size
--> 140 self.__photo = tkinter.PhotoImage(**kw)
141 self.tk = self.__photo.tk
142 if image:
~\anaconda3\lib\tkinter\__init__.py in __init__(self, name, cnf, master, **kw)
4062 Valid resource names: data, format, file, gamma, height, palette,
4063 width."""
-> 4064 Image.__init__(self, 'photo', name, cnf, master, **kw)
4065
4066 def blank(self):
~\anaconda3\lib\tkinter\__init__.py in __init__(self, imgtype, name, cnf, master, **kw)
3995 self.name = None
3996 if not master:
-> 3997 master = _get_default_root('create image')
3998 self.tk = getattr(master, 'tk', master)
3999 if not name:
~\anaconda3\lib\tkinter\__init__.py in _get_default_root(what)
295 if not _default_root:
296 if what:
--> 297 raise RuntimeError(f"Too early to {what}: no default root window")
298 root = Tk()
299 assert _default_root is root
RuntimeError: Too early to create image: no default root window`
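The error occurs because ImageTk.PhotoImage(image) is called before any Tk root window exists (tk.Tk() is only created afterwards). If you just want to display the picture, you do not need tkinter at all: call image.show() after the image is opened.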
from PIL import Image
while True:
name = input("\nWhat is your name? ")
print(f'\nNice to meet you {name}')
age = input("\nHow old are you? ")
age = int(age)
if age >= 40:
# Open the "old" image
image = Image.open(r'old.jpg')
else:
# Open the "young" image
image = Image.open(r'young.jpg')
image.show()
Once that issue was fixed I was getting the following error:
---------------------------------------------------------------------------
TclError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_11772\4142766009.py in <module>
26
27 # Create a label and set the image as its background
---> 28 label = tk.Label(root, image=photo_image)
29 label.image = photo_image # Keep a reference to the PhotoImage object
30 label.pack()
~\anaconda3\lib\tkinter\__init__.py in __init__(self, master, cnf, **kw)
3146
3147 """
-> 3148 Widget.__init__(self, master, 'label', cnf, kw)
3149
3150
~\anaconda3\lib\tkinter\__init__.py in __init__(self, master, widgetName, cnf, kw, extra)
2570 for k, v in classes:
2571 del cnf[k]
-> 2572 self.tk.call(
2573 (widgetName, self._w) + extra + self._options(cnf))
2574 for k, v in classes:
TclError: image "pyimage28" doesn't exist
I tried fixing it by storing the image:
import tkinter as tk from tkinter import PhotoImage from PIL import
Image, ImageTk
while True:
name = input("\nWhat is your name? ")
print(f'\nNice to meet you {name}')
age = input("\nHow old are you? ")
age = int(age)
if age >= 40:
# Open the "old" image
image = Image.open(r'old.jpg')
else:
# Open the "young" image
image = Image.open(r'young.jpg')
# Convert the image to a PhotoImage object
photo_image = PhotoImage(image)
# Create a Tkinter window
root = tk.Tk()
# Create a label and set the image as its background
label = tk.Label(root, image=photo_image)
label.image = photo_image # Keep a reference to the PhotoImage object
label.pack()
# Run the Tkinter event loop
root.mainloop()
exit_prompt = input("\nEnter 'exit' to close the script, or press Enter to continue: ")
if exit_prompt == "exit":
break

Unable to import cfgrib

Whenever I try to import cfgrib, I get a RuntimeError saying that it could not load the ecCodes library:
import cfgrib
Here's the full error message
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_6224/857012844.py in <module>
----> 1 import cfgrib
~/.local/lib/python3.8/site-packages/cfgrib/__init__.py in <module>
17
18 # cfgrib core API depends on the ECMWF ecCodes C-library only
---> 19 from .cfmessage import CfMessage
20 from .dataset import Dataset, DatasetBuildError, open_file, open_fileindex
21 from .messages import FileStream, Message
~/.local/lib/python3.8/site-packages/cfgrib/cfmessage.py in <module>
27 import numpy as np
28
---> 29 from . import abc, messages
30
31 LOG = logging.getLogger(__name__)
~/.local/lib/python3.8/site-packages/cfgrib/messages.py in <module>
26
27 import attr
---> 28 import eccodes # type: ignore
29 import numpy as np
30
~/.local/lib/python3.8/site-packages/eccodes/__init__.py in <module>
13 import sys
14
---> 15 from .eccodes import *
16 from .eccodes import __version__
17 from .eccodes import bindings_version
~/.local/lib/python3.8/site-packages/eccodes/eccodes.py in <module>
10 #
11 #
---> 12 from gribapi import __version__
13 from gribapi import bindings_version
14
~/.local/lib/python3.8/site-packages/gribapi/__init__.py in <module>
11 #
12
---> 13 from .gribapi import * # noqa
14 from .gribapi import __version__
15 from .gribapi import bindings_version
~/.local/lib/python3.8/site-packages/gribapi/gribapi.py in <module>
2226
2227
-> 2228 __version__ = grib_get_api_version()
2229
2230
~/.local/lib/python3.8/site-packages/gribapi/gribapi.py in grib_get_api_version()
2216
2217 if not lib:
-> 2218 raise RuntimeError("Could not load the ecCodes library!")
2219
2220 v = lib.grib_get_api_version()
RuntimeError: Could not load the ecCodes library!
I have installed cfgrib and ecCodes through pip
cfgrib 0.9.9.1
eccodes 1.4.0
eccodes-python 0.9.9
As stated on pypi (https://pypi.org/project/eccodes/) the eccodes python package relies on the eccodes system library. Based on the error message, you do not have it installed.
It is probably easiest to install it using conda:
conda install -c conda-forge eccodes

"Failed to import pydot" throws in kerasR

I use the keras package in R, and I would like to know whether there is a command like Python's plot_model() that displays a model's neural network.
library(keras)
For example, I would like to display this neural network in R:
model <- keras_model_sequential()
model %>%
layer_dense(units = 5, input_shape = 2) %>%
layer_activation("relu") %>%
layer_dense(units = 1)
I installed the kerasR package to use its plot_model() function, but I get this error:
> library(kerasR)
> plot_model(model)
Error in py_call_impl(callable, dots$args, dots$keywords) :
ImportError: Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.
Detailed traceback:
File "C:\Users\Idriss\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\vis_utils.py", line 131, in plot_model
dot = model_to_dot(model, show_shapes, show_layer_names, rankdir)
File "C:\Users\Idriss\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\vis_utils.py", line 52, in model_to_dot
_check_pydot()
File "C:\Users\Idriss\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\vis_utils.py", line 27, in _check_pydot
raise ImportError('Failed to import pydot. You must install pydot'
I'm using Windows 10 64-bit, and I use RStudio with Anaconda.
In [4] pydot.Dot.create(pydot.Dot())
Out[4]: b"%!PS-Adobe-3.0\r\n%%Creator: graphviz version 2.38.0 (20140413.2041)\r\n%%Title: G\r\n%%Pages: (atend)\r\n%%BoundingBox: (atend)\r\n%%EndComments\r\nsave\r\n%%BeginProlog\r\n/DotDict 200 dict def\r\nDotDict begin\r\n\r\n/setupLatin1 {\r\nmark\r\n/EncodingVector 256 array def\r\n EncodingVector 0\r\n\r\nISOLatin1Encoding 0 255 getinterval putinterval\r\nEncodingVector 45 /hyphen put\r\n\r\n% Set up ISO Latin 1 character encoding\r\n/starnetISO {\r\n dup dup findfont dup length dict begin\r\n { 1 index /FID ne { def }{ pop pop } ifelse\r\n } forall\r\n /Encoding EncodingVector def\r\n currentdict end definefont\r\n} def\r\n/Times-Roman starnetISO def\r\n/Times-Italic starnetISO def\r\n/Times-Bold starnetISO def\r\n/Times-BoldItalic starnetISO def\r\n/Helvetica starnetISO def\r\n/Helvetica-Oblique starnetISO def\r\n/Helvetica-Bold starnetISO def\r\n/Helvetica-BoldOblique starnetISO def\r\n/Courier starnetISO def\r\n/Courier-Oblique starnetISO def\r\n/Courier-Bold starnetISO def\r\n/Courier-BoldOblique starnetISO def\r\ncleartomark\r\n} bind def\r\n\r\n%%BeginResource: procset graphviz 0 0\r\n/coord-font-family /Times-Roman def\r\n/default-font-family /Times-Roman def\r\n/coordfont coord-font-family findfont 8 scalefont def\r\n\r\n/InvScaleFactor 1.0 def\r\n/set_scale {\r\n dup 1 exch div /InvScaleFactor exch def\r\n scale\r\n} bind def\r\n\r\n% styles\r\n/solid { [] 0 setdash } bind def\r\n/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def\r\n/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def\r\n/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def\r\n/bold { 2 setlinewidth } bind def\r\n/filled { } bind def\r\n/unfilled { } bind def\r\n/rounded { } bind def\r\n/diagonals { } bind def\r\n/tapered { } bind def\r\n\r\n% hooks for setting color \r\n/nodecolor { sethsbcolor } bind def\r\n/edgecolor { sethsbcolor } bind def\r\n/graphcolor { sethsbcolor } bind def\r\n/nopcolor {pop pop pop} bind 
def\r\n\r\n/beginpage {\t% i j npages\r\n\t/npages exch def\r\n\t/j exch def\r\n\t/i exch def\r\n\t/str 10 string def\r\n\tnpages 1 gt {\r\n\t\tgsave\r\n\t\t\tcoordfont setfont\r\n\t\t\t0 0 moveto\r\n\t\t\t(\\() show i str cvs show (,) show j str cvs show (\\)) show\r\n\t\tgrestore\r\n\t} if\r\n} bind def\r\n\r\n/set_font {\r\n\tfindfont exch\r\n\tscalefont setfont\r\n} def\r\n\r\n% draw text fitted to its expected width\r\n/alignedtext {\t\t\t% width text\r\n\t/text exch def\r\n\t/width exch def\r\n\tgsave\r\n\t\twidth 0 gt {\r\n\t\t\t[] 0 setdash\r\n\t\t\ttext stringwidth pop width exch sub text length div 0 text ashow\r\n\t\t} if\r\n\tgrestore\r\n} def\r\n\r\n/boxprim {\t\t\t\t% xcorner ycorner xsize ysize\r\n\t\t4 2 roll\r\n\t\tmoveto\r\n\t\t2 copy\r\n\t\texch 0 rlineto\r\n\t\t0 exch rlineto\r\n\t\tpop neg 0 rlineto\r\n\t\tclosepath\r\n} bind def\r\n\r\n/ellipse_path {\r\n\t/ry exch def\r\n\t/rx exch def\r\n\t/y exch def\r\n\t/x exch def\r\n\tmatrix currentmatrix\r\n\tnewpath\r\n\tx y translate\r\n\trx ry scale\r\n\t0 0 1 0 360 arc\r\n\tsetmatrix\r\n} bind def\r\n\r\n/endpage { showpage } bind def\r\n/showpage { } def\r\n\r\n/layercolorseq\r\n\t[\t% layer color sequence - darkest to lightest\r\n\t\t[0 0 0]\r\n\t\t[.2 .8 .8]\r\n\t\t[.4 .8 .8]\r\n\t\t[.6 .8 .8]\r\n\t\t[.8 .8 .8]\r\n\t]\r\ndef\r\n\r\n/layerlen layercolorseq length def\r\n\r\n/setlayer {/maxlayer exch def /curlayer exch def\r\n\tlayercolorseq curlayer 1 sub layerlen mod get\r\n\taload pop sethsbcolor\r\n\t/nodecolor {nopcolor} def\r\n\t/edgecolor {nopcolor} def\r\n\t/graphcolor {nopcolor} def\r\n} bind def\r\n\r\n/onlayer { curlayer ne {invis} if } def\r\n\r\n/onlayers {\r\n\t/myupper exch def\r\n\t/mylower exch def\r\n\tcurlayer mylower lt\r\n\tcurlayer myupper gt\r\n\tor\r\n\t{invis} if\r\n} def\r\n\r\n/curlayer 0 def\r\n\r\n%%EndResource\r\n%%EndProlog\r\n%%BeginSetup\r\n14 default-font-family set_font\r\n1 setmiterlimit\r\n% /arrowlength 10 def\r\n% /arrowwidth 5 def\r\n\r\n% make sure pdfmark 
is harmless for PS-interpreters other than Distiller\r\n/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse\r\n% make '<<' and '>>' safe on PS Level 1 devices\r\n/languagelevel where {pop languagelevel}{1} ifelse\r\n2 lt {\r\n userdict (<<) cvn ([) cvn load put\r\n userdict (>>) cvn ([) cvn load put\r\n} if\r\n\r\n%%EndSetup\r\nsetupLatin1\r\n%%Page: 1 1\r\n%%PageBoundingBox: 36 36 44 44\r\n%%PageOrientation: Portrait\r\n0 0 1 beginpage\r\ngsave\r\n36 36 8 8 boxprim clip newpath\r\n1 1 set_scale 0 rotate 40 40 translate\r\nendpage\r\nshowpage\r\ngrestore\r\n%%PageTrailer\r\n%%EndPage: 1\r\n%%Trailer\r\n%%Pages: 1\r\n%%BoundingBox: 36 36 44 44\r\nend\r\nrestore\r\n%%EOF\r\n"
First thing first, the error:
File "C:\Users\Idriss\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\vis_utils.py", line 52, in model_to_dot
_check_pydot()
If we check the file C:\Users\Idriss\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\vis_utils.py and search for the function _check_pydot():
def _check_pydot():
try:
# Attempt to create an image of a blank graph
# to check the pydot/graphviz installation.
pydot.Dot.create(pydot.Dot())
except Exception:
# pydot raises a generic Exception here,
# so no specific class can be caught.
raise ImportError('Failed to import pydot. You must install pydot'
' and graphviz for `pydotprint` to work.')
This error message lacks information, because the code catches ALL exceptions instead of a specific exception and then raises a hard-coded ImportError ("Failed to import pydot ...") regardless of the real cause.
To make sure the relevant pydot package is actually imported, we should also check the import section of that file (restart R and run library(kerasR) again to test):
import os
print("hole 0")
try:
# pydot-ng is a fork of pydot that is better maintained.
import pydot_ng as pydot
print("hole 1")
except ImportError:
# pydotplus is an improved version of pydot
try:
print("hole 1.2")
import pydotplus as pydot
print("hole 2")
except ImportError:
# Fall back on pydot if necessary.
try:
print("hole 3")
import pydot
except ImportError:
print("hole 4")
pydot = None
print("hole -1: " + str(locals())) #alternative way to debug
...
Tip: a safer way to debug is to use repr instead of str.
If you manually run python in interactive mode and do pydot.Dot.create(pydot.Dot()), you will find out the exact exception (below is my Linux sample):
xb#dnxb:~/anaconda3/envs/r-tensorflow/bin$ ./python
Python 3.6.3 |Anaconda, Inc.| (default, Nov 20 2017, 20:41:42)
[GCC 7.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pydot
>>> pydot.Dot()
<pydot.Dot object at 0x7f7d045cdb38>
>>> pydot.Dot
<class 'pydot.Dot'>
>>> pydot.Dot.create(pydot.Dot())
Traceback (most recent call last):
File "/home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/site-packages/pydot.py", line 1878, in create
stderr=subprocess.PIPE, stdout=subprocess.PIPE)
File "/home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/subprocess.py", line 709, in __init__
restore_signals, start_new_session)
File "/home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/subprocess.py", line 1344, in _execute_child
raise child_exception_type(errno_num, err_msg, err_filename)
FileNotFoundError: [Errno 2] No such file or directory: 'dot': 'dot'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/site-packages/pydot.py", line 1883, in create
prog=prog))
Exception: "dot" not found in path.
>>>
Let's print some variables used in the file /home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/site-packages/pydot.py before the line 1878:
try:
print("env: " + str(env))
print("cmdline: " + str(cmdline))
print("tmp_dir: " + str(tmp_dir))
p = subprocess.Popen(
cmdline,
env=env,
cwd=tmp_dir,
shell=False,
stderr=subprocess.PIPE, stdout=subprocess.PIPE)
except OSError as e:
if e.errno == os.errno.ENOENT:
raise Exception(
'"{prog}" not found in path.'.format(
prog=prog))
else:
raise
Restart your Python interpreter and rerun import pydot and pydot.Dot.create(pydot.Dot()); it will show:
xb#dnxb:~/anaconda3/envs/r-tensorflow/bin$ ./python
Python 3.6.3 |Anaconda, Inc.| (default, Nov 20 2017, 20:41:42)
[GCC 7.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pydot
>>> pydot.Dot.create(pydot.Dot())
env: {'PATH': '/home/xiaobai/anaconda3/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:...<My other PATH>'}
cmdline: ['dot', '-Tps', '/tmp/tmpffo17gx5']
tmp_dir: /tmp
Traceback (most recent call last):
File "/home/xiaobai/anaconda3/envs/r-tensorflow/lib/python3.6/site-packages/pydot.py", line 1881, in create
stderr=subprocess.PIPE, stdout=subprocess.PIPE)
So basically, it runs the command dot -Tps /tmp/tmpffo17gx5, which fails because the dot command is not found.
On Linux, if I run the command manually in a terminal, the shell suggests running sudo apt install graphviz to install dot:
xb#dnxb:~/anaconda3/envs/r-tensorflow/bin$ dot
The program 'dot' is currently not installed. You can install it by typing:
sudo apt install graphviz
xb#dnxb:~/anaconda3/envs/r-tensorflow/bin$ sudo apt install graphviz
...
Running dot -Tps /tmp/tmpffo17gx5 now succeeds:
xb#dnxb:~/anaconda3/envs/r-tensorflow/bin$ dot -Tps /tmp/tmpffo17gx5
%!PS-Adobe-3.0
%%Creator: graphviz version 2.38.0 (20140413.2041)
%%Title: G
%%Pages: (atend)
%%BoundingBox: (atend)
%%EndComments
save
%%BeginProlog
/DotDict 200 dict def
DotDict begin
...
Restart R session, no more error:
> plot_model(model)
>
The sudo apt install graphviz command is for Linux, but I hope this answer helps you debug the error on Windows.
You should install the Python libraries:
pip install pydot graphviz
You also need to install the Graphviz binaries, which are not installed along with the Python packages.
On Ubuntu you can install them with apt:
apt-get install -y graphviz libgraphviz-dev
On macOS with Homebrew:
brew install graphviz
For Windows and other operating systems, the instructions can be found at http://www.graphviz.org/

PySpark map datetime to DoW

I'm trying to map a column 'eventtimestamp' to its day of week with the following function:
from datetime import datetime
import calendar
from pyspark.sql.functions import UserDefinedFunction as udf
def toWeekDay(x):
v = int(datetime.strptime(str(x),'%Y-%m-%d %H:%M:%S').strftime('%w'))
if v == 0:
v = 6
else:
v = v-1
return calendar.day_name[v]
Then, for my DataFrame df, I try to create a new column dow with a UDF:
udf_toWeekDay = udf(lambda x: toWeekDay(x), StringType())
df = df.withColumn("dow",udf_toWeekDay('eventtimestamp'))
Yet I'm getting an error I do not understand at all. At first, it complained that I was passing a datetime.datetime to strptime instead of a string, so I converted the value with str(), and now I don't have a clue what's wrong.
Traceback (most recent call last):
File "/tmp/zeppelin_pyspark-9040214714346906648.py", line 267, in <module>
raise Exception(traceback.format_exc())
Exception: Traceback (most recent call last):
File "/tmp/zeppelin_pyspark-9040214714346906648.py", line 260, in <module>
exec(code)
File "<stdin>", line 10, in <module>
File "/usr/lib/spark/python/pyspark/sql/dataframe.py", line 429, in take
return self.limit(num).collect()
File "/usr/lib/spark/python/pyspark/sql/dataframe.py", line 391, in collect
port = self._jdf.collectToPython()
File "/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1133, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/usr/lib/spark/python/pyspark/sql/utils.py", line 63, in deco
return f(*a, **kw)
File "/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o6250.collectToPython.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1107.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1107.0 (TID 63757, ip-172-31-27-113.eu-west-1.compute.internal, executor 819): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
Thanks a lot for clues!
We can use date_format (from pyspark.sql.functions) to get the day of the week directly, without a UDF:
df = df.withColumn("dow",date_format(df['eventtimestamp'],'EEEE'))

PipelinedRDD can't be converted to a DataFrame using toDF

I have a PySpark DataFrame that contains rows of comma-separated data. I want to split each row, turn it into a LabeledPoint, and then convert the result to a DataFrame.
Here is my code
import os.path
from pyspark.mllib.regression import LabeledPoint
import numpy as np
file_name = os.path.join('databricks-datasets', 'cs190', 'data-001', 'millionsong.txt')
raw_data_df = sqlContext.read.load(file_name, 'text')
rdd = raw_data_df.rdd.map(lambda line: line.split(',')).map(lambda seq:LabeledPoints(seq[0],seq[1:])).toDF()
It gives the following error message after applying .toDF():
---------------------------------------------------------------------------
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 38.0 failed 1 times, most recent failure: Lost task 0.0 in stage 38.0 (TID 44, localhost): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
Py4JJavaError Traceback (most recent call last)
<ipython-input-65-dc4d86a8ee45> in <module>()
----> 1 rdd = raw_data_df.rdd.map(lambda line: line.split(',')).map(lambda seq:LabeledPoints(seq[0],seq[1:])).toDF()
2 print(type(rdd))
3 #print(rdd.take(5))
/databricks/spark/python/pyspark/sql/context.py in toDF(self, schema, sampleRatio)
62 [Row(name=u'Alice', age=1)]
63 """
---> 64 return sqlContext.createDataFrame(self, schema, sampleRatio)
65
66 RDD.toDF = toDF
/databricks/spark/python/pyspark/sql/context.py in createDataFrame(self, data, schema, samplingRatio)
421
422 if isinstance(data, RDD):
--> 423 rdd, schema = self._createFromRDD(data, schema, samplingRatio)
424 else:
425 rdd, schema = self._createFromLocal(data, schema)
/databricks/spark/python/pyspark/sql/context.py in _createFromRDD(self, rdd, schema, samplingRatio)
Answer found:
rdd = raw_data_df.map(lambda row: row['value'].split(',')).map(lambda seq:LabeledPoint(float(seq[0]),seq[1:])).toDF()
Here, I need to reference each line of text explicitly with row['value'], even though the row contains only that one field.

Resources