export_graphviz is not defined error!!
Used the following code in a Jupyter notebook (Python 3.x):
def show_tree(tree, features, path):
    f = io.StringIO()
    export_graphviz(tree, out_file=f, feature_names=features)
    pydotplus.graph_from_dot_data(f.getvalue()).write_png(path)
    img = misc.imread(path)
    plt.rcParams['figure.figsize'] = [20, 20]
    plt.imshow(img)

show_tree(dt, features, 'dc_tree.png')
Got the following error when calling the function show_tree:
NameError                                 Traceback (most recent call last)
<ipython-input-21-96002279767f> in <module>()
----> 1 show_tree(dt,features,'dc_tree.png')

<ipython-input-20-f73dae020a9a> in show_tree(tree, features, path)
      4 def show_tree (tree, features, path):
      5     f = io.StringIO()
----> 6     export_graphviz(tree, out_file = f, feature_names=features )
      7     pydotplus.graph_from_dot_data(f.getvalue()).write_png(path)
      8     img = misc.imread(path)

NameError: name 'export_graphviz' is not defined
To use the export_graphviz exporter you need to import it from sklearn.tree.
Try
from sklearn import tree
(see example at the bottom of this page)
or
from sklearn.tree import DecisionTreeClassifier, export_graphviz
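With the import in place, the posted function runs. Here is a minimal sketch of the fixed version (note that the function's own tree parameter would shadow a from sklearn import tree module import, so importing export_graphviz directly is safer here; I've also swapped scipy's misc.imread, which was removed in newer SciPy releases, for matplotlib's imread):
import io

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pydotplus
from sklearn.tree import export_graphviz

def show_tree(tree, features, path):
    # Export the fitted tree to DOT format in memory.
    f = io.StringIO()
    export_graphviz(tree, out_file=f, feature_names=features)
    # Render the DOT data to a PNG file, then display it.
    pydotplus.graph_from_dot_data(f.getvalue()).write_png(path)
    img = mpimg.imread(path)
    plt.rcParams['figure.figsize'] = [20, 20]
    plt.imshow(img)

# dt is assumed to be a fitted DecisionTreeClassifier and
# features the list of feature names, as in the question.
show_tree(dt, features, 'dc_tree.png')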
I am trying to upload an image from the local system within the same directory. After uploading, when I pass it through OpenCV split and merge for the b, g, and r channels, I get the error ValueError: not enough values to unpack (expected 3, got 0).
Is there any way to debug in Streamlit so that I can track changes at different lines of code (for instance, the image path)? When executed in Google Colab as individual ipynb files, the same code runs properly and I get the required classification.
Error:
ValueError: not enough values to unpack (expected 3, got 0)
Traceback:
File "C:\Users\ADARSH\anaconda3\lib\site-packages\streamlit\runtime\scriptrunner\script_runner.py", line 564, in _run_script
    exec(code, module.__dict__)
File "C:\Users\ADARSH\streamlit\deploy_test.py", line 76, in <module>
    main()
File "C:\Users\ADARSH\streamlit\deploy_test.py", line 68, in main
    mask = imageToTensor('image')
File "C:\Users\ADARSH\streamlit\deploy_test.py", line 44, in imageToTensor
    b,g,r = cv2.split(bgr_img)
My entire streamlit app code:
from pathlib import Path
import cv2
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random
from sklearn.utils import shuffle
from tqdm import tqdm_notebook
import streamlit as st
from PIL import Image as impo
from fastai import *
from fastai.vision import *
from torchvision.models import *

class MyImageItemList(ImageList):
    def open(self, fn:PathOrStr)->Image:
        img = readCroppedImage(fn.replace('/./','').replace('//','/'))
        # This ndarray image has to be converted to tensor before passing on as fastai Image, we can use pil2tensor
        return vision.Image(px=pil2tensor(img, np.float32))

def read_image(name):
    image = st.file_uploader("Upload an "+ name, type=["png", "jpg", "jpeg",'tif'])
    if image is not None:
        im = impo.open(image)
        im.filename = image.name
        return image

def imageToTensor(image):
    sz = 68
    bgr_img = cv2.imread(image)
    b,g,r = cv2.split(bgr_img)
    rgb_img = cv2.merge([r,g,b])
    # crop to center to the correct size and convert from 0-255 range to 0-1 range
    H,W,C = rgb_img.shape
    rgb_img = rgb_img[(H-sz)//2:(sz +(H-sz)//2),(H-sz)//2:(sz +(H-sz)//2),:] / 256
    return vision.Image(px=pil2tensor(rgb_img, np.float32))

def learn_infernce():
    return load_learner('./')

def get_prediction(image):
    if st.button('Classify'):
        pred, pred_idx, probs = learn_inference.predict(image)
        classes = ['negative', 'tumor']
        st.write(f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}')
    else:
        st.write(f'Click the button to classify')

def main():
    st.set_page_config(page_title='Cancer detection', page_icon=None, layout='centered', initial_sidebar_state='auto')
    image = read_image('image')
    mask = imageToTensor('image')
    if mask is not None:
        get_prediction('mask')

if __name__ == "__main__":
    main()
In your main function you are passing string literals instead of variables, and your read_image function is not well structured either.
What you should do is first save the uploaded file in a directory, then fetch the file from that directory and pass it to imageToTensor() as a parameter. That is one workaround which gives you total control over the file. Otherwise you will get other error messages after the first error is fixed.
You can also write a few lines of code in a separate Python file to delete uploaded files from that directory after a given duration (see the sketch after the code below).
Note: keep an eye on the imports, because I skipped them to keep the code short.
class MyImageItemList(ImageList):
    def open(self, fn:PathOrStr)->Image:
        img = readCroppedImage(fn.replace('/./','').replace('//','/'))
        # This ndarray image has to be converted to tensor before passing on as fastai Image, we can use pil2tensor
        return vision.Image(px=pil2tensor(img, np.float32))

# Refactored read_image()
def get_uploaded_image():
    upload = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg",'tif'])
    if upload is not None:
        st.write(upload.name)
        # Create the directory and save the image file before proceeding.
        os.makedirs("data/uploadedImages/", exist_ok=True)
        file_path = os.path.join("data/uploadedImages/", upload.name)
        with open(file_path, "wb") as user_file:
            user_file.write(upload.getbuffer())
        return file_path  # fixed indentation

def imageToTensor(image):
    sz = 68
    bgr_img = cv2.imread(image)
    b,g,r = cv2.split(bgr_img)
    rgb_img = cv2.merge([r,g,b])
    # crop to center to the correct size and convert from 0-255 range to 0-1 range
    H,W,C = rgb_img.shape
    rgb_img = rgb_img[(H-sz)//2:(sz +(H-sz)//2),(H-sz)//2:(sz +(H-sz)//2),:] / 256
    return vision.Image(px=pil2tensor(rgb_img, np.float32))

def learn_infernce():
    return load_learner('./')

def get_prediction(image):
    if st.button('Classify'):
        pred, pred_idx, probs = learn_inference.predict(image)
        classes = ['negative', 'tumor']
        st.write(f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}')
    else:
        st.write(f'Click the button to classify')

def main():
    st.set_page_config(page_title='Cancer detection', page_icon=None, layout='centered', initial_sidebar_state='auto')
    # Holds the saved file path
    user_image = get_uploaded_image()
    if user_image is not None:
        # Pass the path to imageToTensor() as a parameter.
        mask = imageToTensor(user_image)
        get_prediction(mask)

if __name__ == "__main__":
    main()
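And here is a minimal sketch of the cleanup idea mentioned above (the directory matches the answer's code; the one-hour threshold is just an illustrative choice, not from the original post):
# cleanup_uploads.py -- a sketch of the separate cleanup script.
import os
import time

UPLOAD_DIR = "data/uploadedImages/"
MAX_AGE_SECONDS = 60 * 60  # hypothetical: delete files older than one hour

def delete_old_uploads():
    now = time.time()
    for name in os.listdir(UPLOAD_DIR):
        path = os.path.join(UPLOAD_DIR, name)
        # Remove regular files whose last modification is older than the threshold.
        if os.path.isfile(path) and now - os.path.getmtime(path) > MAX_AGE_SECONDS:
            os.remove(path)

if __name__ == "__main__":
    delete_old_uploads()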
I'm hoping somebody could please assist. When running the following code in a Jupyter notebook, I get an error:
dummydata["ID_NUMBER"] = dummydata["ID_NUMBER"].to_string()
def clean_dummydata(dummydata,cols):
for col_name in cols:
keys = {cats: i for i,cats in str(hash(dummydata[col_name].unique()))}
dummydata[col_name] = dummydata[col_name].apply(lambda x: keys[x])
return dummydata
cols = ['ID_NUMBER']
dummydata = clean_dummydata(dummydata,cols)
dummydata.to_csv('anon_dummydata.csv')
This is the error:
TypeError                                 Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_3140/2100616149.py in <module>
      7
      8 cols = ['ID_NUMBER']
----> 9 dummydata = clean_dummydata(dummydata,cols)
     10 dummydata.to_csv('anon_dummydata.csv')

~\AppData\Local\Temp/ipykernel_3140/2100616149.py in clean_dummydata(dummydata, cols)
      2 def clean_dummydata(dummydata,cols):
      3     for col_name in cols:
----> 4         keys = {cats: i for i,cats in str(hash(dummydata[col_name].unique()))}
      5         dummydata[col_name] = dummydata[col_name].apply(lambda x: keys[x])
      6     return dummydata

TypeError: unhashable type: 'numpy.ndarray'
Mutable types like NumPy arrays and lists are not hashable, because their contents could change and break lookups that rely on the hash value.
You can use hash only with immutable datatypes like a tuple. So you can convert your NumPy array into a tuple and then hash it, for example:
import numpy as np

z = np.array(['one', 'two', 'three'])
tuple_z = tuple(z)
hash_z = hash(tuple_z)
and it should run perfectly.
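Applied to the question's code, the broken dict comprehension most likely wanted to enumerate the unique values rather than hash the whole array; a sketch of that fix (assuming the goal is to map each ID to an integer code):
def clean_dummydata(dummydata, cols):
    for col_name in cols:
        # Map each unique value to an integer index; no hashing of the
        # ndarray is needed, which avoids the TypeError entirely.
        keys = {cat: i for i, cat in enumerate(dummydata[col_name].unique())}
        dummydata[col_name] = dummydata[col_name].apply(lambda x: keys[x])
    return dummydata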
class A:
    def __init__(self):
        print(" ")
    def have(self,a,b):
        a=int(input("Enter A : "))
        b=int(input("Enter b : "))

class B(A):
    def add(self,sum1):
        sum1=A+B
        print("Sum is: ",sum1)

obj = B()
obj.add()
This is the error I got:
TypeError                                 Traceback (most recent call last)
<ipython-input-47-714fd5e33719> in <module>
     13
     14 obj = B()
---> 15 obj.add()

TypeError: add() missing 1 required positional argument: 'sum1'
Change your code a bit, like so:
class A:
    def __init__(self):
        print(" ")
    def have(self):
        # tell the code to use this class's a and b variables
        self.a = int(input("Enter A : "))
        self.b = int(input("Enter b : "))

class B(A):
    # don't include anything in add except self,
    # because a and b are already in class A
    def add(self):
        # use self.a and self.b that were inherited from A
        sum1 = self.a + self.b
        print("Sum is: ",sum1)

obj = B()
# call the have method first to ask for inputs
obj.have()
obj.add()
Now, when you call obj.have(), you will be asked what A and B should be. Note that you are using capital A and B only for display; you are storing the data in lowercase a and b. In the code above, we use the class's a and b variables via self.a and self.b.
Once those variables are populated, we call obj.add(), which adds the two variables.
Check out some interesting literature about classes here: https://www.digitalocean.com/community/tutorials/understanding-class-and-instance-variables-in-python-3
I have downloaded a tensorflow GraphDef that implements a VGG16 ConvNet, which I use like this:
Pl['images'] = tf.placeholder(tf.float32,
                              [None, 448, 448, 3],
                              name="images") #batch x width x height x channels

with open("tensorflow-vgg16/vgg16.tfmodel", mode='rb') as f:
    fileContent = f.read()

graph_def = tf.GraphDef()
graph_def.ParseFromString(fileContent)
tf.import_graph_def(graph_def, input_map={"images": Pl['images']})
Besides, I have image features that are homogeneous to the output of "import/pool5/".
How can I tell my graph that I don't want to use its input "images", but the tensor "import/pool5/" as input instead?
Thanks!
EDIT
OK I realize I haven't been very clear. Here is the situation:
I am trying to use this implementation of ROI pooling, using a pre-trained VGG16, which I have in the GraphDef format. So here is what I do:
First of all, I load the model:
tf.reset_default_graph()
with open("tensorflow-vgg16/vgg16.tfmodel", mode='rb') as f:
    fileContent = f.read()

graph_def = tf.GraphDef()
graph_def.ParseFromString(fileContent)
graph = tf.get_default_graph()
Then, I create my placeholders
images = tf.placeholder(tf.float32,
                        [None, 448, 448, 3],
                        name="images") #batch x width x height x channels

boxes = tf.placeholder(tf.float32,
                       [None,5], # 5 = [batch_id,x1,y1,x2,y2]
                       name="boxes")
And I define the output of the first part of the graph to be conv5_3/Relu
tf.import_graph_def(graph_def,
                    input_map={'images':images})
out_tensor = graph.get_tensor_by_name("import/conv5_3/Relu:0")
So, out_tensor is of shape [None,14,14,512]
Then, I do the ROI pooling:
[out_pool,argmax] = module.roi_pool(out_tensor,
                                    boxes,
                                    7,7,1.0/1)
With out_pool.shape = N_Boxes_in_batch x 7 x 7 x 512, which is homogeneous to pool5. I would then like to feed out_pool as input to the op that comes just after pool5, so it would look like:
tf.import_graph_def(graph.as_graph_def(),
                    input_map={'import/pool5':out_pool})
But it doesn't work; I get this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-89-527398d7344b> in <module>()
5
6 tf.import_graph_def(graph.as_graph_def(),
----> 7 input_map={'import/pool5':out_pool})
8
9 final_out = graph.get_tensor_by_name("import/Relu_1:0")
/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/importer.py in import_graph_def(graph_def, input_map, return_elements, name, op_dict)
333 # NOTE(mrry): If the graph contains a cycle, the full shape information
334 # may not be available for this op's inputs.
--> 335 ops.set_shapes_for_outputs(op)
336
337 # Apply device functions for this op.
/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py in set_shapes_for_outputs(op)
1610 raise RuntimeError("No shape function registered for standard op: %s"
1611 % op.type)
-> 1612 shapes = shape_func(op)
1613 if len(op.outputs) != len(shapes):
1614 raise RuntimeError(
/home/hbenyounes/vqa/roi_pooling_op_grad.py in _roi_pool_shape(op)
13 channels = dims_data[3]
14 print(op.inputs[1].name, op.inputs[1].get_shape())
---> 15 dims_rois = op.inputs[1].get_shape().as_list()
16 num_rois = dims_rois[0]
17
/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/tensor_shape.py in as_list(self)
745 A list of integers or None for each dimension.
746 """
--> 747 return [dim.value for dim in self._dims]
748
749 def as_proto(self):
TypeError: 'NoneType' object is not iterable
Any clue?
It is usually very convenient to use tf.train.export_meta_graph to store the whole MetaGraph. Then, upon restoring, you can use tf.train.import_meta_graph, because it turns out that it passes all additional arguments to the underlying import_scoped_meta_graph, which has the input_map argument and uses it when it gets to its own invocation of import_graph_def.
It is not documented, and it took me way too much time to find, but it works!
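A minimal sketch of that approach, using the TF1-style API (the placeholder and file names here are illustrative, not from the original post):
import tensorflow as tf

# Build a small graph and export it as a MetaGraph.
x = tf.placeholder(tf.float32, shape=[None, 7, 7, 512], name='pool5_in')
y = tf.identity(x, name='head')
tf.train.export_meta_graph('model.meta')

tf.reset_default_graph()

# input_map is not in import_meta_graph's documented signature, but it is
# forwarded through **kwargs down to import_graph_def, so we can splice an
# arbitrary tensor in where the placeholder used to be.
replacement = tf.zeros([1, 7, 7, 512])  # e.g. your out_pool tensor
tf.train.import_meta_graph('model.meta', input_map={'pool5_in': replacement})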
What I would do is something along these lines:
-First, retrieve the names of the tensors representing the weights and biases of the 3 fully connected layers coming after pool5 in VGG16.
To do that, I would inspect [n.name for n in graph.as_graph_def().node].
(They probably look something like import/locali/weight:0, import/locali/bias:0, etc.)
-Put them in a python list:
weights_names=["import/local1/weight:0" ,"import/local2/weight:0" ,"import/local3/weight:0"]
biases_names=["import/local1/bias:0" ,"import/local2/bias:0" ,"import/local3/bias:0"]
-Define a function that looks something like:
def pool5_tofcX(input_tensor, layer_number=3):
    flatten = tf.reshape(input_tensor, (-1, 7*7*512))
    tmp = flatten
    for i in range(layer_number):  # xrange is Python 2 only
        tmp = tf.matmul(tmp, graph.get_tensor_by_name(weights_names[i]))
        tmp = tf.nn.bias_add(tmp, graph.get_tensor_by_name(biases_names[i]))
        tmp = tf.nn.relu(tmp)
    return tmp
Then define the tensor using the function:
wanted_output = pool5_tofcX(out_pool)
Then you are done!
Jonan Georgiev provided an excellent answer here. The same approach was also described with little fanfare at the end of this git issue: https://github.com/tensorflow/tensorflow/issues/3389
Below is a copy/paste runnable example of using this approach to switch out a placeholder for a tf.data.Dataset get_next tensor.
import tensorflow as tf
my_placeholder = tf.placeholder(dtype=tf.float32, shape=1, name='my_placeholder')
my_op = tf.square(my_placeholder, name='my_op')
# Save the graph to memory
graph_def = tf.get_default_graph().as_graph_def()
print('----- my_op before any remapping -----')
print([n for n in graph_def.node if n.name == 'my_op'])
tf.reset_default_graph()
ds = tf.data.Dataset.from_tensors(1.0)
next_tensor = tf.data.make_one_shot_iterator(ds).get_next(name='my_next_tensor')
# Restore the graph with a custom input mapping
tf.graph_util.import_graph_def(graph_def, input_map={'my_placeholder': next_tensor}, name='')
print('----- my_op after remapping -----')
print([n for n in tf.get_default_graph().as_graph_def().node if n.name == 'my_op'])
Output, where we can clearly see that the input to the square operation has changed:
----- my_op before any remapping -----
[name: "my_op"
op: "Square"
input: "my_placeholder"
attr {
  key: "T"
  value {
    type: DT_FLOAT
  }
}
]
----- my_op after remapping -----
[name: "my_op"
op: "Square"
input: "my_next_tensor"
attr {
  key: "T"
  value {
    type: DT_FLOAT
  }
}
]
I'm using Python 3.4.2 and PyPDF2 1.24 (also using reportlab 3.1.44, in case that helps) on Windows 7.
I recently upgraded from Python 2.7 to 3.4 and am in the process of porting my code. This code is used to create a blank PDF page with links embedded in it (using reportlab) and merge it (using PyPDF2) with an existing PDF page. I had an issue with reportlab in that saving the canvas used StringIO, which needed to be changed to BytesIO, but after doing that I ran into this error:
Traceback (most recent call last):
  File "C:\cms_software\pdf_replica\builder.py", line 401, in merge_pdf_files
    input_page.mergePage(link_page)
  File "C:\Python34\lib\site-packages\PyPDF2\pdf.py", line 2013, in mergePage
    self.mergePage(page2)
  File "C:\Python34\lib\site-packages\PyPDF2\pdf.py", line 2059, in mergePage
    page2Content = PageObject._pushPopGS(page2Content, self.pdf)
  File "C:\Python34\lib\site-packages\PyPDF2\pdf.py", line 1973, in _pushPopGS
    stream = ContentStream(contents, pdf)
  File "C:\Python34\lib\site-packages\PyPDF2\pdf.py", line 2446, in __init__
    stream = BytesIO(b_(stream.getData()))
  File "C:\Python34\lib\site-packages\PyPDF2\generic.py", line 826, in getData
    decoded._data = filters.decodeStreamData(self)
  File "C:\Python34\lib\site-packages\PyPDF2\filters.py", line 326, in decodeStreamData
    data = ASCII85Decode.decode(data)
  File "C:\Python34\lib\site-packages\PyPDF2\filters.py", line 264, in decode
    data = [y for y in data if not (y in ' \n\r\t')]
  File "C:\Python34\lib\site-packages\PyPDF2\filters.py", line 264, in <listcomp>
    data = [y for y in data if not (y in ' \n\r\t')]
TypeError: 'in <string>' requires string as left operand, not int
Here is the line the traceback mentions, along with the lines just above it:
link_page = self.make_pdf_link_page(pdf, size, margin, scale_factor, debug_article_links)
if link_page != None:
    input_page.mergePage(link_page)
Here are the relevant parts of that make_pdf_link_page function:
packet = io.BytesIO()
can = canvas.Canvas(packet, pagesize=(size['width'], size['height']))
....  # left-out code here is just reportlab specifics for size and url stuff
can.linkURL(url, r1, thickness=1, color=colors.green)
can.rect(x1, y1, width, height, stroke=1, fill=0)
# create a new PDF with Reportlab that has the url link embedded
can.save()
packet.seek(0)
try:
    new_pdf = PdfFileReader(packet)
except Exception as e:
    logger.exception('e')
    return None
return new_pdf.getPage(0)
I'm assuming it's a problem with using BytesIO, but I can't create the page with reportlab with StringIO. This is a critical feature that used to work perfectly with Python 2.7, so I'd appreciate any kind of feedback on this. Thanks!
UPDATE:
I've also tried changing from using BytesIO to just writing to a temp file, then merging. Unfortunately I got the same error.
Here is tempfile version:
import tempfile

temp_dir = tempfile.gettempdir()
temp_path = os.path.join(temp_dir, "tmp.pdf")
can = canvas.Canvas(temp_path, pagesize=(size['width'], size['height']))
....
can.showPage()
can.save()
try:
    new_pdf = PdfFileReader(temp_path)
except Exception as e:
    logger.exception('e')
    return None
return new_pdf.getPage(0)
UPDATE:
I found an interesting bit of information on this. It seems that if I comment out the can.rect and can.linkURL calls, it will merge. So drawing anything on a page and then trying to merge it with my existing PDF is what causes the error.
After digging into the PyPDF2 library code, I was able to find my own answer. For Python 3 users, old libraries can be tricky. Even if they say they support Python 3, they don't necessarily test everything. In this case, the problem was with the class ASCII85Decode in filters.py in PyPDF2. For Python 3, this class needs to return bytes. I borrowed the code for this same type of function from pdfminer3k, which is a Python 3 port of pdfminer. If you exchange the ASCII85Decode class for this code, it will work:
import struct

class ASCII85Decode(object):
    def decode(data, decodeParms=None):
        if isinstance(data, str):
            data = data.encode('ascii')
        n = b = 0
        out = bytearray()
        for c in data:
            if ord('!') <= c and c <= ord('u'):
                n += 1
                b = b*85+(c-33)
                if n == 5:
                    out += struct.pack(b'>L',b)
                    n = b = 0
            elif c == ord('z'):
                assert n == 0
                out += b'\0\0\0\0'
            elif c == ord('~'):
                if n:
                    for _ in range(5-n):
                        b = b*85+84
                    out += struct.pack(b'>L',b)[:n-1]
                break
        return bytes(out)
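If you'd rather not edit the installed package, one way to apply this is to monkey-patch the class before reading any PDF. This is a sketch; it relies on decodeStreamData looking the class up through the module-level name, which the traceback above suggests it does:
import PyPDF2.filters

# Replace the broken decoder with the Python-3-safe class defined above,
# before any PDF content streams are decoded.
PyPDF2.filters.ASCII85Decode = ASCII85Decode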