I am not sure why this is not working:
import pandas as pd
df = pd.read_csv('pokedata.csv')
FileNotFoundError Traceback (most recent call last)
<ipython-input-1-a1266fea4180> in <module>
1 import pandas as pd
2
----> 3 df = pd.read_csv('pokedata.csv')
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
608 kwds.update(kwds_defaults)
609
--> 610 return _read(filepath_or_buffer, kwds)
611
612
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
460
461 # Create the parser.
--> 462 parser = TextFileReader(filepath_or_buffer, **kwds)
463
464 if chunksize or iterator:
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
817 self.options["has_index_names"] = kwds["has_index_names"]
818
--> 819 self._engine = self._make_engine(self.engine)
820
821 def close(self):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
1048 )
1049 # error: Too many arguments for "ParserBase"
-> 1050 return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
1051
1052 def _failover_to_python(self):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1865
1866 # open handles
-> 1867 self._open_handles(src, kwds)
1868 assert self.handles is not None
1869 for key in ("storage_options", "encoding", "memory_map", "compression"):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in _open_handles(self, src, kwds)
1360 Let the readers open IOHanldes after they are done with their potential raises.
1361 """
-> 1362 self.handles = get_handle(
1363 src,
1364 "r",
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
640 errors = "replace"
641 # Encoding
--> 642 handle = open(
643 handle,
644 ioargs.mode,
FileNotFoundError: [Errno 2] No such file or directory: 'pokedata.csv'
Run
import os
cwd = os.getcwd()
print(cwd)
to find out whether you are in the directory where 'pokedata.csv' actually resides.
Otherwise, pass the absolute path to the file, or change the working directory with os.chdir().
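For example, a quick check along those lines (the absolute path below is only a placeholder):

import os
import pandas as pd

print(os.getcwd())      # directory the notebook is currently running in
print(os.listdir('.'))  # confirm whether pokedata.csv is listed here

# Option 1: give read_csv the absolute path (placeholder shown).
df = pd.read_csv('/full/path/to/pokedata.csv')

# Option 2: change the working directory first, then use the relative name.
os.chdir('/full/path/to')
df = pd.read_csv('pokedata.csv')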
TypeError Traceback (most recent call last)
File D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py:3621, in Index.get_loc(self, key, method, tolerance)
3620 try:
-> 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
File D:\Anaconda3\lib\site-packages\pandas\_libs\index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File D:\Anaconda3\lib\site-packages\pandas\_libs\index.pyx:142, in pandas._libs.index.IndexEngine.get_loc()
TypeError: '(slice(None, None, None), None)' is an invalid key
During handling of the above exception, another exception occurred:
InvalidIndexError Traceback (most recent call last)
Input In [18], in <cell line: 4>()
1 plt.figure(figsize=(20,10))
2 ax = plt.subplot()
----> 4 plt.plot(x,y_all)
5 ax.set_xticks(x_ticks)
6 ax.set_xticklabels(x_ticklabels)
File D:\Anaconda3\lib\site-packages\matplotlib\pyplot.py:2757, in plot(scalex, scaley, data, *args, **kwargs)
2755 #_copy_docstring_and_deprecators(Axes.plot)
2756 def plot(*args, scalex=True, scaley=True, data=None, **kwargs):
-> 2757 return gca().plot(
2758 *args, scalex=scalex, scaley=scaley,
2759 **({"data": data} if data is not None else {}), **kwargs)
File D:\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py:1632, in Axes.plot(self, scalex, scaley, data, *args, **kwargs)
1390 """
1391 Plot y versus x as lines and/or markers.
1392
(...)
1629 (``'green'``) or hex strings (``'#008000'``).
1630 """
1631 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D)
-> 1632 lines = [*self._get_lines(*args, data=data, **kwargs)]
1633 for line in lines:
1634 self.add_line(line)
File D:\Anaconda3\lib\site-packages\matplotlib\axes\_base.py:312, in _process_plot_var_args.__call__(self, data, *args, **kwargs)
310 this += args[0],
311 args = args[1:]
--> 312 yield from self._plot_args(this, kwargs)
File D:\Anaconda3\lib\site-packages\matplotlib\axes\_base.py:488, in _process_plot_var_args._plot_args(self, tup, kwargs, return_kwargs)
486 if len(xy) == 2:
487 x = _check_1d(xy[0])
--> 488 y = _check_1d(xy[1])
489 else:
490 x, y = index_of(xy[-1])
File D:\Anaconda3\lib\site-packages\matplotlib\cbook\__init__.py:1327, in _check_1d(x)
1321 with warnings.catch_warnings(record=True) as w:
1322 warnings.filterwarnings(
1323 "always",
1324 category=Warning,
1325 message='Support for multi-dimensional indexing')
-> 1327 ndim = x[:, None].ndim
1328 # we have definitely hit a pandas index or series object
1329 # cast to a numpy array.
1330 if len(w) > 0:
File D:\Anaconda3\lib\site-packages\pandas\core\frame.py:3505, in DataFrame.__getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
File D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py:3628, in Index.get_loc(self, key, method, tolerance)
3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
-> 3628 self._check_indexing_error(key)
3629 raise
3631 # GH#42269
File D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py:5637, in Index._check_indexing_error(self, key)
5633 def _check_indexing_error(self, key):
5634 if not is_scalar(key):
5635 # if key is not a scalar, directly raise an error (the code below
5636 # would convert to numpy arrays and raise later any way) - GH29926
-> 5637 raise InvalidIndexError(key)
InvalidIndexError: (slice(None, None, None), None)
y_all = groupby_all[['AUD_mean', 'EUR_mean', 'NZD_mean', 'SGD_mean', 'GBP_mean', 'CHF_mean','USD_mean']]
labels = ["AUD_mean", "EUR_mean", "NZD_mean", "SGD_mean", "GBP_mean", "CHF_mean", "USD_mean"]
x_ticks = list(range(1, 240, 12))
x_ticklabels = [x for x in range(2000, 2021)]
plt.figure(figsize=(20,10))
ax = plt.subplot()
plt.plot(x, y_all)
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_ticklabels)
plt.legend(labels)
plt.title("Exchange Rate: Top Countries/USD")
plt.xlabel("Year")
plt.ylabel("Exchange Rate")
plt.show()
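A possible workaround, assuming y_all is the DataFrame built above: newer pandas versions reject the multi-dimensional indexing (y_all[:, None]) that matplotlib's _check_1d attempts on DataFrame input, which is what raises the InvalidIndexError in the traceback. Passing a plain NumPy array instead sidesteps that path; a sketch:

import matplotlib.pyplot as plt

# x and y_all as defined in the snippet above.
plt.plot(x, y_all.to_numpy())  # a 2-D array plots one line per column

# Or plot column by column so each line is labeled directly.
for col in y_all.columns:
    plt.plot(x, y_all[col].to_numpy(), label=col)
plt.legend()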
I am trying to run the notebook below through Databricks but I am getting the error below. I have tried updating the notebook timeout and the retry mechanism, but still no luck.
notebooks = [
    NotebookData("/Users/mynotebook", 9900, retry=3)
]
res = parallelNotebooks(notebooks, 2)
result = [f.result(timeout=9900) for f in res]  # This is a blocking call.
print(result)
Can someone please help me sort out this issue? Thanks.
%python
from concurrent.futures import ThreadPoolExecutor

class NotebookData:
    def __init__(self, path, timeout, parameters=None, retry=0):
        self.path = path
        self.timeout = timeout
        self.parameters = parameters
        self.retry = retry

def submitNotebook(notebook):
    print("Running notebook %s" % notebook.path)
    try:
        if (notebook.parameters):
            return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
        else:
            return dbutils.notebook.run(notebook.path, notebook.timeout)
    except Exception:
        if notebook.retry < 1:
            raise
        print("Retrying notebook %s" % notebook.path)
        notebook.retry = notebook.retry - 1
        submitNotebook(notebook)

def parallelNotebooks(notebooks, numInParallel):
    # This code limits the number of parallel notebooks.
    with ThreadPoolExecutor(max_workers=numInParallel) as ec:
        return [ec.submit(submitNotebook, notebook) for notebook in notebooks]

notebooks = [
    NotebookData("/Users/mynotebook", 1200000, retry=0)
]
res = parallelNotebooks(notebooks, 2)
result = [f.result(timeout=1200000) for f in res]  # This is a blocking call.
print(result)
Error:
Py4JJavaError Traceback (most recent call last)
<command-1143841910698378> in <module>
32 ]
33 res = parallelNotebooks(notebooks, 2)
---> 34 result = [f.result(timeout=1200000) for f in res] # This is a blocking call.
35 print(result)
<command-1143841910698378> in <listcomp>(.0)
32 ]
33 res = parallelNotebooks(notebooks, 2)
---> 34 result = [f.result(timeout=1200000) for f in res] # This is a blocking call.
35 print(result)
/usr/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
426 raise CancelledError()
427 elif self._state == FINISHED:
--> 428 return self.__get_result()
429
430 self._condition.wait(timeout)
/usr/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
/usr/lib/python3.7/concurrent/futures/thread.py in run(self)
55
56 try:
---> 57 result = self.fn(*self.args, **self.kwargs)
58 except BaseException as exc:
59 self.future.set_exception(exc)
<command-1143841910698378> in submitNotebook(notebook)
12 return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
13 else:
---> 14 return dbutils.notebook.run(notebook.path, notebook.timeout)
15 except Exception:
16 if notebook.retry < 1:
/local_disk0/tmp/1664351986642-0/dbutils.py in run(self, path, timeout_seconds, arguments, _NotebookHandler__databricks_internal_cluster_spec)
136 arguments,
137 __databricks_internal_cluster_spec,
--> 138 self.shell.currentJobGroup)
139
140 def __repr__(self):
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
125 def deco(*a, **kw):
126 try:
--> 127 return f(*a, **kw)
128 except py4j.protocol.Py4JJavaError as e:
129 converted = convert_exception(e.java_exception)
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling o1741._run.
: com.databricks.WorkflowException: com.databricks.NotebookExecutionException: FAILED
at com.databricks.workflow.WorkflowDriver.run(WorkflowDriver.scala:95)
at com.databricks.dbutils_v1.impl.NotebookUtilsImpl.run(NotebookUtilsImpl.scala:122)
at com.databricks.dbutils_v1.impl.NotebookUtilsImpl._run(NotebookUtilsImpl.scala:89)
at sun.reflect.GeneratedMethodAccessor820.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:295)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.databricks.NotebookExecutionException: FAILED
at com.databricks.workflow.WorkflowDriver.run0(WorkflowDriver.scala:141)
at com.databricks.workflow.WorkflowDriver.run(WorkflowDriver.scala:90)
... 12 more
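Separately from the FAILED error above (which is raised because the child notebook itself failed, so its own run output is the place to look for the root cause), one detail in the posted helper worth noting: the retry branch in submitNotebook never returns the value of the recursive call, so a run that only succeeds on a retry still yields None, and retry=0 disables retries altogether. A sketch of a loop-based variant, under the same assumptions as the original code:

def submitNotebook(notebook):
    # Try the notebook once, plus notebook.retry additional attempts on failure.
    attempts = notebook.retry + 1
    for attempt in range(attempts):
        try:
            if notebook.parameters:
                return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
            return dbutils.notebook.run(notebook.path, notebook.timeout)
        except Exception:
            if attempt == attempts - 1:
                raise
            print("Retrying notebook %s" % notebook.path)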
I have downloaded GFDL-ESM model global temperature data for the 2010-2014 period, around 3.5 GB. I want to calculate static stability using the MetPy library.
My dataset looks like this: Temperature Data
The code I am using is as follows:
import xarray as xr
import numpy as np
import pandas as pd
import geopandas as gp
import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
from cartopy import feature as cf
import netCDF4 as nc
import seaborn as sns
import glob
from datetime import datetime, timedelta
import cartopy.feature as cfeature
import matplotlib.gridspec as gridspec
import metpy.calc as mpcalc
from metpy.units import units
from metpy.plots.declarative import *
from netCDF4 import num2date
import scipy.ndimage as ndimage
from scipy.ndimage import gaussian_filter
from siphon.ncss import NCSS
from metpy.cbook import get_test_data
from metpy.interpolate import cross_section
import math
all=glob.glob(" {path} /ta_Eday_GFDL-ESM4_historical_r1i1p1f1_gr1_20100101-20141231.nc")
all
ds= xr.open_mfdataset(all).metpy.parse_cf()
ds
t=ds['ta']
pt = mpcalc.static_stability(t.plev * units.millibar, t * units.K)
After running this program the error I am getting is as follows:
--------
MemoryError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_14188\3773388047.py in <cell line: 1>()
----> 1 pt = mpcalc.static_stability( t.plev*units.millibar , t*units.K )
~\miniconda3\lib\site-packages\metpy\xarray.py in wrapper(*args, **kwargs)
1542 )
1543
-> 1544 return func(*bound_args.args, **bound_args.kwargs)
1545 return wrapper
~\miniconda3\lib\site-packages\metpy\xarray.py in wrapper(*args, **kwargs)
1233
1234 # Evaluate inner calculation
-> 1235 result = func(*bound_args.args, **bound_args.kwargs)
1236
1237 # Wrap output based on match and match_unit
~\miniconda3\lib\site-packages\metpy\units.py in wrapper(*args, **kwargs)
294 def wrapper(*args, **kwargs):
295 _check_units_inner_helper(func, sig, defaults, dims, *args, **kwargs)
--> 296 return func(*args, **kwargs)
297
298 return wrapper
~\miniconda3\lib\site-packages\metpy\calc\thermo.py in static_stability(pressure, temperature, vertical_dim)
3125
3126 """
-> 3127 theta = potential_temperature(pressure, temperature)
3128
3129 return - mpconsts.Rd * temperature / pressure * first_derivative(
~\miniconda3\lib\site-packages\metpy\xarray.py in wrapper(*args, **kwargs)
1233
1234 # Evaluate inner calculation
-> 1235 result = func(*bound_args.args, **bound_args.kwargs)
1236
1237 # Wrap output based on match and match_unit
~\miniconda3\lib\site-packages\metpy\units.py in wrapper(*args, **kwargs)
294 def wrapper(*args, **kwargs):
295 _check_units_inner_helper(func, sig, defaults, dims, *args, **kwargs)
--> 296 return func(*args, **kwargs)
297
298 return wrapper
~\miniconda3\lib\site-packages\metpy\calc\thermo.py in potential_temperature(pressure, temperature)
137
138 """
--> 139 return temperature / exner_function(pressure)
140
141
~\miniconda3\lib\site-packages\metpy\xarray.py in wrapper(*args, **kwargs)
1233
1234 # Evaluate inner calculation
-> 1235 result = func(*bound_args.args, **bound_args.kwargs)
1236
1237 # Wrap output based on match and match_unit
~\miniconda3\lib\site-packages\metpy\units.py in wrapper(*args, **kwargs)
294 def wrapper(*args, **kwargs):
295 _check_units_inner_helper(func, sig, defaults, dims, *args, **kwargs)
--> 296 return func(*args, **kwargs)
297
298 return wrapper
~\miniconda3\lib\site-packages\metpy\calc\thermo.py in exner_function(pressure, reference_pressure)
95
96 """
---> 97 return (pressure / reference_pressure).to('dimensionless')**mpconsts.kappa
98
99
~\miniconda3\lib\site-packages\pint\quantity.py in __truediv__(self, other)
1339
1340 def __truediv__(self, other):
-> 1341 return self._mul_div(other, operator.truediv)
1342
1343 def __rtruediv__(self, other):
~\miniconda3\lib\site-packages\pint\quantity.py in wrapped(self, *args, **kwargs)
137 elif isinstance(other, list) and other and isinstance(other[0], type(self)):
138 return NotImplemented
--> 139 return f(self, *args, **kwargs)
140
141 return wrapped
~\miniconda3\lib\site-packages\pint\quantity.py in wrapped(self, *args, **kwargs)
117 def ireduce_dimensions(f):
118 def wrapped(self, *args, **kwargs):
--> 119 result = f(self, *args, **kwargs)
120 try:
121 if result._REGISTRY.auto_reduce_dimensions:
~\miniconda3\lib\site-packages\pint\quantity.py in _mul_div(self, other, magnitude_op, units_op)
1311 other = other.to_root_units()
1312
-> 1313 magnitude = magnitude_op(new_self._magnitude, other._magnitude)
1314 units = units_op(new_self._units, other._units)
1315
MemoryError: Unable to allocate 13.4 GiB for an array with shape (19, 1825, 180, 288) and data type float64
Please help.
As indicated by the MemoryError, the problem is that the calculation you're requesting needs to allocate a 13.4 GiB array, which doesn't fit in your system's memory.
When you use open_mfdataset, xarray loads the data lazily, meaning it doesn't actually read the values into memory until they are requested.
Unfortunately, MetPy's calculations operate eagerly, so when you call static_stability on this full dataset, it iterates over all of those values and needs to hold the full final result in memory.
The Dask library tries to address this by "chunking" arrays that are bigger than memory and handling the computation chunk by chunk. Unfortunately, while xarray can use Dask internally, MetPy's support for Dask right now is not particularly robust; I'm not sure how well `static_stability` would work when given a large Dask array.
If Dask doesn't work here with MetPy, one workaround is to calculate on only a subset of the data at a time (e.g., whatever size you'd be doing analysis on, like a single time or a single vertical level) and iterate over these subsets.
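For example, a minimal sketch of that subsetting approach, continuing from the code above and assuming the time dimension in the file is named 'time' (it keeps the same static_stability call, just applied one time step at a time):

# Compute static stability one time step at a time so that only a
# (plev, lat, lon) slice is held in memory at once.
pt_slices = []
for i in range(t.sizes['time']):
    t_i = t.isel(time=i)  # select a single time step (still lazy until used)
    pt_slices.append(mpcalc.static_stability(t_i.plev * units.millibar, t_i * units.K))

You can then combine or reduce the per-time results however your analysis requires.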
I am a beginner in NLP and was trying to learn semantic role labeling through implementation.
I was trying to load the bert-base-srl model from AllenNLP's public storage,
but I ran into the following error:
from allennlp.predictors.predictor import Predictor
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bert-base-srl-2020.03.24.tar.gz")
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_11672/96061884.py in <module>
1 from allennlp.predictors.predictor import Predictor
----> 2 predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bert-base-srl-2020.03.24.tar.gz")
~\anaconda3\lib\site-packages\allennlp\predictors\predictor.py in from_path(cls, archive_path, predictor_name, cuda_device, dataset_reader_to_load, frozen, import_plugins, overrides, **kwargs)
364 plugins.import_plugins()
365 return Predictor.from_archive(
--> 366 load_archive(archive_path, cuda_device=cuda_device, overrides=overrides),
367 predictor_name,
368 dataset_reader_to_load=dataset_reader_to_load,
~\anaconda3\lib\site-packages\allennlp\models\archival.py in load_archive(archive_file, cuda_device, overrides, weights_file)
233 config.duplicate(), serialization_dir
234 )
--> 235 model = _load_model(config.duplicate(), weights_path, serialization_dir, cuda_device)
236
237 # Load meta.
~\anaconda3\lib\site-packages\allennlp\models\archival.py in _load_model(config, weights_path, serialization_dir, cuda_device)
277
278 def _load_model(config, weights_path, serialization_dir, cuda_device):
--> 279 return Model.load(
280 config,
281 weights_file=weights_path,
~\anaconda3\lib\site-packages\allennlp\models\model.py in load(cls, config, serialization_dir, weights_file, cuda_device)
436 # get_model_class method, that recurses whenever it finds a from_archive model type.
437 model_class = Model
--> 438 return model_class._load(config, serialization_dir, weights_file, cuda_device)
439
440 def extend_embedder_vocab(self, embedding_sources_mapping: Dict[str, str] = None) -> None:
~\anaconda3\lib\site-packages\allennlp\models\model.py in _load(cls, config, serialization_dir, weights_file, cuda_device)
378
379 if unexpected_keys or missing_keys:
--> 380 raise RuntimeError(
381 f"Error loading state dict for {model.__class__.__name__}\n\t"
382 f"Missing keys: {missing_keys}\n\t"
RuntimeError: Error loading state dict for SrlBert
Missing keys: ['bert_model.embeddings.position_ids']
Unexpected keys: []
Does someone know a fix for this?
If you are on a later version of allennlp-models, you can use this archive_file instead: https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz
The latest versions of the model archive files can be found on the demo page, in the Model Card tab: https://demo.allennlp.org/semantic-role-labeling
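For example, a minimal sketch using that newer archive (the example sentence is arbitrary):

from allennlp.predictors.predictor import Predictor

# Load the newer SRL BERT archive referenced above.
predictor = Predictor.from_path(
    "https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz"
)
result = predictor.predict(sentence="The keys were left on the kitchen table.")
print(result["verbs"])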
I am trying to index by a datetime that is formed from 3 columns representing year, day of year, and 2400-style (HHMM) time.
2014,323,1203,47.77,320.9
2014,323,1204,48.46,402.6
2014,323,1205,49.2,422.7
2014,323,1206,49.82,432.4
2014,323,1207,50.03,438.6
2014,323,1208,50.15,445.4
2014,323,1209,50.85,449.7
2014,323,1210,50.85,454.4
2014,323,1211,50.85,458.1
2014,323,1212,50.91,460.2
I am using the following code:
In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
In [2]:
def parse(yr, yearday, hrmn):
    date_string = ' '.join([yr, yearday, hrmn])
    print(date_string)
    return datetime.strptime(date_string, "%Y %j %H%M")
In [3]:
df = pd.read_csv('home_prepped.dat', parse_dates={'datetime': [0, 1, 2]},
                 date_parser=parse, index_col='datetime', header=None)
I had success bringing it in when the data was flawed (it had extra rows over the DST change). Now that it is fixed (the extra rows removed and the file stitched back together), I am getting this error (in its entirety):
2014 92 2355
2014 92 2356
2014 92 2357
2014 92 2358
2014 92 2359
2014 92 2400
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-9c710834ee23> in <module>()
1
----> 2 df = pd.read_csv('home_prepped.dat', parse_dates={'datetime':[0,1,2]}, date_parser=parse, index_col='datetime', header=None)
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
463 skip_blank_lines=skip_blank_lines)
464
--> 465 return _read(filepath_or_buffer, kwds)
466
467 parser_f.__name__ = name
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
249 return parser
250
--> 251 return parser.read()
252
253 _parser_defaults = {
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in read(self, nrows)
708 raise ValueError('skip_footer not supported for iteration')
709
--> 710 ret = self._engine.read(nrows)
711
712 if self.options.get('as_recarray'):
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in read(self, nrows)
1209 data = dict((k, v) for k, (i, v) in zip(names, data))
1210
-> 1211 names, data = self._do_date_conversions(names, data)
1212 index, names = self._make_index(data, alldata, names)
1213
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _do_date_conversions(self, names, data)
1033 data, names = _process_date_conversion(
1034 data, self._date_conv, self.parse_dates, self.index_col,
-> 1035 self.index_names, names, keep_date_col=self.keep_date_col)
1036
1037 return names, data
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _process_date_conversion(data_dict, converter, parse_spec, index_col, index_names, columns, keep_date_col)
2100
2101 _, col, old_names = _try_convert_dates(converter, colspec,
-> 2102 data_dict, orig_names)
2103
2104 new_data[new_name] = col
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _try_convert_dates(parser, colspec, data_dict, columns)
2132 to_parse = [data_dict[c] for c in colnames if c in data_dict]
2133
-> 2134 new_col = parser(*to_parse)
2135 return new_name, new_col, colnames
2136
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in converter(*date_cols)
2048 dayfirst=dayfirst)
2049 except Exception:
-> 2050 return generic_parser(date_parser, *date_cols)
2051
2052 return converter
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/date_converters.pyc in generic_parser(parse_func, *cols)
36 for i in range(N):
37 args = [c[i] for c in cols]
---> 38 results[i] = parse_func(*args)
39
40 return results
<ipython-input-2-57e18ddd7deb> in parse(yr, yearday, hrmn)
1 def parse(yr, yearday, hrmn):
2 date_string = ' '.join([yr, yearday, hrmn])
----> 3 return datetime.strptime(date_string,"%Y %j %H%M")
/Volumes/anaconda/anaconda/python.app/Contents/lib/python2.7/_strptime.pyc in _strptime(data_string, format)
326 if len(data_string) != found.end():
327 raise ValueError("unconverted data remains: %s" %
--> 328 data_string[found.end():])
329
330 year = None
ValueError: unconverted data remains: 0
I am looking for suggestions on how to debug or work around this. I have gone through the data, and according to what I have read in similar posts I should be looking for extraneous time data, but there is none.
Thanks.
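Judging from the last line printed before the failure (2014 92 2400), one likely culprit is the 2400 timestamp: strptime's %H only accepts hours 00-23, so "2400" leaves a trailing, unconverted "0". A minimal sketch of a parser that works around this, assuming 2400 is meant to be midnight at the end of that day:

from datetime import datetime, timedelta

def parse(yr, yearday, hrmn):
    # %H cannot parse hour 24, so roll 2400 over to 00:00 of the next day.
    if hrmn == '2400':
        day = datetime.strptime(' '.join([yr, yearday]), "%Y %j")
        return day + timedelta(days=1)
    return datetime.strptime(' '.join([yr, yearday, hrmn]), "%Y %j %H%M")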