MetPy Static Stability - MemoryError: Unable to allocate 13.4 GiB for an array with shape (19, 1825, 180, 288) and data type float64 - out-of-memory

I have downloaded around 3.5 GB of global temperature data from the GFDL-ESM model for the 2010-2014 period. I want to calculate static stability using the MetPy library.
My dataset looks like this: [Temperature Data]
The code I am using is as follows:
import xarray as xr
import numpy as np
import pandas as pd
import geopandas as gp
import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
from cartopy import feature as cf
import netCDF4 as nc
import seaborn as sns
import glob
from datetime import datetime, timedelta
import cartopy.feature as cfeature
import matplotlib.gridspec as gridspec
import metpy.calc as mpcalc
from metpy.units import units
from metpy.plots.declarative import *
from netCDF4 import num2date
import scipy.ndimage as ndimage
from scipy.ndimage import gaussian_filter
from siphon.ncss import NCSS
from metpy.cbook import get_test_data
from metpy.interpolate import cross_section
import math
all = glob.glob("{path}/ta_Eday_GFDL-ESM4_historical_r1i1p1f1_gr1_20100101-20141231.nc")
all
ds = xr.open_mfdataset(all).metpy.parse_cf()
ds
t = ds['ta']
pt = mpcalc.static_stability(t.plev * units.millibar, t * units.K)
After running this program, the error I get is as follows:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_14188\3773388047.py in <cell line: 1>()
----> 1 pt = mpcalc.static_stability( t.plev*units.millibar , t*units.K )
~\miniconda3\lib\site-packages\metpy\xarray.py in wrapper(*args, **kwargs)
1542 )
1543
-> 1544 return func(*bound_args.args, **bound_args.kwargs)
1545 return wrapper
~\miniconda3\lib\site-packages\metpy\xarray.py in wrapper(*args, **kwargs)
1233
1234 # Evaluate inner calculation
-> 1235 result = func(*bound_args.args, **bound_args.kwargs)
1236
1237 # Wrap output based on match and match_unit
~\miniconda3\lib\site-packages\metpy\units.py in wrapper(*args, **kwargs)
294 def wrapper(*args, **kwargs):
295 _check_units_inner_helper(func, sig, defaults, dims, *args, **kwargs)
--> 296 return func(*args, **kwargs)
297
298 return wrapper
~\miniconda3\lib\site-packages\metpy\calc\thermo.py in static_stability(pressure, temperature, vertical_dim)
3125
3126 """
-> 3127 theta = potential_temperature(pressure, temperature)
3128
3129 return - mpconsts.Rd * temperature / pressure * first_derivative(
~\miniconda3\lib\site-packages\metpy\xarray.py in wrapper(*args, **kwargs)
1233
1234 # Evaluate inner calculation
-> 1235 result = func(*bound_args.args, **bound_args.kwargs)
1236
1237 # Wrap output based on match and match_unit
~\miniconda3\lib\site-packages\metpy\units.py in wrapper(*args, **kwargs)
294 def wrapper(*args, **kwargs):
295 _check_units_inner_helper(func, sig, defaults, dims, *args, **kwargs)
--> 296 return func(*args, **kwargs)
297
298 return wrapper
~\miniconda3\lib\site-packages\metpy\calc\thermo.py in potential_temperature(pressure, temperature)
137
138 """
--> 139 return temperature / exner_function(pressure)
140
141
~\miniconda3\lib\site-packages\metpy\xarray.py in wrapper(*args, **kwargs)
1233
1234 # Evaluate inner calculation
-> 1235 result = func(*bound_args.args, **bound_args.kwargs)
1236
1237 # Wrap output based on match and match_unit
~\miniconda3\lib\site-packages\metpy\units.py in wrapper(*args, **kwargs)
294 def wrapper(*args, **kwargs):
295 _check_units_inner_helper(func, sig, defaults, dims, *args, **kwargs)
--> 296 return func(*args, **kwargs)
297
298 return wrapper
~\miniconda3\lib\site-packages\metpy\calc\thermo.py in exner_function(pressure, reference_pressure)
95
96 """
---> 97 return (pressure / reference_pressure).to('dimensionless')**mpconsts.kappa
98
99
~\miniconda3\lib\site-packages\pint\quantity.py in __truediv__(self, other)
1339
1340 def __truediv__(self, other):
-> 1341 return self._mul_div(other, operator.truediv)
1342
1343 def __rtruediv__(self, other):
~\miniconda3\lib\site-packages\pint\quantity.py in wrapped(self, *args, **kwargs)
137 elif isinstance(other, list) and other and isinstance(other[0], type(self)):
138 return NotImplemented
--> 139 return f(self, *args, **kwargs)
140
141 return wrapped
~\miniconda3\lib\site-packages\pint\quantity.py in wrapped(self, *args, **kwargs)
117 def ireduce_dimensions(f):
118 def wrapped(self, *args, **kwargs):
--> 119 result = f(self, *args, **kwargs)
120 try:
121 if result._REGISTRY.auto_reduce_dimensions:
~\miniconda3\lib\site-packages\pint\quantity.py in _mul_div(self, other, magnitude_op, units_op)
1311 other = other.to_root_units()
1312
-> 1313 magnitude = magnitude_op(new_self._magnitude, other._magnitude)
1314 units = units_op(new_self._units, other._units)
1315
MemoryError: Unable to allocate 13.4 GiB for an array with shape (19, 1825, 180, 288) and data type float64
Please help.

As the MemoryError indicates, the problem is that the calculation you're requesting needs to allocate a 13.4 GiB array, which doesn't fit in your system's memory.
When you use open_mfdataset, xarray lazily loads the data, meaning it doesn't actually read all of the values into memory until they are requested.
Unfortunately, MetPy's calculations operate eagerly: when you call static_stability on this full dataset, it reads all of those values and has to hold the full final result in memory.
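You can confirm the size involved without loading anything, since even a lazy DataArray reports its uncompressed footprint (a quick check, reusing the names from your code):
# Full size the eager calculation needs to materialize
print(ds['ta'].nbytes / 2**30, "GiB")  # roughly 13.4 GiB for this dataset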
The Dask library tries to address this by "chunking" arrays that are bigger than memory and handling computations on the chunks smartly. Unfortunately, while xarray can use Dask internally, MetPy's support for Dask right now is not particularly robust; I'm not sure how well `static_stability` would work when given a large Dask array.
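For what it's worth, here is a minimal sketch of the Dask route, reusing the `all` file list from your code (the chunk size of 365 is just an illustrative choice, and whether static_stability then runs cleanly on the Dask-backed array is something you'd have to test):
# Chunk along the time dimension so Dask can work on pieces of the
# array instead of the whole (19, 1825, 180, 288) block at once
ds = xr.open_mfdataset(all, chunks={'time': 365}).metpy.parse_cf()
t = ds['ta']
pt = mpcalc.static_stability(t.plev * units.millibar, t * units.K)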
If Dask doesn't work here with MetPy, one workaround is to calculate on only a subset of the data at a time (e.g. whatever size you'd be doing analysis on, like a single time or a single vertical level) and iterate over these subsets.
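A minimal sketch of that workaround, reusing the names from your code: loop over single time steps so only one (19, 180, 288) slice is in memory at a time.
import xarray as xr
import metpy.calc as mpcalc
from metpy.units import units

ds = xr.open_mfdataset(all).metpy.parse_cf()
t = ds['ta']

for i in range(t.sizes['time']):
    t_slice = t.isel(time=i).load()  # read just this one time step
    pt = mpcalc.static_stability(t_slice.plev * units.millibar,
                                 t_slice * units.K)
    # ...analyze or save pt here; accumulating every full slice in a
    # list would just rebuild the 13.4 GiB array in memory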

Related

Why do I get a "No module named 'sympy.series.approximants'" error when importing SymPy with Anaconda?

SymPy is installed with Anaconda. In a Jupyter notebook, I have 'from sympy import divisors' and I get this:
ModuleNotFoundError Traceback (most recent call last)
Cell In[12], line 1
----> 1 from sympy import divisors
File ~/Desktop/anaconda3/lib/python3.9/site-packages/sympy/__init__.py:107
70 from .assumptions import (AppliedPredicate, Predicate, AssumptionsContext,
71 assuming, Q, ask, register_handler, remove_handler, refine)
73 from .polys import (Poly, PurePoly, poly_from_expr, parallel_poly_from_expr,
74 degree, total_degree, degree_list, LC, LM, LT, pdiv, prem, pquo,
75 pexquo, div, rem, quo, exquo, half_gcdex, gcdex, invert,
(...)
104 laguerre_poly, apart, apart_list, assemble_partfrac_list, Options,
105 ring, xring, vring, sring, field, xfield, vfield, sfield)
--> 107 from .series import (Order, O, limit, Limit, gruntz, series, approximants,
108 residue, EmptySequence, SeqPer, SeqFormula, sequence, SeqAdd, SeqMul,
109 fourier_series, fps, difference_delta, limit_seq)
111 from .functions import (factorial, factorial2, rf, ff, binomial,
112 RisingFactorial, FallingFactorial, subfactorial, carmichael,
113 fibonacci, lucas, motzkin, tribonacci, harmonic, bernoulli, bell, euler,
(...)
132 Znm, elliptic_k, elliptic_f, elliptic_e, elliptic_pi, beta, mathieus,
133 mathieuc, mathieusprime, mathieucprime, riemann_xi, betainc, betainc_regularized)
135 from .ntheory import (nextprime, prevprime, prime, primepi, primerange,
136 randprime, Sieve, sieve, primorial, cycle_length, composite,
137 compositepi, isprime, divisors, proper_divisors, factorint,
(...)
148 continued_fraction_iterator, continued_fraction_reduce,
149 continued_fraction_convergents, continued_fraction, egyptian_fraction)
File ~/Desktop/anaconda3/lib/python3.9/site-packages/sympy/series/__init__.py:7
5 from .gruntz import gruntz
6 from .series import series
----> 7 from .approximants import approximants
8 from .residues import residue
9 from .sequences import SeqPer, SeqFormula, sequence, SeqAdd, SeqMul
ModuleNotFoundError: No module named 'sympy.series.approximants'
Any ideas on what I am doing wrong? Thank you.
I tried using the terminal to update, but I am garbage with anything beyond the basics.

I am trying to run a linear regression by plotting exchange rates against years; however, it results in InvalidIndexError: (slice(None, None, None), None)

TypeError Traceback (most recent call last)
File D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py:3621, in Index.get_loc(self, key, method, tolerance)
3620 try:
-> 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
File D:\Anaconda3\lib\site-packages\pandas\_libs\index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File D:\Anaconda3\lib\site-packages\pandas\_libs\index.pyx:142, in pandas._libs.index.IndexEngine.get_loc()
TypeError: '(slice(None, None, None), None)' is an invalid key
During handling of the above exception, another exception occurred:
InvalidIndexError Traceback (most recent call last)
Input In [18], in <cell line: 4>()
1 plt.figure(figsize=(20,10))
2 ax = plt.subplot()
----> 4 plt.plot(x,y_all)
5 ax.set_xticks(x_ticks)
6 ax.set_xticklabels(x_ticklabels)
File D:\Anaconda3\lib\site-packages\matplotlib\pyplot.py:2757, in plot(scalex, scaley, data, *args, **kwargs)
2755 #_copy_docstring_and_deprecators(Axes.plot)
2756 def plot(*args, scalex=True, scaley=True, data=None, **kwargs):
-> 2757 return gca().plot(
2758 *args, scalex=scalex, scaley=scaley,
2759 **({"data": data} if data is not None else {}), **kwargs)
File D:\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py:1632, in Axes.plot(self, scalex, scaley, data, *args, **kwargs)
1390 """
1391 Plot y versus x as lines and/or markers.
1392
(...)
1629 (``'green'``) or hex strings (``'#008000'``).
1630 """
1631 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D)
-> 1632 lines = [*self._get_lines(*args, data=data, **kwargs)]
1633 for line in lines:
1634 self.add_line(line)
File D:\Anaconda3\lib\site-packages\matplotlib\axes\_base.py:312, in _process_plot_var_args.__call__(self, data, *args, **kwargs)
310 this += args[0],
311 args = args[1:]
--> 312 yield from self._plot_args(this, kwargs)
File D:\Anaconda3\lib\site-packages\matplotlib\axes\_base.py:488, in _process_plot_var_args._plot_args(self, tup, kwargs, return_kwargs)
486 if len(xy) == 2:
487 x = _check_1d(xy[0])
--> 488 y = _check_1d(xy[1])
489 else:
490 x, y = index_of(xy[-1])
File D:\Anaconda3\lib\site-packages\matplotlib\cbook\__init__.py:1327, in _check_1d(x)
1321 with warnings.catch_warnings(record=True) as w:
1322 warnings.filterwarnings(
1323 "always",
1324 category=Warning,
1325 message='Support for multi-dimensional indexing')
-> 1327 ndim = x[:, None].ndim
1328 # we have definitely hit a pandas index or series object
1329 # cast to a numpy array.
1330 if len(w) > 0:
File D:\Anaconda3\lib\site-packages\pandas\core\frame.py:3505, in DataFrame.__getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
File D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py:3628, in Index.get_loc(self, key, method, tolerance)
3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
-> 3628 self._check_indexing_error(key)
3629 raise
3631 # GH#42269
File D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py:5637, in Index._check_indexing_error(self, key)
5633 def _check_indexing_error(self, key):
5634 if not is_scalar(key):
5635 # if key is not a scalar, directly raise an error (the code below
5636 # would convert to numpy arrays and raise later any way) - GH29926
-> 5637 raise InvalidIndexError(key)
InvalidIndexError: (slice(None, None, None), None)
y_all = groupby_all[['AUD_mean', 'EUR_mean', 'NZD_mean', 'SGD_mean', 'GBP_mean', 'CHF_mean','USD_mean']]
labels = ["AUD_mean", "EUR_mean", "NZD_mean", "SGD_mean", "GBP_mean", "CHF_mean", "USD_mean"]
x_ticks = list(range(1, 240, 12))
x_ticklabels = [x for x in range(2000, 2021)]
plt.figure(figsize=(20,10))
ax = plt.subplot()
plt.plot(x, y_all)
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_ticklabels)
plt.legend(labels)
plt.title("Exchange Rate: Top Countries/USD")
plt.xlabel("Year")
plt.ylabel("Exchange Rate")
plt.show()

RuntimeError: Error loading state dict for SrlBert Missing keys: ['bert_model.embeddings.position_ids'] Unexpected keys: []

I am just a beginner in NLP and was trying to learn the semantic role labeling concept through implementation.
I was trying to load the bert-base-srl model from AllenNLP's public storage, but I ran into the following error:
from allennlp.predictors.predictor import Predictor
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bert-base-srl-2020.03.24.tar.gz")
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_11672/96061884.py in <module>
1 from allennlp.predictors.predictor import Predictor
----> 2 predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bert-base-srl-2020.03.24.tar.gz")
~\anaconda3\lib\site-packages\allennlp\predictors\predictor.py in from_path(cls, archive_path, predictor_name, cuda_device, dataset_reader_to_load, frozen, import_plugins, overrides, **kwargs)
364 plugins.import_plugins()
365 return Predictor.from_archive(
--> 366 load_archive(archive_path, cuda_device=cuda_device, overrides=overrides),
367 predictor_name,
368 dataset_reader_to_load=dataset_reader_to_load,
~\anaconda3\lib\site-packages\allennlp\models\archival.py in load_archive(archive_file, cuda_device, overrides, weights_file)
233 config.duplicate(), serialization_dir
234 )
--> 235 model = _load_model(config.duplicate(), weights_path, serialization_dir, cuda_device)
236
237 # Load meta.
~\anaconda3\lib\site-packages\allennlp\models\archival.py in _load_model(config, weights_path, serialization_dir, cuda_device)
277
278 def _load_model(config, weights_path, serialization_dir, cuda_device):
--> 279 return Model.load(
280 config,
281 weights_file=weights_path,
~\anaconda3\lib\site-packages\allennlp\models\model.py in load(cls, config, serialization_dir, weights_file, cuda_device)
436 # get_model_class method, that recurses whenever it finds a from_archive model type.
437 model_class = Model
--> 438 return model_class._load(config, serialization_dir, weights_file, cuda_device)
439
440 def extend_embedder_vocab(self, embedding_sources_mapping: Dict[str, str] = None) -> None:
~\anaconda3\lib\site-packages\allennlp\models\model.py in _load(cls, config, serialization_dir, weights_file, cuda_device)
378
379 if unexpected_keys or missing_keys:
--> 380 raise RuntimeError(
381 f"Error loading state dict for {model.__class__.__name__}\n\t"
382 f"Missing keys: {missing_keys}\n\t"
RuntimeError: Error loading state dict for SrlBert
Missing keys: ['bert_model.embeddings.position_ids']
Unexpected keys: []
Does someone know a fix for this?
If you are on a later version of allennlp-models, you can use this archive_file instead: https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz.
The latest versions of the model archive files can be found on the demo page in the Model Card tab: https://demo.allennlp.org/semantic-role-labeling
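For example, the same call as in the question, pointed at the newer archive:
from allennlp.predictors.predictor import Predictor

predictor = Predictor.from_path(
    "https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz"
)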

Why is the file not found in Jupyter Notebook?

I am not sure why this is not working:
import pandas as pd
df = pd.read_csv('pokedata.csv')
FileNotFoundError Traceback (most recent call last)
<ipython-input-1-a1266fea4180> in <module>
1 import pandas as pd
2
----> 3 df = pd.read_csv('pokedata.csv')
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
608 kwds.update(kwds_defaults)
609
--> 610 return _read(filepath_or_buffer, kwds)
611
612
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
460
461 # Create the parser.
--> 462 parser = TextFileReader(filepath_or_buffer, **kwds)
463
464 if chunksize or iterator:
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
817 self.options["has_index_names"] = kwds["has_index_names"]
818
--> 819 self._engine = self._make_engine(self.engine)
820
821 def close(self):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
1048 )
1049 # error: Too many arguments for "ParserBase"
-> 1050 return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
1051
1052 def _failover_to_python(self):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1865
1866 # open handles
-> 1867 self._open_handles(src, kwds)
1868 assert self.handles is not None
1869 for key in ("storage_options", "encoding", "memory_map", "compression"):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in _open_handles(self, src, kwds)
1360 Let the readers open IOHanldes after they are done with their potential raises.
1361 """
-> 1362 self.handles = get_handle(
1363 src,
1364 "r",
~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
640 errors = "replace"
641 # Encoding
--> 642 handle = open(
643 handle,
644 ioargs.mode,
FileNotFoundError: [Errno 2] No such file or directory: 'pokedata.csv'
Run
import os
cwd = os.getcwd()
print(cwd)
to find out whether you are in the directory where 'pokedata.csv' resides.
Otherwise, use the absolute path to the file, or change the working directory with os.chdir().
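For example (the paths here are hypothetical placeholders):
import os
import pandas as pd

df = pd.read_csv('/full/path/to/pokedata.csv')  # absolute path
# or change the working directory first, then use the relative name
os.chdir('/full/path/to')
df = pd.read_csv('pokedata.csv')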

XLDateAmbiguous error even when using dayfirst argument

I'm trying to import data into a pandas DataFrame from an Excel spreadsheet, parsing dates. I'm using dayfirst; however, I still get an XLDateAmbiguous error (docs).
The dates are in a single column in the format 25/09/1990
Could somebody explain to me why this is happening and how I can fix it? Thanks in advance.
Edit: It seems as though the problem is caused by xlrd attempting to parse a non-date column as a date even though I've specified which column the dates are in. Unfortunately, I don't know how to explicitly indicate that a column should not be parsed as dates. Does anybody have any ideas?
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import dateutil
path6 = 'C:\\Users\\Site2_Homepage_2013-06-04.xlsx'
df8 = pd.io.excel.read_excel(path6, 'Site2_Homepage_2012_06_13', header=1, parse_dates=True, dayfirst=True)
XLDateAmbiguous Traceback (most recent call last)
<ipython-input-17-4a83d104ab72> in <module>()
4 path7 = 'C:\\Users\\Site4_Homepage_2013-06-04.xlsx'
5 path8 = 'C:\\Users\\Site7_Homepage_2013-06-04.xlsx'
----> 6 df8 = pd.io.excel.read_excel(path6, 'Site2_Homepage_2012_06_13', header=1, parse_dates=True, dayfirst=True)
7 df9 = pd.io.excel.read_excel(path7, 'Site4_Homepage_2012_06_13', header=1, parse_dates=[3], dayfirst=True)
8 df10 = pd.io.excel.read_excel(path8, 'Site7_Homepage_2012_06_13', header=1, parse_dates=[3], dayfirst=True)
C:\Users\AppData\Local\Enthought\Canopy32\User\lib\site-packages\pandas\io\excel.pyc in read_excel(io, sheetname, **kwds)
101 engine = kwds.pop('engine', None)
102
--> 103 return ExcelFile(io, engine=engine).parse(sheetname=sheetname, **kwds)
104
105
C:\Users\AppData\Local\Enthought\Canopy32\User\lib\site-packages\pandas\io\excel.pyc in parse(self, sheetname, header, skiprows, skip_footer, index_col, parse_cols, parse_dates, date_parser, na_values, thousands, chunksize, convert_float, has_index_names, **kwds)
206 skip_footer=skip_footer,
207 convert_float=convert_float,
--> 208 **kwds)
209
210 def _should_parse(self, i, parse_cols):
C:\Users\AppData\Local\Enthought\Canopy32\User\lib\site-packages\pandas\io\excel.pyc in _parse_excel(self, sheetname, header, skiprows, skip_footer, index_col, has_index_names, parse_cols, parse_dates, date_parser, na_values, thousands, chunksize, convert_float, **kwds)
267 if parse_cols is None or should_parse[j]:
268 if typ == XL_CELL_DATE:
--> 269 dt = xldate_as_tuple(value, datemode)
270 # how to produce this first case?
271 if dt[0] < datetime.MINYEAR: # pragma: no cover
C:\Users\AppData\Local\Enthought\Canopy32\User\lib\site-packages\xlrd\xldate.pyc in xldate_as_tuple(xldate, datemode)
78
79 if xldays < 61 and datemode == 0:
---> 80 raise XLDateAmbiguous(xldate)
81
82 jdn = xldays + _JDN_delta[datemode]
XLDateAmbiguous: 15.3
I didn't manage to find a solution to this. In the end, I had to use .csv versions of the files for the dates to parse correctly.
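If you can use a newer pandas, one possible alternative (untested against these particular files, and it assumes openpyxl is installed) is to read with the openpyxl engine, which bypasses xlrd and with it the XLDateAmbiguous check:
import pandas as pd

# Path and sheet name are the ones from the question
df8 = pd.read_excel('C:\\Users\\Site2_Homepage_2013-06-04.xlsx',
                    'Site2_Homepage_2012_06_13', header=1,
                    engine='openpyxl')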
