Initial commit: Masina-Dock Vehicle Management System

Iulian 2025-10-19 11:10:11 +01:00
commit ae923e2c41
4999 changed files with 1607266 additions and 0 deletions

View file: pandas/__init__.py

@@ -0,0 +1,353 @@
from __future__ import annotations
__docformat__ = "restructuredtext"
# Let users know if they're missing any of our hard dependencies
_hard_dependencies = ("numpy", "pytz", "dateutil")
_missing_dependencies = []
for _dependency in _hard_dependencies:
try:
__import__(_dependency)
except ImportError as _e: # pragma: no cover
_missing_dependencies.append(f"{_dependency}: {_e}")
if _missing_dependencies: # pragma: no cover
raise ImportError(
"Unable to import required dependencies:\n" + "\n".join(_missing_dependencies)
)
del _hard_dependencies, _dependency, _missing_dependencies
try:
# numpy compat
from pandas.compat import (
is_numpy_dev as _is_numpy_dev, # pyright: ignore[reportUnusedImport] # noqa: F401,E501
)
except ImportError as _err: # pragma: no cover
_module = _err.name
raise ImportError(
f"C extension: {_module} not built. If you want to import "
"pandas from the source directory, you may need to run "
"'python setup.py build_ext' to build the C extensions first."
) from _err
from pandas._config import (
get_option,
set_option,
reset_option,
describe_option,
option_context,
options,
)
# let init-time option registration happen
import pandas.core.config_init # pyright: ignore[reportUnusedImport] # noqa: F401
from pandas.core.api import (
# dtype
ArrowDtype,
Int8Dtype,
Int16Dtype,
Int32Dtype,
Int64Dtype,
UInt8Dtype,
UInt16Dtype,
UInt32Dtype,
UInt64Dtype,
Float32Dtype,
Float64Dtype,
CategoricalDtype,
PeriodDtype,
IntervalDtype,
DatetimeTZDtype,
StringDtype,
BooleanDtype,
# missing
NA,
isna,
isnull,
notna,
notnull,
# indexes
Index,
CategoricalIndex,
RangeIndex,
MultiIndex,
IntervalIndex,
TimedeltaIndex,
DatetimeIndex,
PeriodIndex,
IndexSlice,
# tseries
NaT,
Period,
period_range,
Timedelta,
timedelta_range,
Timestamp,
date_range,
bdate_range,
Interval,
interval_range,
DateOffset,
# conversion
to_numeric,
to_datetime,
to_timedelta,
# misc
Flags,
Grouper,
factorize,
unique,
value_counts,
NamedAgg,
array,
Categorical,
set_eng_float_format,
Series,
DataFrame,
)
from pandas.core.dtypes.dtypes import SparseDtype
from pandas.tseries.api import infer_freq
from pandas.tseries import offsets
from pandas.core.computation.api import eval
from pandas.core.reshape.api import (
concat,
lreshape,
melt,
wide_to_long,
merge,
merge_asof,
merge_ordered,
crosstab,
pivot,
pivot_table,
get_dummies,
from_dummies,
cut,
qcut,
)
from pandas import api, arrays, errors, io, plotting, tseries
from pandas import testing
from pandas.util._print_versions import show_versions
from pandas.io.api import (
# excel
ExcelFile,
ExcelWriter,
read_excel,
# parsers
read_csv,
read_fwf,
read_table,
# pickle
read_pickle,
to_pickle,
# pytables
HDFStore,
read_hdf,
# sql
read_sql,
read_sql_query,
read_sql_table,
# misc
read_clipboard,
read_parquet,
read_orc,
read_feather,
read_gbq,
read_html,
read_xml,
read_json,
read_stata,
read_sas,
read_spss,
)
from pandas.io.json._normalize import json_normalize
from pandas.util._tester import test
# use the closest tagged version if possible
_built_with_meson = False
try:
from pandas._version_meson import ( # pyright: ignore [reportMissingImports]
__version__,
__git_version__,
)
_built_with_meson = True
except ImportError:
from pandas._version import get_versions
v = get_versions()
__version__ = v.get("closest-tag", v["version"])
__git_version__ = v.get("full-revisionid")
del get_versions, v
# module level doc-string
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python
=====================================================================
**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with "relational" or "labeled" data both
easy and intuitive. It aims to be the fundamental high-level building block for
doing practical, **real world** data analysis in Python. Additionally, it has
the broader goal of becoming **the most powerful and flexible open source data
analysis / manipulation tool available in any language**. It is already well on
its way toward this goal.
Main Features
-------------
Here are just a few of the things that pandas does well:
- Easy handling of missing data in floating point as well as non-floating
point data.
- Size mutability: columns can be inserted and deleted from DataFrame and
higher dimensional objects.
- Automatic and explicit data alignment: objects can be explicitly aligned
to a set of labels, or the user can simply ignore the labels and let
`Series`, `DataFrame`, etc. automatically align the data for you in
computations.
- Powerful, flexible group by functionality to perform split-apply-combine
operations on data sets, for both aggregating and transforming data.
- Make it easy to convert ragged, differently-indexed data in other Python
and NumPy data structures into DataFrame objects.
- Intelligent label-based slicing, fancy indexing, and subsetting of large
data sets.
- Intuitive merging and joining data sets.
- Flexible reshaping and pivoting of data sets.
- Hierarchical labeling of axes (possible to have multiple labels per tick).
- Robust IO tools for loading data from flat files (CSV and delimited),
Excel files, databases, and saving/loading data from the ultrafast HDF5
format.
- Time series-specific functionality: date range generation and frequency
conversion, moving window statistics, date shifting and lagging.
"""
# Use __all__ to let type checkers know what is part of the public API.
# Pandas is not (yet) a py.typed library: the public API is determined
# based on the documentation.
__all__ = [
"ArrowDtype",
"BooleanDtype",
"Categorical",
"CategoricalDtype",
"CategoricalIndex",
"DataFrame",
"DateOffset",
"DatetimeIndex",
"DatetimeTZDtype",
"ExcelFile",
"ExcelWriter",
"Flags",
"Float32Dtype",
"Float64Dtype",
"Grouper",
"HDFStore",
"Index",
"IndexSlice",
"Int16Dtype",
"Int32Dtype",
"Int64Dtype",
"Int8Dtype",
"Interval",
"IntervalDtype",
"IntervalIndex",
"MultiIndex",
"NA",
"NaT",
"NamedAgg",
"Period",
"PeriodDtype",
"PeriodIndex",
"RangeIndex",
"Series",
"SparseDtype",
"StringDtype",
"Timedelta",
"TimedeltaIndex",
"Timestamp",
"UInt16Dtype",
"UInt32Dtype",
"UInt64Dtype",
"UInt8Dtype",
"api",
"array",
"arrays",
"bdate_range",
"concat",
"crosstab",
"cut",
"date_range",
"describe_option",
"errors",
"eval",
"factorize",
"get_dummies",
"from_dummies",
"get_option",
"infer_freq",
"interval_range",
"io",
"isna",
"isnull",
"json_normalize",
"lreshape",
"melt",
"merge",
"merge_asof",
"merge_ordered",
"notna",
"notnull",
"offsets",
"option_context",
"options",
"period_range",
"pivot",
"pivot_table",
"plotting",
"qcut",
"read_clipboard",
"read_csv",
"read_excel",
"read_feather",
"read_fwf",
"read_gbq",
"read_hdf",
"read_html",
"read_json",
"read_orc",
"read_parquet",
"read_pickle",
"read_sas",
"read_spss",
"read_sql",
"read_sql_query",
"read_sql_table",
"read_stata",
"read_table",
"read_xml",
"reset_option",
"set_eng_float_format",
"set_option",
"show_versions",
"test",
"testing",
"timedelta_range",
"to_datetime",
"to_numeric",
"to_pickle",
"to_timedelta",
"tseries",
"unique",
"value_counts",
"wide_to_long",
]
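
For illustration, a minimal sketch exercising a few of the names re-exported above; it assumes pandas and its hard dependencies (numpy, pytz, dateutil) are installed, and the column names are invented:

import pandas as pd

# A small vehicle-flavored frame; None becomes NaN in a float column.
df = pd.DataFrame({"plate": ["B-101", "B-102", "B-101"], "km": [120.0, 80.0, None]})
print(pd.isna(df["km"]).sum())           # missing-data helpers: isna/notna -> 1
print(pd.to_datetime("2025-10-19"))      # conversion: to_datetime
print(df.groupby("plate")["km"].sum())   # split-apply-combine via groupby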

View file: pandas/_config/__init__.py

@@ -0,0 +1,45 @@
"""
pandas._config is considered explicitly upstream of everything else in pandas,
and should have no intra-pandas dependencies.
importing `dates` and `display` ensures that keys needed by _libs
are initialized.
"""
__all__ = [
"config",
"detect_console_encoding",
"get_option",
"set_option",
"reset_option",
"describe_option",
"option_context",
"options",
"using_copy_on_write",
]
from pandas._config import config
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401
from pandas._config.config import (
_global_config,
describe_option,
get_option,
option_context,
options,
reset_option,
set_option,
)
from pandas._config.display import detect_console_encoding
def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"
def using_nullable_dtypes() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["nullable_dtypes"]
def using_pyarrow_string_dtype() -> bool:
_mode_options = _global_config["future"]
return _mode_options["infer_string"]
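A short sketch of how the mode flags above feed using_copy_on_write; this assumes a pandas 2.x build where "mode.copy_on_write" is registered and the default data manager is "block":

import pandas as pd
from pandas._config import using_copy_on_write

pd.set_option("mode.copy_on_write", True)
# True only while copy_on_write is enabled and the block manager is in use.
print(using_copy_on_write())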

View file: pandas/_config/config.py

@@ -0,0 +1,946 @@
"""
The config module holds package-wide configurables and provides
a uniform API for working with them.
Overview
========
This module supports the following requirements:
- options are referenced using keys in dot.notation, e.g. "x.y.option - z".
- keys are case-insensitive.
- functions should accept partial/regex keys, when unambiguous.
- options can be registered by modules at import time.
- options can be registered at init-time (via core.config_init)
- options have a default value, and (optionally) a description and
validation function associated with them.
- options can be deprecated, in which case referencing them
should produce a warning.
- deprecated options can optionally be rerouted to a replacement
so that accessing a deprecated option reroutes to a differently
named option.
- options can be reset to their default value.
- all options can be reset to their default value at once.
- all options in a certain sub-namespace can be reset at once.
- the user can set / get / reset or ask for the description of an option.
- a developer can register and mark an option as deprecated.
- you can register a callback to be invoked when the option value
is set or reset. Changing the stored value is considered misuse, but
is not verboten.
Implementation
==============
- Data is stored using nested dictionaries, and should be accessed
through the provided API.
- "Registered options" and "Deprecated options" have metadata associated
with them, which are stored in auxiliary dictionaries keyed on the
fully-qualified key, e.g. "x.y.z.option".
- the config_init module is imported by the package's __init__.py file.
Placing any register_option() calls there will ensure those options
are available as soon as pandas is loaded. If you use register_option
in a module, it will only be available after that module is imported,
which you should be aware of.
- `config_prefix` is a context_manager (for use with the `with` keyword)
which can save developers some typing, see the docstring.
"""
from __future__ import annotations
from contextlib import (
ContextDecorator,
contextmanager,
)
import re
from typing import (
TYPE_CHECKING,
Any,
Callable,
Generic,
NamedTuple,
cast,
)
import warnings
from pandas._typing import (
F,
T,
)
from pandas.util._exceptions import find_stack_level
if TYPE_CHECKING:
from collections.abc import (
Generator,
Iterable,
)
class DeprecatedOption(NamedTuple):
key: str
msg: str | None
rkey: str | None
removal_ver: str | None
class RegisteredOption(NamedTuple):
key: str
defval: object
doc: str
validator: Callable[[object], Any] | None
cb: Callable[[str], Any] | None
# holds deprecated option metadata
_deprecated_options: dict[str, DeprecatedOption] = {}
# holds registered option metadata
_registered_options: dict[str, RegisteredOption] = {}
# holds the current values for registered options
_global_config: dict[str, Any] = {}
# keys which have a special meaning
_reserved_keys: list[str] = ["all"]
class OptionError(AttributeError, KeyError):
"""
Exception raised for pandas.options.
Backwards compatible with KeyError checks.
Examples
--------
>>> pd.options.context
Traceback (most recent call last):
OptionError: No such option
"""
#
# User API
def _get_single_key(pat: str, silent: bool) -> str:
keys = _select_options(pat)
if len(keys) == 0:
if not silent:
_warn_if_deprecated(pat)
raise OptionError(f"No such keys(s): {repr(pat)}")
if len(keys) > 1:
raise OptionError("Pattern matched multiple keys")
key = keys[0]
if not silent:
_warn_if_deprecated(key)
key = _translate_key(key)
return key
def _get_option(pat: str, silent: bool = False) -> Any:
key = _get_single_key(pat, silent)
# walk the nested dict
root, k = _get_root(key)
return root[k]
def _set_option(*args, **kwargs) -> None:
# must have at least one (pat, value) pair; deal with other constraints later
nargs = len(args)
if not nargs or nargs % 2 != 0:
raise ValueError("Must provide an even number of non-keyword arguments")
# default to false
silent = kwargs.pop("silent", False)
if kwargs:
kwarg = next(iter(kwargs.keys()))
raise TypeError(f'_set_option() got an unexpected keyword argument "{kwarg}"')
for k, v in zip(args[::2], args[1::2]):
key = _get_single_key(k, silent)
o = _get_registered_option(key)
if o and o.validator:
o.validator(v)
# walk the nested dict
root, k_root = _get_root(key)
root[k_root] = v
if o.cb:
if silent:
with warnings.catch_warnings(record=True):
o.cb(key)
else:
o.cb(key)
def _describe_option(pat: str = "", _print_desc: bool = True) -> str | None:
keys = _select_options(pat)
if len(keys) == 0:
raise OptionError("No such keys(s)")
s = "\n".join([_build_option_description(k) for k in keys])
if _print_desc:
print(s)
return None
return s
def _reset_option(pat: str, silent: bool = False) -> None:
keys = _select_options(pat)
if len(keys) == 0:
raise OptionError("No such keys(s)")
if len(keys) > 1 and len(pat) < 4 and pat != "all":
raise ValueError(
"You must specify at least 4 characters when "
"resetting multiple keys, use the special keyword "
'"all" to reset all the options to their default value'
)
for k in keys:
_set_option(k, _registered_options[k].defval, silent=silent)
def get_default_val(pat: str):
key = _get_single_key(pat, silent=True)
return _get_registered_option(key).defval
class DictWrapper:
"""provide attribute-style access to a nested dict"""
def __init__(self, d: dict[str, Any], prefix: str = "") -> None:
object.__setattr__(self, "d", d)
object.__setattr__(self, "prefix", prefix)
def __setattr__(self, key: str, val: Any) -> None:
prefix = object.__getattribute__(self, "prefix")
if prefix:
prefix += "."
prefix += key
# you can't set new keys
# and you can't overwrite subtrees
if key in self.d and not isinstance(self.d[key], dict):
_set_option(prefix, val)
else:
raise OptionError("You can only set the value of existing options")
def __getattr__(self, key: str):
prefix = object.__getattribute__(self, "prefix")
if prefix:
prefix += "."
prefix += key
try:
v = object.__getattribute__(self, "d")[key]
except KeyError as err:
raise OptionError("No such option") from err
if isinstance(v, dict):
return DictWrapper(v, prefix)
else:
return _get_option(prefix)
def __dir__(self) -> Iterable[str]:
return list(self.d.keys())
# For user convenience, we'd like to have the available options described
# in the docstring. For dev convenience we'd like to generate the docstrings
# dynamically instead of maintaining them by hand. To this end, we use the
# class below, which wraps functions inside a callable and converts
# __doc__ into a property function. The docstrings below are templates
# using the py2.6+ advanced formatting syntax to plug in a concise list
# of options, and option descriptions.
class CallableDynamicDoc(Generic[T]):
def __init__(self, func: Callable[..., T], doc_tmpl: str) -> None:
self.__doc_tmpl__ = doc_tmpl
self.__func__ = func
def __call__(self, *args, **kwds) -> T:
return self.__func__(*args, **kwds)
# error: Signature of "__doc__" incompatible with supertype "object"
@property
def __doc__(self) -> str: # type: ignore[override]
opts_desc = _describe_option("all", _print_desc=False)
opts_list = pp_options_list(list(_registered_options.keys()))
return self.__doc_tmpl__.format(opts_desc=opts_desc, opts_list=opts_list)
_get_option_tmpl = """
get_option(pat)
Retrieves the value of the specified option.
Available options:
{opts_list}
Parameters
----------
pat : str
Regexp which should match a single option.
Note: partial matches are supported for convenience, but unless you use the
full option name (e.g. x.y.z.option_name), your code may break in future
versions if new options with similar names are introduced.
Returns
-------
result : the value of the option
Raises
------
OptionError : if no such option exists
Notes
-----
Please reference the :ref:`User Guide <options>` for more information.
The available options with their descriptions:
{opts_desc}
Examples
--------
>>> pd.get_option('display.max_columns') # doctest: +SKIP
4
"""
_set_option_tmpl = """
set_option(pat, value)
Sets the value of the specified option.
Available options:
{opts_list}
Parameters
----------
pat : str
Regexp which should match a single option.
Note: partial matches are supported for convenience, but unless you use the
full option name (e.g. x.y.z.option_name), your code may break in future
versions if new options with similar names are introduced.
value : object
New value of option.
Returns
-------
None
Raises
------
OptionError if no such option exists
Notes
-----
Please reference the :ref:`User Guide <options>` for more information.
The available options with their descriptions:
{opts_desc}
Examples
--------
>>> pd.set_option('display.max_columns', 4)
>>> df = pd.DataFrame([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
>>> df
0 1 ... 3 4
0 1 2 ... 4 5
1 6 7 ... 9 10
[2 rows x 5 columns]
>>> pd.reset_option('display.max_columns')
"""
_describe_option_tmpl = """
describe_option(pat, _print_desc=True)
Prints the description for one or more registered options.
Call with no arguments to get a listing for all registered options.
Available options:
{opts_list}
Parameters
----------
pat : str
Regexp pattern. All matching keys will have their description displayed.
_print_desc : bool, default True
If True (default) the description(s) will be printed to stdout.
Otherwise, the description(s) will be returned as a unicode string
(for testing).
Returns
-------
None by default, the description(s) as a unicode string if _print_desc
is False
Notes
-----
Please reference the :ref:`User Guide <options>` for more information.
The available options with their descriptions:
{opts_desc}
Examples
--------
>>> pd.describe_option('display.max_columns') # doctest: +SKIP
display.max_columns : int
If max_cols is exceeded, switch to truncate view...
"""
_reset_option_tmpl = """
reset_option(pat)
Reset one or more options to their default value.
Pass "all" as argument to reset all options.
Available options:
{opts_list}
Parameters
----------
pat : str/regex
If specified only options matching `prefix*` will be reset.
Note: partial matches are supported for convenience, but unless you
use the full option name (e.g. x.y.z.option_name), your code may break
in future versions if new options with similar names are introduced.
Returns
-------
None
Notes
-----
Please reference the :ref:`User Guide <options>` for more information.
The available options with their descriptions:
{opts_desc}
Examples
--------
>>> pd.reset_option('display.max_columns') # doctest: +SKIP
"""
# bind the functions with their docstrings into a Callable
# and use that as the functions exposed in pd.api
get_option = CallableDynamicDoc(_get_option, _get_option_tmpl)
set_option = CallableDynamicDoc(_set_option, _set_option_tmpl)
reset_option = CallableDynamicDoc(_reset_option, _reset_option_tmpl)
describe_option = CallableDynamicDoc(_describe_option, _describe_option_tmpl)
options = DictWrapper(_global_config)
#
# Functions for use by pandas developers, in addition to the user API
class option_context(ContextDecorator):
"""
Context manager to temporarily set options in the `with` statement context.
You need to invoke as ``option_context(pat, val, [(pat, val), ...])``.
Examples
--------
>>> from pandas import option_context
>>> with option_context('display.max_rows', 10, 'display.max_columns', 5):
... pass
"""
def __init__(self, *args) -> None:
if len(args) % 2 != 0 or len(args) < 2:
raise ValueError(
"Need to invoke as option_context(pat, val, [(pat, val), ...])."
)
self.ops = list(zip(args[::2], args[1::2]))
def __enter__(self) -> None:
self.undo = [(pat, _get_option(pat)) for pat, val in self.ops]
for pat, val in self.ops:
_set_option(pat, val, silent=True)
def __exit__(self, *args) -> None:
if self.undo:
for pat, val in self.undo:
_set_option(pat, val, silent=True)
def register_option(
key: str,
defval: object,
doc: str = "",
validator: Callable[[object], Any] | None = None,
cb: Callable[[str], Any] | None = None,
) -> None:
"""
Register an option in the package-wide pandas config object
Parameters
----------
key : str
Fully-qualified key, e.g. "x.y.option - z".
defval : object
Default value of the option.
doc : str
Description of the option.
validator : Callable, optional
Function of a single argument, should raise `ValueError` if
called with a value which is not a legal value for the option.
cb
a function of a single argument "key", which is called
immediately after an option value is set/reset. key is
the full name of the option.
Raises
------
ValueError if `validator` is specified and `defval` is not a valid value.
"""
import keyword
import tokenize
key = key.lower()
if key in _registered_options:
raise OptionError(f"Option '{key}' has already been registered")
if key in _reserved_keys:
raise OptionError(f"Option '{key}' is a reserved key")
# the default value should be legal
if validator:
validator(defval)
# walk the nested dict, creating dicts as needed along the path
path = key.split(".")
for k in path:
if not re.match("^" + tokenize.Name + "$", k):
raise ValueError(f"{k} is not a valid identifier")
if keyword.iskeyword(k):
raise ValueError(f"{k} is a python keyword")
cursor = _global_config
msg = "Path prefix to option '{option}' is already an option"
for i, p in enumerate(path[:-1]):
if not isinstance(cursor, dict):
raise OptionError(msg.format(option=".".join(path[:i])))
if p not in cursor:
cursor[p] = {}
cursor = cursor[p]
if not isinstance(cursor, dict):
raise OptionError(msg.format(option=".".join(path[:-1])))
cursor[path[-1]] = defval # initialize
# save the option metadata
_registered_options[key] = RegisteredOption(
key=key, defval=defval, doc=doc, validator=validator, cb=cb
)
def deprecate_option(
key: str,
msg: str | None = None,
rkey: str | None = None,
removal_ver: str | None = None,
) -> None:
"""
Mark option `key` as deprecated; if code attempts to access this option,
a warning will be produced, using `msg` if given, or a default message
if not.
If `rkey` is given, any access to the key will be re-routed to `rkey`.
Neither the existence of `key` nor that of `rkey` is checked. If they
do not exist, any subsequent access will fail as usual, after the
deprecation warning is given.
Parameters
----------
key : str
Name of the option to be deprecated.
Must be a fully-qualified option name (e.g. "x.y.z.option_name").
msg : str, optional
Warning message to output when the key is referenced.
If no message is given, a default message will be emitted.
rkey : str, optional
Name of an option to reroute access to.
If specified, any set/get/reset access to `key` will be
re-routed to `rkey`. `rkey` must be a fully-qualified option name
(e.g. "x.y.z.rkey") and is used by the default message if no `msg` is specified.
removal_ver : str, optional
Specifies the version in which this option will
be removed. Used by the default message if no `msg` is specified.
Raises
------
OptionError
If the specified key has already been deprecated.
"""
key = key.lower()
if key in _deprecated_options:
raise OptionError(f"Option '{key}' has already been defined as deprecated.")
_deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver)
#
# functions internal to the module
def _select_options(pat: str) -> list[str]:
"""
returns a list of keys matching `pat`
if pat=="all", returns all registered options
"""
# short-circuit for exact key
if pat in _registered_options:
return [pat]
# else look through all of them
keys = sorted(_registered_options.keys())
if pat == "all": # reserved key
return keys
return [k for k in keys if re.search(pat, k, re.I)]
def _get_root(key: str) -> tuple[dict[str, Any], str]:
path = key.split(".")
cursor = _global_config
for p in path[:-1]:
cursor = cursor[p]
return cursor, path[-1]
def _is_deprecated(key: str) -> bool:
"""Returns True if the given option has been deprecated"""
key = key.lower()
return key in _deprecated_options
def _get_deprecated_option(key: str):
"""
Retrieves the metadata for a deprecated option, if `key` is deprecated.
Returns
-------
DeprecatedOption (namedtuple) if key is deprecated, None otherwise
"""
try:
d = _deprecated_options[key]
except KeyError:
return None
else:
return d
def _get_registered_option(key: str):
"""
Retrieves the option metadata if `key` is a registered option.
Returns
-------
RegisteredOption (namedtuple) if key is a registered option, None otherwise
"""
return _registered_options.get(key)
def _translate_key(key: str) -> str:
"""
If `key` is deprecated and a replacement key is defined, return the
replacement key; otherwise return `key` as-is.
"""
d = _get_deprecated_option(key)
if d:
return d.rkey or key
else:
return key
def _warn_if_deprecated(key: str) -> bool:
"""
Checks if `key` is a deprecated option and if so, prints a warning.
Returns
-------
bool - True if `key` is deprecated, False otherwise.
"""
d = _get_deprecated_option(key)
if d:
if d.msg:
warnings.warn(
d.msg,
FutureWarning,
stacklevel=find_stack_level(),
)
else:
msg = f"'{key}' is deprecated"
if d.removal_ver:
msg += f" and will be removed in {d.removal_ver}"
if d.rkey:
msg += f", please use '{d.rkey}' instead."
else:
msg += ", please refrain from using it."
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
return True
return False
def _build_option_description(k: str) -> str:
"""Builds a formatted description of a registered option and prints it"""
o = _get_registered_option(k)
d = _get_deprecated_option(k)
s = f"{k} "
if o.doc:
s += "\n".join(o.doc.strip().split("\n"))
else:
s += "No description available."
if o:
s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]"
if d:
rkey = d.rkey or ""
s += "\n (Deprecated"
s += f", use `{rkey}` instead."
s += ")"
return s
def pp_options_list(keys: Iterable[str], width: int = 80, _print: bool = False):
"""Builds a concise listing of available options, grouped by prefix"""
from itertools import groupby
from textwrap import wrap
def pp(name: str, ks: Iterable[str]) -> list[str]:
pfx = "- " + name + ".[" if name else ""
ls = wrap(
", ".join(ks),
width,
initial_indent=pfx,
subsequent_indent=" ",
break_long_words=False,
)
if ls and ls[-1] and name:
ls[-1] = ls[-1] + "]"
return ls
ls: list[str] = []
singles = [x for x in sorted(keys) if x.find(".") < 0]
if singles:
ls += pp("", singles)
keys = [x for x in keys if x.find(".") >= 0]
for k, g in groupby(sorted(keys), lambda x: x[: x.rfind(".")]):
ks = [x[len(k) + 1 :] for x in list(g)]
ls += pp(k, ks)
s = "\n".join(ls)
if _print:
print(s)
else:
return s
#
# helpers
@contextmanager
def config_prefix(prefix: str) -> Generator[None, None, None]:
"""
contextmanager for multiple invocations of API with a common prefix
supported API functions: register_option, get_option, set_option
Warning: This is not thread-safe, and won't work properly if you import
the API functions into your module using the "from x import y" construct.
Example
-------
import pandas._config.config as cf
with cf.config_prefix("display.font"):
    cf.register_option("color", "red")
    cf.register_option("size", " 5 pt")
    cf.set_option("size", " 6 pt")
    cf.get_option("size")
...
will register options "display.font.color", "display.font.size", set the
value of "display.font.size"... and so on.
"""
# Note: reset_option relies on set_option, and on key directly;
# it does not fit into this monkey-patching scheme
global register_option, get_option, set_option
def wrap(func: F) -> F:
def inner(key: str, *args, **kwds):
pkey = f"{prefix}.{key}"
return func(pkey, *args, **kwds)
return cast(F, inner)
_register_option = register_option
_get_option = get_option
_set_option = set_option
set_option = wrap(set_option)
get_option = wrap(get_option)
register_option = wrap(register_option)
try:
yield
finally:
set_option = _set_option
get_option = _get_option
register_option = _register_option
# These factories and methods are handy for use as the validator
# arg in register_option
def is_type_factory(_type: type[Any]) -> Callable[[Any], None]:
"""
Parameters
----------
`_type` - a type to be compared against (e.g. type(x) == `_type`)
Returns
-------
validator - a function of a single argument x, which raises
ValueError if type(x) is not equal to `_type`
"""
def inner(x) -> None:
if type(x) != _type:
raise ValueError(f"Value must have type '{_type}'")
return inner
def is_instance_factory(_type) -> Callable[[Any], None]:
"""
Parameters
----------
`_type` - the type to be checked against
Returns
-------
validator - a function of a single argument x, which raises
ValueError if x is not an instance of `_type`
"""
if isinstance(_type, (tuple, list)):
_type = tuple(_type)
type_repr = "|".join(map(str, _type))
else:
type_repr = f"'{_type}'"
def inner(x) -> None:
if not isinstance(x, _type):
raise ValueError(f"Value must be an instance of {type_repr}")
return inner
def is_one_of_factory(legal_values) -> Callable[[Any], None]:
callables = [c for c in legal_values if callable(c)]
legal_values = [c for c in legal_values if not callable(c)]
def inner(x) -> None:
if x not in legal_values:
if not any(c(x) for c in callables):
uvals = [str(lval) for lval in legal_values]
pp_values = "|".join(uvals)
msg = f"Value must be one of {pp_values}"
if len(callables):
msg += " or a callable"
raise ValueError(msg)
return inner
def is_nonnegative_int(value: object) -> None:
"""
Verify that value is None or a nonnegative int.
Parameters
----------
value : None or int
The `value` to be checked.
Raises
------
ValueError
When the value is neither None nor a nonnegative integer
"""
if value is None:
return
elif isinstance(value, int):
if value >= 0:
return
msg = "Value must be a nonnegative integer or None"
raise ValueError(msg)
# common type validators, for convenience
# usage: register_option(... , validator = is_int)
is_int = is_type_factory(int)
is_bool = is_type_factory(bool)
is_float = is_type_factory(float)
is_str = is_type_factory(str)
is_text = is_instance_factory((str, bytes))
def is_callable(obj) -> bool:
"""
Parameters
----------
`obj` - the object to be checked
Returns
-------
bool - True if the object is callable;
raises ValueError otherwise.
"""
if not callable(obj):
raise ValueError("Value must be a callable")
return True
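A hedged end-to-end sketch of the machinery above; the "demo.*" option names are invented for illustration:

import pandas._config.config as cf

# Register an option with a type validator and a change callback.
cf.register_option("demo.size", 10, "demo option", validator=cf.is_int,
                   cb=lambda key: print(f"{key} changed"))
# Register a deprecated alias that reroutes to the new key.
cf.register_option("demo.old_size", 10, "deprecated alias")
cf.deprecate_option("demo.old_size", rkey="demo.size")

print(cf.get_option("demo.size"))         # 10
with cf.option_context("demo.size", 99):  # temporary override; cb fires on set/reset
    print(cf.get_option("demo.size"))     # 99
print(cf.get_option("demo.size"))         # back to 10
print(cf.get_option("demo.old_size"))     # FutureWarning, rerouted to demo.size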

View file: pandas/_config/dates.py

@@ -0,0 +1,25 @@
"""
config for datetime formatting
"""
from __future__ import annotations
from pandas._config import config as cf
pc_date_dayfirst_doc = """
: boolean
When True, prints and parses dates with the day first, eg 20/01/2005
"""
pc_date_yearfirst_doc = """
: boolean
When True, prints and parses dates with the year first, eg 2005/01/20
"""
with cf.config_prefix("display"):
# Needed upstream of `_libs` because these are used in tslibs.parsing
cf.register_option(
"date_dayfirst", False, pc_date_dayfirst_doc, validator=cf.is_bool
)
cf.register_option(
"date_yearfirst", False, pc_date_yearfirst_doc, validator=cf.is_bool
)
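These two flags surface through the normal options API; per the comment above they are consulted by the datetime parser in pandas._libs.tslibs.parsing. A small sketch of the user-facing side:

import pandas as pd

print(pd.get_option("display.date_dayfirst"))    # False by default
with pd.option_context("display.date_dayfirst", True):
    # while set, ambiguous strings such as "01/02/2005" parse day-first
    print(pd.options.display.date_dayfirst)      # True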

View file: pandas/_config/display.py

@@ -0,0 +1,62 @@
"""
Unopinionated display configuration.
"""
from __future__ import annotations
import locale
import sys
from pandas._config import config as cf
# -----------------------------------------------------------------------------
# Global formatting options
_initial_defencoding: str | None = None
def detect_console_encoding() -> str:
"""
Try to find the most capable encoding supported by the console.
Slightly modified from the way IPython handles the same issue.
"""
global _initial_defencoding
encoding = None
try:
encoding = sys.stdout.encoding or sys.stdin.encoding
except (AttributeError, OSError):
pass
# try again for something better
if not encoding or "ascii" in encoding.lower():
try:
encoding = locale.getpreferredencoding()
except locale.Error:
# can be raised by locale.setlocale(), which is
# called by getpreferredencoding
# (on some systems, see stdlib locale docs)
pass
# when all else fails, this will usually be "ascii"
if not encoding or "ascii" in encoding.lower():
encoding = sys.getdefaultencoding()
# GH#3360, save the reported defencoding at import time
# MPL backends may change it. Make available for debugging.
if not _initial_defencoding:
_initial_defencoding = sys.getdefaultencoding()
return encoding
pc_encoding_doc = """
: str/unicode
Defaults to the detected encoding of the console.
Specifies the encoding to be used for strings returned by to_string;
these are generally strings meant to be displayed on the console.
"""
with cf.config_prefix("display"):
cf.register_option(
"encoding", detect_console_encoding(), pc_encoding_doc, validator=cf.is_text
)
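For illustration, a minimal sketch of the fallback chain implemented above:

from pandas._config.display import detect_console_encoding

# Tries sys.stdout/stdin first, then locale.getpreferredencoding(),
# then sys.getdefaultencoding() when everything else reports ascii.
print(detect_console_encoding())  # e.g. "utf-8" on most modern consoles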

View file: pandas/_config/localization.py

@@ -0,0 +1,172 @@
"""
Helpers for configuring locale settings.
Name `localization` is chosen to avoid overlap with builtin `locale` module.
"""
from __future__ import annotations
from contextlib import contextmanager
import locale
import platform
import re
import subprocess
from typing import TYPE_CHECKING
from pandas._config.config import options
if TYPE_CHECKING:
from collections.abc import Generator
@contextmanager
def set_locale(
new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL
) -> Generator[str | tuple[str, str], None, None]:
"""
Context manager for temporarily setting a locale.
Parameters
----------
new_locale : str or tuple
A string of the form <language_country>.<encoding>. For example to set
the current locale to US English with a UTF8 encoding, you would pass
"en_US.UTF-8".
lc_var : int, default `locale.LC_ALL`
The category of the locale being set.
Notes
-----
This is useful when you want to run a particular block of code under a
particular locale, without globally setting the locale. This probably isn't
thread-safe.
"""
# getlocale is not always compliant with setlocale, use setlocale. GH#46595
current_locale = locale.setlocale(lc_var)
try:
locale.setlocale(lc_var, new_locale)
normalized_code, normalized_encoding = locale.getlocale()
if normalized_code is not None and normalized_encoding is not None:
yield f"{normalized_code}.{normalized_encoding}"
else:
yield new_locale
finally:
locale.setlocale(lc_var, current_locale)
def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
"""
Check to see if we can set a locale, and subsequently get the locale,
without raising an Exception.
Parameters
----------
lc : str
The locale to attempt to set.
lc_var : int, default `locale.LC_ALL`
The category of the locale being set.
Returns
-------
bool
Whether the passed locale can be set
"""
try:
with set_locale(lc, lc_var=lc_var):
pass
except (ValueError, locale.Error):
# horrible name for an Exception subclass
return False
else:
return True
def _valid_locales(locales: list[str] | str, normalize: bool) -> list[str]:
"""
Return a list of normalized locales that do not throw an ``Exception``
when set.
Parameters
----------
locales : list[str] or str
A newline-separated string of locales, or a list of locale strings.
normalize : bool
Whether to call ``locale.normalize`` on each locale.
Returns
-------
valid_locales : list
A list of valid locales.
"""
return [
loc
for loc in (
locale.normalize(loc.strip()) if normalize else loc.strip()
for loc in locales
)
if can_set_locale(loc)
]
def get_locales(
prefix: str | None = None,
normalize: bool = True,
) -> list[str]:
"""
Get all the locales that are available on the system.
Parameters
----------
prefix : str
If not ``None`` then return only those locales with the prefix
provided. For example to get all English language locales (those that
start with ``"en"``), pass ``prefix="en"``.
normalize : bool
Call ``locale.normalize`` on the resulting list of available locales.
If ``True``, only locales that can be set without throwing an
``Exception`` are returned.
Returns
-------
locales : list of strings
A list of locale strings that can be set with ``locale.setlocale()``.
For example::
locale.setlocale(locale.LC_ALL, locale_string)
On error will return an empty list (no locale available, e.g. Windows)
"""
if platform.system() in ("Linux", "Darwin"):
raw_locales = subprocess.check_output(["locale", "-a"])
else:
# Other platforms, e.g. Windows, don't define "locale -a"
# Note: is_platform_windows causes circular import here
return []
try:
# raw_locales is "\n" separated list of locales
# it may contain non-decodable parts, so split
# extract what we can and then rejoin.
split_raw_locales = raw_locales.split(b"\n")
out_locales = []
for x in split_raw_locales:
try:
out_locales.append(str(x, encoding=options.display.encoding))
except UnicodeError:
# 'locale -a' is used to populate 'raw_locales', and on
# Redhat 7 Linux (and maybe others) prints locale names
# using windows-1252 encoding. Bug only triggered by
# a few special characters and when there is an
# extensive list of installed locales.
out_locales.append(str(x, encoding="windows-1252"))
except TypeError:
pass
if prefix is None:
return _valid_locales(out_locales, normalize)
pattern = re.compile(f"{prefix}.*")
found = pattern.findall("\n".join(out_locales))
return _valid_locales(found, normalize)
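A usage sketch for the helpers above; locale availability is system-dependent, so "en_US.UTF-8" here is only an example:

from pandas._config.localization import can_set_locale, get_locales, set_locale

print(get_locales(prefix="en"))       # [] on platforms without `locale -a`
if can_set_locale("en_US.UTF-8"):
    with set_locale("en_US.UTF-8"):
        pass  # code here runs under the temporary locale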

View file: pandas/_libs/__init__.py

@@ -0,0 +1,27 @@
__all__ = [
"NaT",
"NaTType",
"OutOfBoundsDatetime",
"Period",
"Timedelta",
"Timestamp",
"iNaT",
"Interval",
]
# The imports below need to happen first to ensure the pandas top-level
# module gets monkeypatched with the pandas_datetime_CAPI
# see pandas_datetime_exec in pd_datetime.c
import pandas._libs.pandas_parser # noqa: E501 # isort: skip # type: ignore[reportUnusedImport]
import pandas._libs.pandas_datetime # noqa: F401,E501 # isort: skip # type: ignore[reportUnusedImport]
from pandas._libs.interval import Interval
from pandas._libs.tslibs import (
NaT,
NaTType,
OutOfBoundsDatetime,
Period,
Timedelta,
Timestamp,
iNaT,
)
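A quick sketch of the re-exported tslibs objects; importing pandas._libs directly requires the compiled C extensions:

from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT

print(Timestamp("2025-10-19") + Timedelta("1D"))  # 2025-10-20 00:00:00
print(Period("2025-10", freq="M"))                # Period('2025-10', 'M')
print(NaT is NaT, iNaT)                           # True -9223372036854775808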

View file: pandas/_libs/algos.pyi

@@ -0,0 +1,416 @@
from typing import Any
import numpy as np
from pandas._typing import npt
class Infinity:
def __eq__(self, other) -> bool: ...
def __ne__(self, other) -> bool: ...
def __lt__(self, other) -> bool: ...
def __le__(self, other) -> bool: ...
def __gt__(self, other) -> bool: ...
def __ge__(self, other) -> bool: ...
class NegInfinity:
def __eq__(self, other) -> bool: ...
def __ne__(self, other) -> bool: ...
def __lt__(self, other) -> bool: ...
def __le__(self, other) -> bool: ...
def __gt__(self, other) -> bool: ...
def __ge__(self, other) -> bool: ...
def unique_deltas(
arr: np.ndarray, # const int64_t[:]
) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1]
def is_lexsorted(list_of_arrays: list[npt.NDArray[np.int64]]) -> bool: ...
def groupsort_indexer(
index: np.ndarray, # const int64_t[:]
ngroups: int,
) -> tuple[
np.ndarray, # ndarray[int64_t, ndim=1]
np.ndarray, # ndarray[int64_t, ndim=1]
]: ...
def kth_smallest(
arr: np.ndarray, # numeric[:]
k: int,
) -> Any: ... # numeric
# ----------------------------------------------------------------------
# Pairwise correlation/covariance
def nancorr(
mat: npt.NDArray[np.float64], # const float64_t[:, :]
cov: bool = ...,
minp: int | None = ...,
) -> npt.NDArray[np.float64]: ... # ndarray[float64_t, ndim=2]
def nancorr_spearman(
mat: npt.NDArray[np.float64], # ndarray[float64_t, ndim=2]
minp: int = ...,
) -> npt.NDArray[np.float64]: ... # ndarray[float64_t, ndim=2]
# ----------------------------------------------------------------------
def validate_limit(nobs: int | None, limit=...) -> int: ...
def get_fill_indexer(
mask: npt.NDArray[np.bool_],
limit: int | None = None,
) -> npt.NDArray[np.intp]: ...
def pad(
old: np.ndarray, # ndarray[numeric_object_t]
new: np.ndarray, # ndarray[numeric_object_t]
limit=...,
) -> npt.NDArray[np.intp]: ... # np.ndarray[np.intp, ndim=1]
def pad_inplace(
values: np.ndarray, # numeric_object_t[:]
mask: np.ndarray, # uint8_t[:]
limit=...,
) -> None: ...
def pad_2d_inplace(
values: np.ndarray, # numeric_object_t[:, :]
mask: np.ndarray, # const uint8_t[:, :]
limit=...,
) -> None: ...
def backfill(
old: np.ndarray, # ndarray[numeric_object_t]
new: np.ndarray, # ndarray[numeric_object_t]
limit=...,
) -> npt.NDArray[np.intp]: ... # np.ndarray[np.intp, ndim=1]
def backfill_inplace(
values: np.ndarray, # numeric_object_t[:]
mask: np.ndarray, # uint8_t[:]
limit=...,
) -> None: ...
def backfill_2d_inplace(
values: np.ndarray, # numeric_object_t[:, :]
mask: np.ndarray, # const uint8_t[:, :]
limit=...,
) -> None: ...
def is_monotonic(
arr: np.ndarray, # ndarray[numeric_object_t, ndim=1]
timelike: bool,
) -> tuple[bool, bool, bool]: ...
# ----------------------------------------------------------------------
# rank_1d, rank_2d
# ----------------------------------------------------------------------
def rank_1d(
values: np.ndarray, # ndarray[numeric_object_t, ndim=1]
labels: np.ndarray | None = ..., # const int64_t[:]=None
is_datetimelike: bool = ...,
ties_method=...,
ascending: bool = ...,
pct: bool = ...,
na_option=...,
mask: npt.NDArray[np.bool_] | None = ...,
) -> np.ndarray: ... # np.ndarray[float64_t, ndim=1]
def rank_2d(
in_arr: np.ndarray, # ndarray[numeric_object_t, ndim=2]
axis: int = ...,
is_datetimelike: bool = ...,
ties_method=...,
ascending: bool = ...,
na_option=...,
pct: bool = ...,
) -> np.ndarray: ... # np.ndarray[float64_t, ndim=1]
def diff_2d(
arr: np.ndarray, # ndarray[diff_t, ndim=2]
out: np.ndarray, # ndarray[out_t, ndim=2]
periods: int,
axis: int,
datetimelike: bool = ...,
) -> None: ...
def ensure_platform_int(arr: object) -> npt.NDArray[np.intp]: ...
def ensure_object(arr: object) -> npt.NDArray[np.object_]: ...
def ensure_float64(arr: object) -> npt.NDArray[np.float64]: ...
def ensure_int8(arr: object) -> npt.NDArray[np.int8]: ...
def ensure_int16(arr: object) -> npt.NDArray[np.int16]: ...
def ensure_int32(arr: object) -> npt.NDArray[np.int32]: ...
def ensure_int64(arr: object) -> npt.NDArray[np.int64]: ...
def ensure_uint64(arr: object) -> npt.NDArray[np.uint64]: ...
def take_1d_int8_int8(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int8_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int8_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int8_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_int16(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int32_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int32_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int64_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_float32_float32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_float32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_float64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_object_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_bool_bool(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_bool_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_int8(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_int16(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int32_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int32_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int64_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_float32_float32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_float32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_float64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_object_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_bool_bool(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_bool_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_int8(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_int16(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int32_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int32_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int64_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_float32_float32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_float32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_float64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_object_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_bool_bool(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_bool_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_multi_int8_int8(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int8_int32(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int8_int64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int8_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int16_int16(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int16_int32(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int16_int64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int16_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int32_int32(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int32_int64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int32_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int64_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_float32_float32(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_float32_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_float64_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_object_object(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_bool_bool(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_bool_object(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int64_int64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
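A hedged sketch of the take_1d family: each variant gathers values into a preallocated out buffer, writing fill_value where the indexer is -1. These are private kernels, so the array dtypes must match the stub exactly:

import numpy as np
from pandas._libs import algos

values = np.array([10, 20, 30], dtype=np.int64)
indexer = np.array([2, 0, -1], dtype=np.intp)  # -1 marks a missing slot
out = np.empty(3, dtype=np.float64)
algos.take_1d_int64_float64(values, indexer, out, fill_value=np.nan)
print(out)  # [30. 10. nan]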

View file: pandas/_libs/arrays.pyi

@@ -0,0 +1,40 @@
from typing import Sequence
import numpy as np
from pandas._typing import (
AxisInt,
DtypeObj,
Self,
Shape,
)
class NDArrayBacked:
_dtype: DtypeObj
_ndarray: np.ndarray
def __init__(self, values: np.ndarray, dtype: DtypeObj) -> None: ...
@classmethod
def _simple_new(cls, values: np.ndarray, dtype: DtypeObj): ...
def _from_backing_data(self, values: np.ndarray): ...
def __setstate__(self, state): ...
def __len__(self) -> int: ...
@property
def shape(self) -> Shape: ...
@property
def ndim(self) -> int: ...
@property
def size(self) -> int: ...
@property
def nbytes(self) -> int: ...
def copy(self): ...
def delete(self, loc, axis=...): ...
def swapaxes(self, axis1, axis2): ...
def repeat(self, repeats: int | Sequence[int], axis: int | None = ...): ...
def reshape(self, *args, **kwargs): ...
def ravel(self, order=...): ...
@property
def T(self): ...
@classmethod
def _concat_same_type(
cls, to_concat: Sequence[Self], axis: AxisInt = ...
) -> Self: ...
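NDArrayBacked is the Cython base class behind pandas' numpy-backed extension arrays; a minimal direct instantiation, for illustration only:

import numpy as np
from pandas._libs.arrays import NDArrayBacked

backed = NDArrayBacked(np.arange(6).reshape(2, 3), np.dtype("int64"))
print(backed.shape, backed.ndim, backed.nbytes)  # (2, 3) 2 48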

View file: pandas/_libs/byteswap.pyi

@@ -0,0 +1,5 @@
def read_float_with_byteswap(data: bytes, offset: int, byteswap: bool) -> float: ...
def read_double_with_byteswap(data: bytes, offset: int, byteswap: bool) -> float: ...
def read_uint16_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
def read_uint32_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
def read_uint64_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
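These helpers read fixed-width scalars out of a bytes buffer, optionally byte-swapping (the SAS reader uses them for cross-endian files). A sketch, assuming a little-endian host:

import struct
from pandas._libs.byteswap import read_uint16_with_byteswap

buf = struct.pack(">H", 0x0102)  # big-endian bytes 0x01 0x02
print(read_uint16_with_byteswap(buf, 0, True))   # 258, swapped back to 0x0102
print(read_uint16_with_byteswap(buf, 0, False))  # 513, raw little-endian read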

View file: pandas/_libs/groupby.pyi

@@ -0,0 +1,203 @@
from typing import Literal
import numpy as np
from pandas._typing import npt
def group_median_float64(
out: np.ndarray, # ndarray[float64_t, ndim=2]
counts: npt.NDArray[np.int64],
values: np.ndarray, # ndarray[float64_t, ndim=2]
labels: npt.NDArray[np.int64],
min_count: int = ..., # Py_ssize_t
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_cumprod(
out: np.ndarray, # float64_t[:, ::1]
values: np.ndarray, # const float64_t[:, :]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
is_datetimelike: bool,
skipna: bool = ...,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_cumsum(
out: np.ndarray, # int64float_t[:, ::1]
values: np.ndarray, # ndarray[int64float_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
is_datetimelike: bool,
skipna: bool = ...,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_shift_indexer(
out: np.ndarray, # int64_t[::1]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
periods: int,
) -> None: ...
def group_fillna_indexer(
out: np.ndarray, # ndarray[intp_t]
labels: np.ndarray, # ndarray[int64_t]
sorted_labels: npt.NDArray[np.intp],
mask: npt.NDArray[np.uint8],
direction: Literal["ffill", "bfill"],
limit: int, # int64_t
dropna: bool,
) -> None: ...
def group_any_all(
out: np.ndarray, # uint8_t[::1]
values: np.ndarray, # const uint8_t[::1]
labels: np.ndarray, # const int64_t[:]
mask: np.ndarray, # const uint8_t[::1]
val_test: Literal["any", "all"],
skipna: bool,
nullable: bool,
) -> None: ...
def group_sum(
out: np.ndarray, # complexfloatingintuint_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[complexfloatingintuint_t, ndim=2]
labels: np.ndarray, # const intp_t[:]
mask: np.ndarray | None,
result_mask: np.ndarray | None = ...,
min_count: int = ...,
is_datetimelike: bool = ...,
) -> None: ...
def group_prod(
out: np.ndarray, # int64float_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[int64float_t, ndim=2]
labels: np.ndarray, # const intp_t[:]
mask: np.ndarray | None,
result_mask: np.ndarray | None = ...,
min_count: int = ...,
) -> None: ...
def group_var(
out: np.ndarray, # floating[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[floating, ndim=2]
labels: np.ndarray, # const intp_t[:]
min_count: int = ..., # Py_ssize_t
ddof: int = ..., # int64_t
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
is_datetimelike: bool = ...,
name: str = ...,
) -> None: ...
def group_skew(
out: np.ndarray, # float64_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[float64_t, ndim=2]
labels: np.ndarray, # const intp_t[::1]
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
skipna: bool = ...,
) -> None: ...
def group_mean(
out: np.ndarray, # floating[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[floating, ndim=2]
labels: np.ndarray, # const intp_t[:]
min_count: int = ..., # Py_ssize_t
is_datetimelike: bool = ..., # bint
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_ohlc(
out: np.ndarray, # floatingintuint_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[floatingintuint_t, ndim=2]
labels: np.ndarray, # const intp_t[:]
min_count: int = ...,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_quantile(
out: npt.NDArray[np.float64],
values: np.ndarray, # ndarray[numeric, ndim=1]
labels: npt.NDArray[np.intp],
mask: npt.NDArray[np.uint8],
qs: npt.NDArray[np.float64], # const
starts: npt.NDArray[np.int64],
ends: npt.NDArray[np.int64],
interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"],
result_mask: np.ndarray | None,
is_datetimelike: bool,
) -> None: ...
def group_last(
out: np.ndarray, # rank_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[rank_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
mask: npt.NDArray[np.bool_] | None,
result_mask: npt.NDArray[np.bool_] | None = ...,
min_count: int = ..., # Py_ssize_t
is_datetimelike: bool = ...,
) -> None: ...
def group_nth(
out: np.ndarray, # rank_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[rank_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
mask: npt.NDArray[np.bool_] | None,
result_mask: npt.NDArray[np.bool_] | None = ...,
min_count: int = ..., # int64_t
rank: int = ..., # int64_t
is_datetimelike: bool = ...,
) -> None: ...
def group_rank(
out: np.ndarray, # float64_t[:, ::1]
values: np.ndarray, # ndarray[rank_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
is_datetimelike: bool,
ties_method: Literal["average", "min", "max", "first", "dense"] = ...,
ascending: bool = ...,
pct: bool = ...,
na_option: Literal["keep", "top", "bottom"] = ...,
mask: npt.NDArray[np.bool_] | None = ...,
) -> None: ...
def group_max(
out: np.ndarray, # groupby_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[groupby_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
min_count: int = ...,
is_datetimelike: bool = ...,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_min(
out: np.ndarray, # groupby_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[groupby_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
min_count: int = ...,
is_datetimelike: bool = ...,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_cummin(
out: np.ndarray, # groupby_t[:, ::1]
values: np.ndarray, # ndarray[groupby_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
is_datetimelike: bool,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
skipna: bool = ...,
) -> None: ...
def group_cummax(
out: np.ndarray, # groupby_t[:, ::1]
values: np.ndarray, # ndarray[groupby_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
is_datetimelike: bool,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
skipna: bool = ...,
) -> None: ...
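
# Illustrative sketch (editor's addition, not part of the stub): how pandas'
# GroupBy machinery drives these kernels. Output buffers are caller-allocated
# and labels map each row to a group id; exact dtypes are assumptions read
# off the signatures above, and this internal API may shift between versions.
import numpy as np
from pandas._libs import groupby as libgroupby

values = np.array([[1.0], [2.0], [3.0], [4.0]])   # (nrows, ncols)
labels = np.array([0, 0, 1, 1], dtype=np.intp)    # row -> group id
out = np.zeros((2, 1), dtype=np.float64)          # one row per group
counts = np.zeros(2, dtype=np.int64)
libgroupby.group_sum(out, counts, values, labels, mask=None)
# out -> [[3.0], [7.0]], counts -> [2, 2]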

View file

@ -0,0 +1,9 @@
import numpy as np
from pandas._typing import npt
def hash_object_array(
arr: npt.NDArray[np.object_],
key: str,
encoding: str = ...,
) -> npt.NDArray[np.uint64]: ...
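
# Hedged usage sketch: hash_object_array hashes an object array with SipHash;
# the 16-character key requirement is an assumption taken from pandas'
# hashing internals rather than from the stub itself.
import numpy as np
from pandas._libs.hashing import hash_object_array

arr = np.array(["a", "b", "a"], dtype=object)
hashed = hash_object_array(arr, key="0123456789123456")
# equal inputs hash equally: hashed[0] == hashed[2]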

View file

@ -0,0 +1,251 @@
from typing import (
Any,
Hashable,
Literal,
)
import numpy as np
from pandas._typing import npt
def unique_label_indices(
labels: np.ndarray, # const int64_t[:]
) -> np.ndarray: ...
class Factorizer:
count: int
uniques: Any
def __init__(self, size_hint: int) -> None: ...
def get_count(self) -> int: ...
def factorize(
self,
values: np.ndarray,
sort: bool = ...,
na_sentinel=...,
na_value=...,
mask=...,
) -> npt.NDArray[np.intp]: ...
class ObjectFactorizer(Factorizer):
table: PyObjectHashTable
uniques: ObjectVector
class Int64Factorizer(Factorizer):
table: Int64HashTable
uniques: Int64Vector
class UInt64Factorizer(Factorizer):
table: UInt64HashTable
uniques: UInt64Vector
class Int32Factorizer(Factorizer):
table: Int32HashTable
uniques: Int32Vector
class UInt32Factorizer(Factorizer):
table: UInt32HashTable
uniques: UInt32Vector
class Int16Factorizer(Factorizer):
table: Int16HashTable
uniques: Int16Vector
class UInt16Factorizer(Factorizer):
table: UInt16HashTable
uniques: UInt16Vector
class Int8Factorizer(Factorizer):
table: Int8HashTable
uniques: Int8Vector
class UInt8Factorizer(Factorizer):
table: UInt8HashTable
uniques: UInt8Vector
class Float64Factorizer(Factorizer):
table: Float64HashTable
uniques: Float64Vector
class Float32Factorizer(Factorizer):
table: Float32HashTable
uniques: Float32Vector
class Complex64Factorizer(Factorizer):
table: Complex64HashTable
uniques: Complex64Vector
class Complex128Factorizer(Factorizer):
table: Complex128HashTable
uniques: Complex128Vector
class Int64Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int64]: ...
class Int32Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int32]: ...
class Int16Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int16]: ...
class Int8Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int8]: ...
class UInt64Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint64]: ...
class UInt32Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint32]: ...
class UInt16Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint16]: ...
class UInt8Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint8]: ...
class Float64Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.float64]: ...
class Float32Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.float32]: ...
class Complex128Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.complex128]: ...
class Complex64Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.complex64]: ...
class StringVector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.object_]: ...
class ObjectVector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.object_]: ...
class HashTable:
# NB: The base HashTable class does _not_ actually have these methods;
# we are putting them here for the sake of mypy to avoid
# reproducing them in each subclass below.
def __init__(self, size_hint: int = ..., uses_mask: bool = ...) -> None: ...
def __len__(self) -> int: ...
def __contains__(self, key: Hashable) -> bool: ...
def sizeof(self, deep: bool = ...) -> int: ...
def get_state(self) -> dict[str, int]: ...
# TODO: `item` type is subclass-specific
def get_item(self, item): ... # TODO: return type?
def set_item(self, item, val) -> None: ...
def get_na(self): ... # TODO: return type?
def set_na(self, val) -> None: ...
def map_locations(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
mask: npt.NDArray[np.bool_] | None = ...,
) -> None: ...
def lookup(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
mask: npt.NDArray[np.bool_] | None = ...,
) -> npt.NDArray[np.intp]: ...
def get_labels(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
uniques, # SubclassTypeVector
count_prior: int = ...,
na_sentinel: int = ...,
na_value: object = ...,
mask=...,
) -> npt.NDArray[np.intp]: ...
def unique(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
return_inverse: bool = ...,
) -> (
tuple[
np.ndarray, # np.ndarray[subclass-specific]
npt.NDArray[np.intp],
]
| np.ndarray
): ... # np.ndarray[subclass-specific]
def factorize(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
na_sentinel: int = ...,
na_value: object = ...,
mask=...,
) -> tuple[np.ndarray, npt.NDArray[np.intp]]: ... # np.ndarray[subclass-specific]
class Complex128HashTable(HashTable): ...
class Complex64HashTable(HashTable): ...
class Float64HashTable(HashTable): ...
class Float32HashTable(HashTable): ...
class Int64HashTable(HashTable):
# Only Int64HashTable has get_labels_groupby, map_keys_to_values
def get_labels_groupby(
self,
values: npt.NDArray[np.int64], # const int64_t[:]
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64]]: ...
def map_keys_to_values(
self,
keys: npt.NDArray[np.int64],
values: npt.NDArray[np.int64], # const int64_t[:]
) -> None: ...
class Int32HashTable(HashTable): ...
class Int16HashTable(HashTable): ...
class Int8HashTable(HashTable): ...
class UInt64HashTable(HashTable): ...
class UInt32HashTable(HashTable): ...
class UInt16HashTable(HashTable): ...
class UInt8HashTable(HashTable): ...
class StringHashTable(HashTable): ...
class PyObjectHashTable(HashTable): ...
class IntpHashTable(HashTable): ...
def duplicated(
values: np.ndarray,
keep: Literal["last", "first", False] = ...,
mask: npt.NDArray[np.bool_] | None = ...,
) -> npt.NDArray[np.bool_]: ...
def mode(
values: np.ndarray, dropna: bool, mask: npt.NDArray[np.bool_] | None = ...
) -> np.ndarray: ...
def value_count(
values: np.ndarray,
dropna: bool,
mask: npt.NDArray[np.bool_] | None = ...,
) -> tuple[np.ndarray, npt.NDArray[np.int64]]: ... # np.ndarray[same-as-values]
# arr and values should have same dtype
def ismember(
arr: np.ndarray,
values: np.ndarray,
) -> npt.NDArray[np.bool_]: ...
def object_hash(obj) -> int: ...
def objects_are_equal(a, b) -> bool: ...
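
# Hedged sketch of the table API typed above: unique() returns the distinct
# values in first-seen order, optionally with the inverse indexer, and
# duplicated() flags repeats. Behaviour is inferred from the stubs plus
# pandas' documented semantics.
import numpy as np
from pandas._libs.hashtable import Int64HashTable, duplicated

values = np.array([3, 1, 3, 2, 1], dtype=np.int64)
uniques, inverse = Int64HashTable().unique(values, return_inverse=True)
# uniques -> [3, 1, 2]; values reconstructs as uniques[inverse]
duplicated(values, keep="first")
# -> [False, False, True, False, True]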

View file

@ -0,0 +1,107 @@
import numpy as np
from pandas._typing import npt
from pandas import MultiIndex
from pandas.core.arrays import ExtensionArray
multiindex_nulls_shift: int
class IndexEngine:
over_size_threshold: bool
def __init__(self, values: np.ndarray) -> None: ...
def __contains__(self, val: object) -> bool: ...
# -> int | slice | np.ndarray[bool]
def get_loc(self, val: object) -> int | slice | np.ndarray: ...
def sizeof(self, deep: bool = ...) -> int: ...
def __sizeof__(self) -> int: ...
@property
def is_unique(self) -> bool: ...
@property
def is_monotonic_increasing(self) -> bool: ...
@property
def is_monotonic_decreasing(self) -> bool: ...
@property
def is_mapping_populated(self) -> bool: ...
def clear_mapping(self): ...
def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ...
def get_indexer_non_unique(
self,
targets: np.ndarray,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
class MaskedIndexEngine(IndexEngine):
def __init__(self, values: object) -> None: ...
def get_indexer_non_unique(
self, targets: object
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
class Float64Engine(IndexEngine): ...
class Float32Engine(IndexEngine): ...
class Complex128Engine(IndexEngine): ...
class Complex64Engine(IndexEngine): ...
class Int64Engine(IndexEngine): ...
class Int32Engine(IndexEngine): ...
class Int16Engine(IndexEngine): ...
class Int8Engine(IndexEngine): ...
class UInt64Engine(IndexEngine): ...
class UInt32Engine(IndexEngine): ...
class UInt16Engine(IndexEngine): ...
class UInt8Engine(IndexEngine): ...
class ObjectEngine(IndexEngine): ...
class DatetimeEngine(Int64Engine): ...
class TimedeltaEngine(DatetimeEngine): ...
class PeriodEngine(Int64Engine): ...
class BoolEngine(UInt8Engine): ...
class MaskedFloat64Engine(MaskedIndexEngine): ...
class MaskedFloat32Engine(MaskedIndexEngine): ...
class MaskedComplex128Engine(MaskedIndexEngine): ...
class MaskedComplex64Engine(MaskedIndexEngine): ...
class MaskedInt64Engine(MaskedIndexEngine): ...
class MaskedInt32Engine(MaskedIndexEngine): ...
class MaskedInt16Engine(MaskedIndexEngine): ...
class MaskedInt8Engine(MaskedIndexEngine): ...
class MaskedUInt64Engine(MaskedIndexEngine): ...
class MaskedUInt32Engine(MaskedIndexEngine): ...
class MaskedUInt16Engine(MaskedIndexEngine): ...
class MaskedUInt8Engine(MaskedIndexEngine): ...
class MaskedBoolEngine(MaskedUInt8Engine): ...
class BaseMultiIndexCodesEngine:
levels: list[np.ndarray]
offsets: np.ndarray # ndarray[uint64_t, ndim=1]
def __init__(
self,
levels: list[np.ndarray], # all entries hashable
labels: list[np.ndarray], # all entries integer-dtyped
offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1]
) -> None: ...
def get_indexer(self, target: npt.NDArray[np.object_]) -> npt.NDArray[np.intp]: ...
def _extract_level_codes(self, target: MultiIndex) -> np.ndarray: ...
def get_indexer_with_fill(
self,
target: np.ndarray, # np.ndarray[object] of tuples
values: np.ndarray, # np.ndarray[object] of tuples
method: str,
limit: int | None,
) -> npt.NDArray[np.intp]: ...
class ExtensionEngine:
def __init__(self, values: ExtensionArray) -> None: ...
def __contains__(self, val: object) -> bool: ...
def get_loc(self, val: object) -> int | slice | np.ndarray: ...
def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ...
def get_indexer_non_unique(
self,
targets: np.ndarray,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
@property
def is_unique(self) -> bool: ...
@property
def is_monotonic_increasing(self) -> bool: ...
@property
def is_monotonic_decreasing(self) -> bool: ...
def sizeof(self, deep: bool = ...) -> int: ...
def clear_mapping(self): ...
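
# Hedged sketch: an IndexEngine wraps an Index's backing array and serves
# positional lookups; Index.get_loc and get_indexer delegate here.
import numpy as np
from pandas._libs.index import Int64Engine

engine = Int64Engine(np.array([10, 20, 30], dtype=np.int64))
engine.is_unique                 # True
engine.is_monotonic_increasing   # True
engine.get_loc(20)               # 1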

View file

@ -0,0 +1,17 @@
from typing import (
Generic,
TypeVar,
)
from pandas.core.indexing import IndexingMixin
_IndexingMixinT = TypeVar("_IndexingMixinT", bound=IndexingMixin)
class NDFrameIndexerBase(Generic[_IndexingMixinT]):
name: str
# in practice obj is either a DataFrame or a Series
obj: _IndexingMixinT
def __init__(self, name: str, obj: _IndexingMixinT) -> None: ...
@property
def ndim(self) -> int: ...

View file

@ -0,0 +1,106 @@
from typing import (
Iterator,
Sequence,
final,
overload,
)
import weakref
import numpy as np
from pandas._typing import (
ArrayLike,
Self,
npt,
)
from pandas import Index
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.internals.blocks import Block as B
def slice_len(slc: slice, objlen: int = ...) -> int: ...
def get_concat_blkno_indexers(
blknos_list: list[npt.NDArray[np.intp]],
) -> list[tuple[npt.NDArray[np.intp], BlockPlacement]]: ...
def get_blkno_indexers(
blknos: np.ndarray, # int64_t[:]
group: bool = ...,
) -> list[tuple[int, slice | np.ndarray]]: ...
def get_blkno_placements(
blknos: np.ndarray,
group: bool = ...,
) -> Iterator[tuple[int, BlockPlacement]]: ...
def update_blklocs_and_blknos(
blklocs: npt.NDArray[np.intp],
blknos: npt.NDArray[np.intp],
loc: int,
nblocks: int,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
@final
class BlockPlacement:
def __init__(self, val: int | slice | np.ndarray) -> None: ...
@property
def indexer(self) -> np.ndarray | slice: ...
@property
def as_array(self) -> np.ndarray: ...
@property
def as_slice(self) -> slice: ...
@property
def is_slice_like(self) -> bool: ...
@overload
def __getitem__(
self, loc: slice | Sequence[int] | npt.NDArray[np.intp]
) -> BlockPlacement: ...
@overload
def __getitem__(self, loc: int) -> int: ...
def __iter__(self) -> Iterator[int]: ...
def __len__(self) -> int: ...
def delete(self, loc) -> BlockPlacement: ...
def add(self, other) -> BlockPlacement: ...
def append(self, others: list[BlockPlacement]) -> BlockPlacement: ...
def tile_for_unstack(self, factor: int) -> npt.NDArray[np.intp]: ...
class SharedBlock:
_mgr_locs: BlockPlacement
ndim: int
values: ArrayLike
refs: BlockValuesRefs
def __init__(
self,
values: ArrayLike,
placement: BlockPlacement,
ndim: int,
refs: BlockValuesRefs | None = ...,
) -> None: ...
class NumpyBlock(SharedBlock):
values: np.ndarray
@final
def slice_block_rows(self, slicer: slice) -> Self: ...
class NDArrayBackedBlock(SharedBlock):
values: NDArrayBackedExtensionArray
@final
def slice_block_rows(self, slicer: slice) -> Self: ...
class Block(SharedBlock): ...
class BlockManager:
blocks: tuple[B, ...]
axes: list[Index]
_known_consolidated: bool
_is_consolidated: bool
_blknos: np.ndarray
_blklocs: np.ndarray
def __init__(
self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=...
) -> None: ...
def get_slice(self, slobj: slice, axis: int = ...) -> Self: ...
def _rebuild_blknos_and_blklocs(self) -> None: ...
class BlockValuesRefs:
referenced_blocks: list[weakref.ref]
def __init__(self, blk: SharedBlock | None = ...) -> None: ...
def add_reference(self, blk: SharedBlock) -> None: ...
def add_index_reference(self, index: Index) -> None: ...
def has_reference(self) -> bool: ...
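
# Hedged sketch of BlockPlacement, which records the manager rows a Block
# occupies; slice inputs stay lazy and expand to an array only on demand.
import numpy as np
from pandas._libs.internals import BlockPlacement

placement = BlockPlacement(slice(0, 6, 2))
placement.is_slice_like   # True
placement.as_array        # array([0, 2, 4])
list(placement)           # [0, 2, 4]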

View file

@ -0,0 +1,174 @@
from typing import (
Any,
Generic,
TypeVar,
overload,
)
import numpy as np
import numpy.typing as npt
from pandas._typing import (
IntervalClosedType,
Timedelta,
Timestamp,
)
VALID_CLOSED: frozenset[str]
_OrderableScalarT = TypeVar("_OrderableScalarT", int, float)
_OrderableTimesT = TypeVar("_OrderableTimesT", Timestamp, Timedelta)
_OrderableT = TypeVar("_OrderableT", int, float, Timestamp, Timedelta)
class _LengthDescriptor:
@overload
def __get__(
self, instance: Interval[_OrderableScalarT], owner: Any
) -> _OrderableScalarT: ...
@overload
def __get__(
self, instance: Interval[_OrderableTimesT], owner: Any
) -> Timedelta: ...
class _MidDescriptor:
@overload
def __get__(self, instance: Interval[_OrderableScalarT], owner: Any) -> float: ...
@overload
def __get__(
self, instance: Interval[_OrderableTimesT], owner: Any
) -> _OrderableTimesT: ...
class IntervalMixin:
@property
def closed_left(self) -> bool: ...
@property
def closed_right(self) -> bool: ...
@property
def open_left(self) -> bool: ...
@property
def open_right(self) -> bool: ...
@property
def is_empty(self) -> bool: ...
def _check_closed_matches(self, other: IntervalMixin, name: str = ...) -> None: ...
class Interval(IntervalMixin, Generic[_OrderableT]):
@property
def left(self: Interval[_OrderableT]) -> _OrderableT: ...
@property
def right(self: Interval[_OrderableT]) -> _OrderableT: ...
@property
def closed(self) -> IntervalClosedType: ...
mid: _MidDescriptor
length: _LengthDescriptor
def __init__(
self,
left: _OrderableT,
right: _OrderableT,
closed: IntervalClosedType = ...,
) -> None: ...
def __hash__(self) -> int: ...
@overload
def __contains__(
self: Interval[Timedelta], key: Timedelta | Interval[Timedelta]
) -> bool: ...
@overload
def __contains__(
self: Interval[Timestamp], key: Timestamp | Interval[Timestamp]
) -> bool: ...
@overload
def __contains__(
self: Interval[_OrderableScalarT],
key: _OrderableScalarT | Interval[_OrderableScalarT],
) -> bool: ...
@overload
def __add__(
self: Interval[_OrderableTimesT], y: Timedelta
) -> Interval[_OrderableTimesT]: ...
@overload
def __add__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __add__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __radd__(
self: Interval[_OrderableTimesT], y: Timedelta
) -> Interval[_OrderableTimesT]: ...
@overload
def __radd__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __radd__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __sub__(
self: Interval[_OrderableTimesT], y: Timedelta
) -> Interval[_OrderableTimesT]: ...
@overload
def __sub__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __sub__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __rsub__(
self: Interval[_OrderableTimesT], y: Timedelta
) -> Interval[_OrderableTimesT]: ...
@overload
def __rsub__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __rsub__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __mul__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __mul__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __rmul__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __rmul__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __truediv__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __truediv__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __floordiv__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __floordiv__(self: Interval[float], y: float) -> Interval[float]: ...
def overlaps(self: Interval[_OrderableT], other: Interval[_OrderableT]) -> bool: ...
def intervals_to_interval_bounds(
intervals: np.ndarray, validate_closed: bool = ...
) -> tuple[np.ndarray, np.ndarray, IntervalClosedType]: ...
class IntervalTree(IntervalMixin):
def __init__(
self,
left: np.ndarray,
right: np.ndarray,
closed: IntervalClosedType = ...,
leaf_size: int = ...,
) -> None: ...
@property
def mid(self) -> np.ndarray: ...
@property
def length(self) -> np.ndarray: ...
def get_indexer(self, target) -> npt.NDArray[np.intp]: ...
def get_indexer_non_unique(
self, target
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
_na_count: int
@property
def is_overlapping(self) -> bool: ...
@property
def is_monotonic_increasing(self) -> bool: ...
def clear_mapping(self) -> None: ...
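
# Hedged sketch of the overloads above via the public API: arithmetic shifts
# or scales both endpoints, and membership respects the closed sides.
import pandas as pd

iv = pd.Interval(0, 5, closed="right")
3 in iv          # True
0 in iv          # False -- the left endpoint is open
(iv + 2).left    # 2
(iv * 2).right   # 10
iv.length        # 5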

View file

@ -0,0 +1,78 @@
import numpy as np
from pandas._typing import npt
def inner_join(
left: np.ndarray, # const intp_t[:]
right: np.ndarray, # const intp_t[:]
max_groups: int,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
def left_outer_join(
left: np.ndarray, # const intp_t[:]
right: np.ndarray, # const intp_t[:]
max_groups: int,
sort: bool = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
def full_outer_join(
left: np.ndarray, # const intp_t[:]
right: np.ndarray, # const intp_t[:]
max_groups: int,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
def ffill_indexer(
indexer: np.ndarray, # const intp_t[:]
) -> npt.NDArray[np.intp]: ...
def left_join_indexer_unique(
left: np.ndarray, # ndarray[join_t]
right: np.ndarray, # ndarray[join_t]
) -> npt.NDArray[np.intp]: ...
def left_join_indexer(
left: np.ndarray, # ndarray[join_t]
right: np.ndarray, # ndarray[join_t]
) -> tuple[
np.ndarray, # np.ndarray[join_t]
npt.NDArray[np.intp],
npt.NDArray[np.intp],
]: ...
def inner_join_indexer(
left: np.ndarray, # ndarray[join_t]
right: np.ndarray, # ndarray[join_t]
) -> tuple[
np.ndarray, # np.ndarray[join_t]
npt.NDArray[np.intp],
npt.NDArray[np.intp],
]: ...
def outer_join_indexer(
left: np.ndarray, # ndarray[join_t]
right: np.ndarray, # ndarray[join_t]
) -> tuple[
np.ndarray, # np.ndarray[join_t]
npt.NDArray[np.intp],
npt.NDArray[np.intp],
]: ...
def asof_join_backward_on_X_by_Y(
left_values: np.ndarray, # ndarray[numeric_t]
right_values: np.ndarray, # ndarray[numeric_t]
left_by_values: np.ndarray, # ndarray[by_t]
right_by_values: np.ndarray, # ndarray[by_t]
allow_exact_matches: bool = ...,
tolerance: np.number | float | None = ...,
use_hashtable: bool = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
def asof_join_forward_on_X_by_Y(
left_values: np.ndarray, # ndarray[numeric_t]
right_values: np.ndarray, # ndarray[numeric_t]
left_by_values: np.ndarray, # ndarray[by_t]
right_by_values: np.ndarray, # ndarray[by_t]
allow_exact_matches: bool = ...,
tolerance: np.number | float | None = ...,
use_hashtable: bool = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
def asof_join_nearest_on_X_by_Y(
left_values: np.ndarray, # ndarray[numeric_t]
right_values: np.ndarray, # ndarray[numeric_t]
left_by_values: np.ndarray, # ndarray[by_t]
right_by_values: np.ndarray, # ndarray[by_t]
allow_exact_matches: bool = ...,
tolerance: np.number | float | None = ...,
use_hashtable: bool = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
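
# Hedged sketch: the join routines consume group-label arrays plus the group
# count and return aligned indexers, with -1 marking unmatched rows; this is
# the layer merge() sits on.
import numpy as np
from pandas._libs.join import left_outer_join

left = np.array([0, 1, 2], dtype=np.intp)     # group labels, not values
right = np.array([1, 2, 2], dtype=np.intp)
left_idx, right_idx = left_outer_join(left, right, 3)
# every left row is kept; rows without a right match get -1 in right_idx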

View file

@ -0,0 +1,23 @@
from typing import (
Any,
Callable,
)
def ujson_dumps(
obj: Any,
ensure_ascii: bool = ...,
double_precision: int = ...,
indent: int = ...,
orient: str = ...,
date_unit: str = ...,
iso_dates: bool = ...,
default_handler: None
| Callable[[Any], str | float | bool | list | dict | None] = ...,
) -> str: ...
def ujson_loads(
s: str,
precise_float: bool = ...,
numpy: bool = ...,
dtype: None = ...,
labelled: bool = ...,
) -> Any: ...
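
# Hedged sketch (module path assumed for recent pandas): these are the
# C-level entry points behind pandas.io.json.
from pandas._libs.json import ujson_dumps, ujson_loads

payload = ujson_dumps({"a": 1.5, "b": [1, 2]}, double_precision=10)
ujson_loads(payload)   # {'a': 1.5, 'b': [1, 2]}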

View file

@ -0,0 +1,207 @@
# TODO(npdtypes): Many types specified here can be made more specific/accurate;
# the more specific versions are specified in comments
from decimal import Decimal
from typing import (
Any,
Callable,
Final,
Generator,
Hashable,
Literal,
TypeAlias,
overload,
)
import numpy as np
from pandas._libs.interval import Interval
from pandas._libs.tslibs import Period
from pandas._typing import (
ArrayLike,
DtypeObj,
TypeGuard,
npt,
)
# placeholder until we can specify np.ndarray[object, ndim=2]
ndarray_obj_2d = np.ndarray
from enum import Enum
class _NoDefault(Enum):
no_default = ...
no_default: Final = _NoDefault.no_default
NoDefault: TypeAlias = Literal[_NoDefault.no_default]
i8max: int
u8max: int
def is_np_dtype(dtype: object, kinds: str | None = ...) -> TypeGuard[np.dtype]: ...
def item_from_zerodim(val: object) -> object: ...
def infer_dtype(value: object, skipna: bool = ...) -> str: ...
def is_iterator(obj: object) -> bool: ...
def is_scalar(val: object) -> bool: ...
def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ...
def is_pyarrow_array(obj: object) -> bool: ...
def is_period(val: object) -> TypeGuard[Period]: ...
def is_interval(val: object) -> TypeGuard[Interval]: ...
def is_decimal(val: object) -> TypeGuard[Decimal]: ...
def is_complex(val: object) -> TypeGuard[complex]: ...
def is_bool(val: object) -> TypeGuard[bool | np.bool_]: ...
def is_integer(val: object) -> TypeGuard[int | np.integer]: ...
def is_int_or_none(obj) -> bool: ...
def is_float(val: object) -> TypeGuard[float]: ...
def is_interval_array(values: np.ndarray) -> bool: ...
def is_datetime64_array(values: np.ndarray) -> bool: ...
def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ...
def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ...
def is_time_array(values: np.ndarray, skipna: bool = ...): ...
def is_date_array(values: np.ndarray, skipna: bool = ...): ...
def is_datetime_array(values: np.ndarray, skipna: bool = ...): ...
def is_string_array(values: np.ndarray, skipna: bool = ...): ...
def is_float_array(values: np.ndarray, skipna: bool = ...): ...
def is_integer_array(values: np.ndarray, skipna: bool = ...): ...
def is_bool_array(values: np.ndarray, skipna: bool = ...): ...
def fast_multiget(mapping: dict, keys: np.ndarray, default=...) -> np.ndarray: ...
def fast_unique_multiple_list_gen(gen: Generator, sort: bool = ...) -> list: ...
def fast_unique_multiple_list(lists: list, sort: bool | None = ...) -> list: ...
def map_infer(
arr: np.ndarray,
f: Callable[[Any], Any],
convert: bool = ...,
ignore_na: bool = ...,
) -> np.ndarray: ...
@overload
def maybe_convert_objects(
objects: npt.NDArray[np.object_],
*,
try_float: bool = ...,
safe: bool = ...,
convert_numeric: bool = ...,
convert_non_numeric: Literal[False] = ...,
convert_to_nullable_dtype: Literal[False] = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> npt.NDArray[np.object_ | np.number]: ...
@overload
def maybe_convert_objects(
objects: npt.NDArray[np.object_],
*,
try_float: bool = ...,
safe: bool = ...,
convert_numeric: bool = ...,
convert_non_numeric: bool = ...,
convert_to_nullable_dtype: Literal[True] = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> ArrayLike: ...
@overload
def maybe_convert_objects(
objects: npt.NDArray[np.object_],
*,
try_float: bool = ...,
safe: bool = ...,
convert_numeric: bool = ...,
convert_non_numeric: bool = ...,
convert_to_nullable_dtype: bool = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> ArrayLike: ...
@overload
def maybe_convert_numeric(
values: npt.NDArray[np.object_],
na_values: set,
convert_empty: bool = ...,
coerce_numeric: bool = ...,
convert_to_masked_nullable: Literal[False] = ...,
) -> tuple[np.ndarray, None]: ...
@overload
def maybe_convert_numeric(
values: npt.NDArray[np.object_],
na_values: set,
convert_empty: bool = ...,
coerce_numeric: bool = ...,
*,
convert_to_masked_nullable: Literal[True],
) -> tuple[np.ndarray, np.ndarray]: ...
# TODO: restrict `arr`?
def ensure_string_array(
arr,
na_value: object = ...,
convert_na_value: bool = ...,
copy: bool = ...,
skipna: bool = ...,
) -> npt.NDArray[np.object_]: ...
def convert_nans_to_NA(
arr: npt.NDArray[np.object_],
) -> npt.NDArray[np.object_]: ...
def fast_zip(ndarrays: list) -> npt.NDArray[np.object_]: ...
# TODO: can we be more specific about rows?
def to_object_array_tuples(rows: object) -> ndarray_obj_2d: ...
def tuples_to_object_array(
tuples: npt.NDArray[np.object_],
) -> ndarray_obj_2d: ...
# TODO: can we be more specific about rows?
def to_object_array(rows: object, min_width: int = ...) -> ndarray_obj_2d: ...
def dicts_to_array(dicts: list, columns: list) -> ndarray_obj_2d: ...
def maybe_booleans_to_slice(
mask: npt.NDArray[np.uint8],
) -> slice | npt.NDArray[np.uint8]: ...
def maybe_indices_to_slice(
indices: npt.NDArray[np.intp],
max_len: int,
) -> slice | npt.NDArray[np.intp]: ...
def is_all_arraylike(obj: list) -> bool: ...
# -----------------------------------------------------------------
# Functions which in reality take memoryviews
def memory_usage_of_objects(arr: np.ndarray) -> int: ... # object[:] # np.int64
def map_infer_mask(
arr: np.ndarray,
f: Callable[[Any], Any],
mask: np.ndarray, # const uint8_t[:]
convert: bool = ...,
na_value: Any = ...,
dtype: np.dtype = ...,
) -> np.ndarray: ...
def indices_fast(
index: npt.NDArray[np.intp],
labels: np.ndarray, # const int64_t[:]
keys: list,
sorted_labels: list[npt.NDArray[np.int64]],
) -> dict[Hashable, npt.NDArray[np.intp]]: ...
def generate_slices(
labels: np.ndarray, # const intp_t[:]
ngroups: int,
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
def count_level_2d(
mask: np.ndarray, # ndarray[uint8_t, ndim=2, cast=True],
labels: np.ndarray, # const intp_t[:]
max_bin: int,
) -> np.ndarray: ... # np.ndarray[np.int64, ndim=2]
def get_level_sorter(
label: np.ndarray, # const int64_t[:]
starts: np.ndarray, # const intp_t[:]
) -> np.ndarray: ... # np.ndarray[np.intp, ndim=1]
def generate_bins_dt64(
values: npt.NDArray[np.int64],
binner: np.ndarray, # const int64_t[:]
closed: object = ...,
hasnans: bool = ...,
) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1]
def array_equivalent_object(
left: npt.NDArray[np.object_],
right: npt.NDArray[np.object_],
) -> bool: ...
def has_infs(arr: np.ndarray) -> bool: ... # const floating[:]
def has_only_ints_or_nan(arr: np.ndarray) -> bool: ... # const floating[:]
def get_reverse_indexer(
indexer: np.ndarray, # const intp_t[:]
length: int,
) -> npt.NDArray[np.intp]: ...
def is_bool_list(obj: list) -> bool: ...
def dtypes_all_equal(types: list[DtypeObj]) -> bool: ...
def is_range_indexer(
left: np.ndarray, # np.ndarray[np.int64, ndim=1]
n: int,
) -> bool: ...
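
# Hedged sketch of the inference helpers typed above; infer_dtype and
# maybe_convert_objects drive pandas' object-dtype conversion paths.
import numpy as np
from pandas._libs import lib

lib.infer_dtype(np.array(["a", "b"], dtype=object))   # 'string'
lib.is_scalar(3.5)                                    # True
lib.maybe_convert_objects(
    np.array([1, 2, 3], dtype=object), convert_numeric=True
).dtype                                               # int64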

View file

@ -0,0 +1,17 @@
import numpy as np
from numpy import typing as npt
class NAType:
def __new__(cls, *args, **kwargs): ...
NA: NAType
def is_matching_na(
left: object, right: object, nan_matches_none: bool = ...
) -> bool: ...
def isposinf_scalar(val: object) -> bool: ...
def isneginf_scalar(val: object) -> bool: ...
def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
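
# Hedged sketch: checknull is the scalar predicate behind pd.isna, treating
# None, NaN, NaT and NA as null.
import numpy as np
from pandas._libs.missing import NA, checknull

checknull(None)     # True
checknull(np.nan)   # True
checknull(NA)       # True
checknull(0.0)      # False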

View file

@ -0,0 +1,51 @@
from typing import (
Any,
Callable,
Iterable,
Literal,
TypeAlias,
overload,
)
import numpy as np
from pandas._typing import npt
_BinOp: TypeAlias = Callable[[Any, Any], Any]
_BoolOp: TypeAlias = Callable[[Any, Any], bool]
def scalar_compare(
values: np.ndarray, # object[:]
val: object,
op: _BoolOp, # {operator.eq, operator.ne, ...}
) -> npt.NDArray[np.bool_]: ...
def vec_compare(
left: npt.NDArray[np.object_],
right: npt.NDArray[np.object_],
op: _BoolOp, # {operator.eq, operator.ne, ...}
) -> npt.NDArray[np.bool_]: ...
def scalar_binop(
values: np.ndarray, # object[:]
val: object,
op: _BinOp, # binary operator
) -> np.ndarray: ...
def vec_binop(
left: np.ndarray, # object[:]
right: np.ndarray, # object[:]
op: _BinOp, # binary operator
) -> np.ndarray: ...
@overload
def maybe_convert_bool(
arr: npt.NDArray[np.object_],
true_values: Iterable = ...,
false_values: Iterable = ...,
convert_to_masked_nullable: Literal[False] = ...,
) -> tuple[np.ndarray, None]: ...
@overload
def maybe_convert_bool(
arr: npt.NDArray[np.object_],
true_values: Iterable = ...,
false_values: Iterable = ...,
*,
convert_to_masked_nullable: Literal[True],
) -> tuple[np.ndarray, np.ndarray]: ...
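
# Hedged sketch: scalar_compare applies a comparison operator elementwise
# over an object array and returns a boolean mask.
import operator
import numpy as np
from pandas._libs.ops import scalar_compare

values = np.array(["a", "b", "a"], dtype=object)
scalar_compare(values, "a", operator.eq)   # [True, False, True]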

View file

@ -0,0 +1,5 @@
import numpy as np
def maybe_dispatch_ufunc_to_dunder_op(
self, ufunc: np.ufunc, method: str, *inputs, **kwargs
): ...

View file

@ -0,0 +1,77 @@
from typing import (
Hashable,
Literal,
)
import numpy as np
from pandas._typing import (
ArrayLike,
Dtype,
npt,
)
STR_NA_VALUES: set[str]
DEFAULT_BUFFER_HEURISTIC: int
def sanitize_objects(
values: npt.NDArray[np.object_],
na_values: set,
) -> int: ...
class TextReader:
unnamed_cols: set[str]
table_width: int # int64_t
leading_cols: int # int64_t
header: list[list[int]] # non-negative integers
def __init__(
self,
source,
delimiter: bytes | str = ..., # single-character only
header=...,
header_start: int = ..., # int64_t
header_end: int = ..., # uint64_t
index_col=...,
names=...,
tokenize_chunksize: int = ..., # int64_t
delim_whitespace: bool = ...,
converters=...,
skipinitialspace: bool = ...,
escapechar: bytes | str | None = ..., # single-character only
doublequote: bool = ...,
quotechar: str | bytes | None = ..., # at most 1 character
quoting: int = ...,
lineterminator: bytes | str | None = ..., # at most 1 character
comment=...,
decimal: bytes | str = ..., # single-character only
thousands: bytes | str | None = ..., # single-character only
dtype: Dtype | dict[Hashable, Dtype] = ...,
usecols=...,
error_bad_lines: bool = ...,
warn_bad_lines: bool = ...,
na_filter: bool = ...,
na_values=...,
na_fvalues=...,
keep_default_na: bool = ...,
true_values=...,
false_values=...,
allow_leading_cols: bool = ...,
skiprows=...,
skipfooter: int = ..., # int64_t
verbose: bool = ...,
float_precision: Literal["round_trip", "legacy", "high"] | None = ...,
skip_blank_lines: bool = ...,
encoding_errors: bytes | str = ...,
) -> None: ...
def set_noconvert(self, i: int) -> None: ...
def remove_noconvert(self, i: int) -> None: ...
def close(self) -> None: ...
def read(self, rows: int | None = ...) -> dict[int, ArrayLike]: ...
def read_low_memory(self, rows: int | None) -> list[dict[int, ArrayLike]]: ...
# _maybe_upcast, na_values are only exposed for testing
na_values: dict
def _maybe_upcast(
arr, use_dtype_backend: bool = ..., dtype_backend: str = ...
) -> np.ndarray: ...
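
# Hedged note: TextReader is the C tokenizer behind read_csv(engine="c");
# ordinary code reaches it through the public API rather than directly.
import io
import pandas as pd

df = pd.read_csv(io.StringIO("a,b\n1,2\n3,4\n"), engine="c")
# df.shape -> (2, 2)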

View file

@ -0,0 +1,27 @@
from typing import (
Sequence,
overload,
)
from pandas._typing import (
AnyArrayLike,
DataFrame,
Index,
Series,
)
# note: this is a lie to make type checkers happy (they special
# case property). cache_readonly uses attribute names similar to
# property (fget) but it does not provide fset and fdel.
cache_readonly = property
class AxisProperty:
axis: int
def __init__(self, axis: int = ..., doc: str = ...) -> None: ...
@overload
def __get__(self, obj: DataFrame | Series, type) -> Index: ...
@overload
def __get__(self, obj: None, type) -> AxisProperty: ...
def __set__(
self, obj: DataFrame | Series, value: AnyArrayLike | Sequence
) -> None: ...
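
# Hedged sketch: cache_readonly acts like a property whose first computed
# value is cached on the instance, so later reads skip the getter.
from pandas._libs.properties import cache_readonly

class Squares:
    def __init__(self, n: int) -> None:
        self.n = n

    @cache_readonly
    def total(self) -> int:
        return sum(i * i for i in range(self.n))

Squares(4).total   # 14, computed once per instance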

View file

@ -0,0 +1,16 @@
import numpy as np
from pandas._typing import npt
def unstack(
values: np.ndarray, # reshape_t[:, :]
mask: np.ndarray, # const uint8_t[:]
stride: int,
length: int,
width: int,
new_values: np.ndarray, # reshape_t[:, :]
new_mask: np.ndarray, # uint8_t[:, :]
) -> None: ...
def explode(
values: npt.NDArray[np.object_],
) -> tuple[npt.NDArray[np.object_], npt.NDArray[np.int64]]: ...
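
# Hedged sketch: explode flattens an object array of list-likes and reports
# how many output rows each input row produced, which Series.explode uses to
# repeat the index; empty list-likes are assumed to yield a single NaN.
import numpy as np
from pandas._libs.reshape import explode

values = np.array([[1, 2], [], [3]], dtype=object)
flat, counts = explode(values)
# flat -> [1, 2, nan, 3]; counts -> [2, 1, 1]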

View file

@ -0,0 +1,7 @@
from pandas.io.sas.sas7bdat import SAS7BDATReader
class Parser:
def __init__(self, parser: SAS7BDATReader) -> None: ...
def read(self, nrows: int) -> None: ...
def get_subheader_index(signature: bytes) -> int: ...

View file

@ -0,0 +1,47 @@
from typing import Sequence
import numpy as np
from pandas._typing import (
Self,
npt,
)
class SparseIndex:
length: int
npoints: int
def __init__(self) -> None: ...
@property
def ngaps(self) -> int: ...
@property
def nbytes(self) -> int: ...
@property
def indices(self) -> npt.NDArray[np.int32]: ...
def equals(self, other) -> bool: ...
def lookup(self, index: int) -> np.int32: ...
def lookup_array(self, indexer: npt.NDArray[np.int32]) -> npt.NDArray[np.int32]: ...
def to_int_index(self) -> IntIndex: ...
def to_block_index(self) -> BlockIndex: ...
def intersect(self, y_: SparseIndex) -> Self: ...
def make_union(self, y_: SparseIndex) -> Self: ...
class IntIndex(SparseIndex):
indices: npt.NDArray[np.int32]
def __init__(
self, length: int, indices: Sequence[int], check_integrity: bool = ...
) -> None: ...
class BlockIndex(SparseIndex):
nblocks: int
blocs: np.ndarray
blengths: np.ndarray
def __init__(
self, length: int, blocs: np.ndarray, blengths: np.ndarray
) -> None: ...
def make_mask_object_ndarray(
arr: npt.NDArray[np.object_], fill_value
) -> npt.NDArray[np.bool_]: ...
def get_blocks(
indices: npt.NDArray[np.int32],
) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.int32]]: ...
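
# Hedged sketch: IntIndex stores the positions of present values within a
# sparse array's logical length; the two index kinds convert losslessly.
import numpy as np
from pandas._libs.sparse import IntIndex

idx = IntIndex(10, np.array([2, 5, 7], dtype=np.int32))
idx.npoints                                    # 3
idx.ngaps                                      # 7
idx.to_block_index().to_int_index().indices    # [2, 5, 7]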

View file

@ -0,0 +1,12 @@
def assert_dict_equal(a, b, compare_keys: bool = ...): ...
def assert_almost_equal(
a,
b,
rtol: float = ...,
atol: float = ...,
check_dtype: bool = ...,
obj=...,
lobj=...,
robj=...,
index_values=...,
): ...
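
# Hedged sketch: assert_almost_equal compares scalars or array-likes within
# rtol/atol and raises AssertionError on mismatch.
from pandas._libs.testing import assert_almost_equal

assert_almost_equal(1.000001, 1.0, rtol=1e-3)   # passes
assert_almost_equal([1, 2], [1, 2])             # passes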

View file

@ -0,0 +1,32 @@
from datetime import tzinfo
import numpy as np
from pandas._typing import npt
def format_array_from_datetime(
values: npt.NDArray[np.int64],
tz: tzinfo | None = ...,
format: str | None = ...,
na_rep: str | float = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.object_]: ...
def array_with_unit_to_datetime(
values: npt.NDArray[np.object_],
unit: str,
errors: str = ...,
) -> tuple[np.ndarray, tzinfo | None]: ...
def first_non_null(values: np.ndarray) -> int: ...
def array_to_datetime(
values: npt.NDArray[np.object_],
errors: str = ...,
dayfirst: bool = ...,
yearfirst: bool = ...,
utc: bool = ...,
) -> tuple[np.ndarray, tzinfo | None]: ...
# returned ndarray may be object dtype or datetime64[ns]
def array_to_datetime_with_tz(
values: npt.NDArray[np.object_], tz: tzinfo
) -> npt.NDArray[np.int64]: ...
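
# Hedged sketch: array_to_datetime converts an object array of datetime-like
# scalars or strings to datetime64[ns], also returning the single fixed
# timezone it inferred (None here).
import numpy as np
from pandas._libs.tslib import array_to_datetime

values = np.array(["2024-01-01", "2024-01-02"], dtype=object)
result, tz = array_to_datetime(values)
# result.dtype -> datetime64[ns]; tz -> None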

View file

@ -0,0 +1,85 @@
__all__ = [
"dtypes",
"localize_pydatetime",
"NaT",
"NaTType",
"iNaT",
"nat_strings",
"OutOfBoundsDatetime",
"OutOfBoundsTimedelta",
"IncompatibleFrequency",
"Period",
"Resolution",
"Timedelta",
"normalize_i8_timestamps",
"is_date_array_normalized",
"dt64arr_to_periodarr",
"delta_to_nanoseconds",
"ints_to_pydatetime",
"ints_to_pytimedelta",
"get_resolution",
"Timestamp",
"tz_convert_from_utc_single",
"tz_convert_from_utc",
"to_offset",
"Tick",
"BaseOffset",
"tz_compare",
"is_unitless",
"astype_overflowsafe",
"get_unit_from_dtype",
"periods_per_day",
"periods_per_second",
"is_supported_unit",
"npy_unit_to_abbrev",
"get_supported_reso",
]
from pandas._libs.tslibs import dtypes # pylint: disable=import-self
from pandas._libs.tslibs.conversion import localize_pydatetime
from pandas._libs.tslibs.dtypes import (
Resolution,
get_supported_reso,
is_supported_unit,
npy_unit_to_abbrev,
periods_per_day,
periods_per_second,
)
from pandas._libs.tslibs.nattype import (
NaT,
NaTType,
iNaT,
nat_strings,
)
from pandas._libs.tslibs.np_datetime import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
astype_overflowsafe,
is_unitless,
py_get_unit_from_dtype as get_unit_from_dtype,
)
from pandas._libs.tslibs.offsets import (
BaseOffset,
Tick,
to_offset,
)
from pandas._libs.tslibs.period import (
IncompatibleFrequency,
Period,
)
from pandas._libs.tslibs.timedeltas import (
Timedelta,
delta_to_nanoseconds,
ints_to_pytimedelta,
)
from pandas._libs.tslibs.timestamps import Timestamp
from pandas._libs.tslibs.timezones import tz_compare
from pandas._libs.tslibs.tzconversion import tz_convert_from_utc_single
from pandas._libs.tslibs.vectorized import (
dt64arr_to_periodarr,
get_resolution,
ints_to_pydatetime,
is_date_array_normalized,
normalize_i8_timestamps,
tz_convert_from_utc,
)

View file

@ -0,0 +1,12 @@
DAYS: list[str]
MONTH_ALIASES: dict[int, str]
MONTH_NUMBERS: dict[str, int]
MONTHS: list[str]
int_to_weekday: dict[int, str]
def get_firstbday(year: int, month: int) -> int: ...
def get_lastbday(year: int, month: int) -> int: ...
def get_day_of_year(year: int, month: int, day: int) -> int: ...
def get_iso_calendar(year: int, month: int, day: int) -> tuple[int, int, int]: ...
def get_week_of_year(year: int, month: int, day: int) -> int: ...
def get_days_in_month(year: int, month: int) -> int: ...
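
# Hedged sketch of the calendar helpers typed above.
from pandas._libs.tslibs.ccalendar import get_days_in_month, get_firstbday

get_days_in_month(2024, 2)   # 29 -- leap year
get_firstbday(2024, 6)       # 3 -- June 1, 2024 falls on a Saturday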

View file

@ -0,0 +1,14 @@
from datetime import (
datetime,
tzinfo,
)
import numpy as np
DT64NS_DTYPE: np.dtype
TD64NS_DTYPE: np.dtype
def precision_from_unit(
unit: str,
) -> tuple[int, int]: ... # (int64_t, _)
def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...
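
# Hedged sketch: localize_pydatetime attaches a timezone to a naive
# datetime, mirroring Timestamp.tz_localize for plain datetime objects.
from datetime import datetime, timezone
from pandas._libs.tslibs.conversion import localize_pydatetime

localize_pydatetime(datetime(2024, 1, 1), timezone.utc)
# -> datetime(2024, 1, 1, tzinfo=timezone.utc)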

View file

@ -0,0 +1,88 @@
from enum import Enum
# These are not public API, but are exposed in the .pyi file because they
# are imported in tests.
_attrname_to_abbrevs: dict[str, str]
_period_code_map: dict[str, int]
def periods_per_day(reso: int) -> int: ...
def periods_per_second(reso: int) -> int: ...
def is_supported_unit(reso: int) -> bool: ...
def npy_unit_to_abbrev(reso: int) -> str: ...
def get_supported_reso(reso: int) -> int: ...
def abbrev_to_npy_unit(abbrev: str) -> int: ...
class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode
_n: int
# actually __cinit__
def __new__(cls, code: int, n: int): ...
@property
def _freq_group_code(self) -> int: ...
@property
def _resolution_obj(self) -> Resolution: ...
def _get_to_timestamp_base(self) -> int: ...
@property
def _freqstr(self) -> str: ...
def __hash__(self) -> int: ...
def _is_tick_like(self) -> bool: ...
@property
def _creso(self) -> int: ...
@property
def _td64_unit(self) -> str: ...
class FreqGroup(Enum):
FR_ANN: int
FR_QTR: int
FR_MTH: int
FR_WK: int
FR_BUS: int
FR_DAY: int
FR_HR: int
FR_MIN: int
FR_SEC: int
FR_MS: int
FR_US: int
FR_NS: int
FR_UND: int
@staticmethod
def from_period_dtype_code(code: int) -> FreqGroup: ...
class Resolution(Enum):
RESO_NS: int
RESO_US: int
RESO_MS: int
RESO_SEC: int
RESO_MIN: int
RESO_HR: int
RESO_DAY: int
RESO_MTH: int
RESO_QTR: int
RESO_YR: int
def __lt__(self, other: Resolution) -> bool: ...
def __ge__(self, other: Resolution) -> bool: ...
@property
def attrname(self) -> str: ...
@classmethod
def from_attrname(cls, attrname: str) -> Resolution: ...
@classmethod
def get_reso_from_freqstr(cls, freq: str) -> Resolution: ...
@property
def attr_abbrev(self) -> str: ...
class NpyDatetimeUnit(Enum):
NPY_FR_Y: int
NPY_FR_M: int
NPY_FR_W: int
NPY_FR_D: int
NPY_FR_h: int
NPY_FR_m: int
NPY_FR_s: int
NPY_FR_ms: int
NPY_FR_us: int
NPY_FR_ns: int
NPY_FR_ps: int
NPY_FR_fs: int
NPY_FR_as: int
NPY_FR_GENERIC: int

View file

@ -0,0 +1,62 @@
import numpy as np
from pandas._typing import npt
def build_field_sarray(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
reso: int, # NPY_DATETIMEUNIT
) -> np.ndarray: ...
def month_position_check(fields, weekdays) -> str | None: ...
def get_date_name_field(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
field: str,
locale: str | None = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.object_]: ...
def get_start_end_field(
dtindex: npt.NDArray[np.int64],
field: str,
freqstr: str | None = ...,
month_kw: int = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.bool_]: ...
def get_date_field(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
field: str,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int32]: ...
def get_timedelta_field(
tdindex: npt.NDArray[np.int64], # const int64_t[:]
field: str,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int32]: ...
def get_timedelta_days(
tdindex: npt.NDArray[np.int64], # const int64_t[:]
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...
def isleapyear_arr(
years: np.ndarray,
) -> npt.NDArray[np.bool_]: ...
def build_isocalendar_sarray(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
reso: int, # NPY_DATETIMEUNIT
) -> np.ndarray: ...
def _get_locale_names(name_type: str, locale: str | None = ...): ...
class RoundTo:
@property
def MINUS_INFTY(self) -> int: ...
@property
def PLUS_INFTY(self) -> int: ...
@property
def NEAREST_HALF_EVEN(self) -> int: ...
@property
def NEAREST_HALF_PLUS_INFTY(self) -> int: ...
@property
def NEAREST_HALF_MINUS_INFTY(self) -> int: ...
def round_nsint64(
values: npt.NDArray[np.int64],
mode: RoundTo,
nanos: int,
) -> npt.NDArray[np.int64]: ...
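
# Hedged sketch: get_date_field extracts one calendar field from an i8
# datetime array; field codes like "M" (month) are assumptions taken from
# pandas internals rather than from the stub.
import numpy as np
from pandas._libs.tslibs.fields import get_date_field

stamps = np.array(["2024-01-31", "2024-06-15"], dtype="M8[ns]").view("i8")
get_date_field(stamps, "M")   # array([1, 6], dtype=int32)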

View file

@ -0,0 +1,135 @@
from datetime import (
datetime,
timedelta,
tzinfo as _tzinfo,
)
import typing
import numpy as np
from pandas._libs.tslibs.period import Period
NaT: NaTType
iNaT: int
nat_strings: set[str]
_NaTComparisonTypes: typing.TypeAlias = (
datetime | timedelta | Period | np.datetime64 | np.timedelta64
)
class _NatComparison:
def __call__(self, other: _NaTComparisonTypes) -> bool: ...
class NaTType:
_value: np.int64
@property
def value(self) -> int: ...
@property
def asm8(self) -> np.datetime64: ...
def to_datetime64(self) -> np.datetime64: ...
def to_numpy(
self, dtype: np.dtype | str | None = ..., copy: bool = ...
) -> np.datetime64 | np.timedelta64: ...
@property
def is_leap_year(self) -> bool: ...
@property
def is_month_start(self) -> bool: ...
@property
def is_quarter_start(self) -> bool: ...
@property
def is_year_start(self) -> bool: ...
@property
def is_month_end(self) -> bool: ...
@property
def is_quarter_end(self) -> bool: ...
@property
def is_year_end(self) -> bool: ...
@property
def day_of_year(self) -> float: ...
@property
def dayofyear(self) -> float: ...
@property
def days_in_month(self) -> float: ...
@property
def daysinmonth(self) -> float: ...
@property
def day_of_week(self) -> float: ...
@property
def dayofweek(self) -> float: ...
@property
def week(self) -> float: ...
@property
def weekofyear(self) -> float: ...
def day_name(self) -> float: ...
def month_name(self) -> float: ...
def weekday(self) -> float: ...
def isoweekday(self) -> float: ...
def total_seconds(self) -> float: ...
def today(self, *args, **kwargs) -> NaTType: ...
def now(self, *args, **kwargs) -> NaTType: ...
def to_pydatetime(self) -> NaTType: ...
def date(self) -> NaTType: ...
def round(self) -> NaTType: ...
def floor(self) -> NaTType: ...
def ceil(self) -> NaTType: ...
@property
def tzinfo(self) -> None: ...
@property
def tz(self) -> None: ...
def tz_convert(self, tz: _tzinfo | str | None) -> NaTType: ...
def tz_localize(
self,
tz: _tzinfo | str | None,
ambiguous: str = ...,
nonexistent: str = ...,
) -> NaTType: ...
def replace(
self,
year: int | None = ...,
month: int | None = ...,
day: int | None = ...,
hour: int | None = ...,
minute: int | None = ...,
second: int | None = ...,
microsecond: int | None = ...,
nanosecond: int | None = ...,
tzinfo: _tzinfo | None = ...,
fold: int | None = ...,
) -> NaTType: ...
@property
def year(self) -> float: ...
@property
def quarter(self) -> float: ...
@property
def month(self) -> float: ...
@property
def day(self) -> float: ...
@property
def hour(self) -> float: ...
@property
def minute(self) -> float: ...
@property
def second(self) -> float: ...
@property
def millisecond(self) -> float: ...
@property
def microsecond(self) -> float: ...
@property
def nanosecond(self) -> float: ...
# inject Timedelta properties
@property
def days(self) -> float: ...
@property
def microseconds(self) -> float: ...
@property
def nanoseconds(self) -> float: ...
# inject Period properties
@property
def qyear(self) -> float: ...
def __eq__(self, other: object) -> bool: ...
def __ne__(self, other: object) -> bool: ...
__lt__: _NatComparison
__le__: _NatComparison
__gt__: _NatComparison
__ge__: _NatComparison
def as_unit(self, unit: str, round_ok: bool = ...) -> NaTType: ...
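
# Hedged sketch: NaT mirrors Timestamp/Timedelta/Period attributes, but each
# numeric field comes back as nan -- hence the float return types above.
import pandas as pd

pd.NaT.year                    # nan
pd.NaT + pd.Timedelta("1D")    # NaT
pd.NaT == pd.NaT               # False -- NaT compares unequal, like NaN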

View file

@ -0,0 +1,21 @@
import numpy as np
from pandas._typing import npt
class OutOfBoundsDatetime(ValueError): ...
class OutOfBoundsTimedelta(ValueError): ...
# only exposed for testing
def py_get_unit_from_dtype(dtype: np.dtype): ...
def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ...
def astype_overflowsafe(
arr: np.ndarray,
dtype: np.dtype,
copy: bool = ...,
round_ok: bool = ...,
is_coerce: bool = ...,
) -> np.ndarray: ...
def is_unitless(dtype: np.dtype) -> bool: ...
def compare_mismatched_resolutions(
left: np.ndarray, right: np.ndarray, op
) -> npt.NDArray[np.bool_]: ...
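
# Hedged sketch: astype_overflowsafe casts between datetime64/timedelta64
# resolutions, raising OutOfBounds* instead of silently wrapping.
import numpy as np
from pandas._libs.tslibs.np_datetime import astype_overflowsafe

arr = np.array(["2024-01-01"], dtype="M8[s]")
astype_overflowsafe(arr, np.dtype("M8[ns]"))   # safe upcast to nanoseconds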

View file

@ -0,0 +1,283 @@
from datetime import (
datetime,
time,
timedelta,
)
from typing import (
Any,
Collection,
Literal,
TypeVar,
overload,
)
import numpy as np
from pandas._libs.tslibs.nattype import NaTType
from pandas._typing import (
OffsetCalendar,
Self,
npt,
)
from .timedeltas import Timedelta
_BaseOffsetT = TypeVar("_BaseOffsetT", bound=BaseOffset)
_DatetimeT = TypeVar("_DatetimeT", bound=datetime)
_TimedeltaT = TypeVar("_TimedeltaT", bound=timedelta)
_relativedelta_kwds: set[str]
prefix_mapping: dict[str, type]
class ApplyTypeError(TypeError): ...
class BaseOffset:
n: int
def __init__(self, n: int = ..., normalize: bool = ...) -> None: ...
def __eq__(self, other) -> bool: ...
def __ne__(self, other) -> bool: ...
def __hash__(self) -> int: ...
@property
def kwds(self) -> dict: ...
@property
def base(self) -> BaseOffset: ...
@overload
def __add__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ...
@overload
def __add__(self, other: BaseOffset) -> Self: ...
@overload
def __add__(self, other: _DatetimeT) -> _DatetimeT: ...
@overload
def __add__(self, other: _TimedeltaT) -> _TimedeltaT: ...
@overload
def __radd__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ...
@overload
def __radd__(self, other: BaseOffset) -> Self: ...
@overload
def __radd__(self, other: _DatetimeT) -> _DatetimeT: ...
@overload
def __radd__(self, other: _TimedeltaT) -> _TimedeltaT: ...
@overload
def __radd__(self, other: NaTType) -> NaTType: ...
def __sub__(self, other: BaseOffset) -> Self: ...
@overload
def __rsub__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ...
@overload
def __rsub__(self, other: BaseOffset): ...
@overload
def __rsub__(self, other: _DatetimeT) -> _DatetimeT: ...
@overload
def __rsub__(self, other: _TimedeltaT) -> _TimedeltaT: ...
@overload
def __mul__(self, other: np.ndarray) -> np.ndarray: ...
@overload
def __mul__(self, other: int): ...
@overload
def __rmul__(self, other: np.ndarray) -> np.ndarray: ...
@overload
def __rmul__(self, other: int) -> Self: ...
def __neg__(self) -> Self: ...
def copy(self) -> Self: ...
@property
def name(self) -> str: ...
@property
def rule_code(self) -> str: ...
@property
def freqstr(self) -> str: ...
def _apply(self, other): ...
def _apply_array(self, dtarr) -> None: ...
def rollback(self, dt: datetime) -> datetime: ...
def rollforward(self, dt: datetime) -> datetime: ...
def is_on_offset(self, dt: datetime) -> bool: ...
def __setstate__(self, state) -> None: ...
def __getstate__(self): ...
@property
def nanos(self) -> int: ...
def is_anchored(self) -> bool: ...
def _get_offset(name: str) -> BaseOffset: ...
class SingleConstructorOffset(BaseOffset):
@classmethod
def _from_name(cls, suffix: None = ...): ...
def __reduce__(self): ...
@overload
def to_offset(freq: None) -> None: ...
@overload
def to_offset(freq: _BaseOffsetT) -> _BaseOffsetT: ...
@overload
def to_offset(freq: timedelta | str) -> BaseOffset: ...
class Tick(SingleConstructorOffset):
_creso: int
_prefix: str
def __init__(self, n: int = ..., normalize: bool = ...) -> None: ...
@property
def delta(self) -> Timedelta: ...
@property
def nanos(self) -> int: ...
def delta_to_tick(delta: timedelta) -> Tick: ...
class Day(Tick): ...
class Hour(Tick): ...
class Minute(Tick): ...
class Second(Tick): ...
class Milli(Tick): ...
class Micro(Tick): ...
class Nano(Tick): ...
class RelativeDeltaOffset(BaseOffset):
def __init__(self, n: int = ..., normalize: bool = ..., **kwds: Any) -> None: ...
class BusinessMixin(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., offset: timedelta = ...
) -> None: ...
class BusinessDay(BusinessMixin): ...
class BusinessHour(BusinessMixin):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
start: str | time | Collection[str | time] = ...,
end: str | time | Collection[str | time] = ...,
offset: timedelta = ...,
) -> None: ...
class WeekOfMonthMixin(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., weekday: int = ...
) -> None: ...
class YearOffset(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., month: int | None = ...
) -> None: ...
class BYearEnd(YearOffset): ...
class BYearBegin(YearOffset): ...
class YearEnd(YearOffset): ...
class YearBegin(YearOffset): ...
class QuarterOffset(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., startingMonth: int | None = ...
) -> None: ...
class BQuarterEnd(QuarterOffset): ...
class BQuarterBegin(QuarterOffset): ...
class QuarterEnd(QuarterOffset): ...
class QuarterBegin(QuarterOffset): ...
class MonthOffset(SingleConstructorOffset): ...
class MonthEnd(MonthOffset): ...
class MonthBegin(MonthOffset): ...
class BusinessMonthEnd(MonthOffset): ...
class BusinessMonthBegin(MonthOffset): ...
class SemiMonthOffset(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., day_of_month: int | None = ...
) -> None: ...
class SemiMonthEnd(SemiMonthOffset): ...
class SemiMonthBegin(SemiMonthOffset): ...
class Week(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., weekday: int | None = ...
) -> None: ...
class WeekOfMonth(WeekOfMonthMixin):
def __init__(
self, n: int = ..., normalize: bool = ..., week: int = ..., weekday: int = ...
) -> None: ...
class LastWeekOfMonth(WeekOfMonthMixin): ...
class FY5253Mixin(SingleConstructorOffset):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
weekday: int = ...,
startingMonth: int = ...,
variation: Literal["nearest", "last"] = ...,
) -> None: ...
class FY5253(FY5253Mixin): ...
class FY5253Quarter(FY5253Mixin):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
weekday: int = ...,
startingMonth: int = ...,
qtr_with_extra_week: int = ...,
variation: Literal["nearest", "last"] = ...,
) -> None: ...
class Easter(SingleConstructorOffset): ...
class _CustomBusinessMonth(BusinessMixin):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
weekmask: str = ...,
holidays: list | None = ...,
calendar: OffsetCalendar | None = ...,
offset: timedelta = ...,
) -> None: ...
class CustomBusinessDay(BusinessDay):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
weekmask: str = ...,
holidays: list | None = ...,
calendar: OffsetCalendar | None = ...,
offset: timedelta = ...,
) -> None: ...
class CustomBusinessHour(BusinessHour):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
weekmask: str = ...,
holidays: list | None = ...,
calendar: OffsetCalendar | None = ...,
start: str | time | Collection[str | time] = ...,
end: str | time | Collection[str | time] = ...,
offset: timedelta = ...,
) -> None: ...
class CustomBusinessMonthEnd(_CustomBusinessMonth): ...
class CustomBusinessMonthBegin(_CustomBusinessMonth): ...
class OffsetMeta(type): ...
class DateOffset(RelativeDeltaOffset, metaclass=OffsetMeta): ...
BDay = BusinessDay
BMonthEnd = BusinessMonthEnd
BMonthBegin = BusinessMonthBegin
CBMonthEnd = CustomBusinessMonthEnd
CBMonthBegin = CustomBusinessMonthBegin
CDay = CustomBusinessDay
def roll_qtrday(
other: datetime, n: int, month: int, day_opt: str, modby: int
) -> int: ...
INVALID_FREQ_ERR_MSG: Literal["Invalid frequency: {0}"]
def shift_months(
dtindex: npt.NDArray[np.int64], months: int, day_opt: str | None = ...
) -> npt.NDArray[np.int64]: ...
_offset_map: dict[str, BaseOffset]
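
# Hedged sketch of the offset algebra typed above, via the public aliases.
import pandas as pd
from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import BDay, MonthEnd

pd.Timestamp("2024-01-15") + MonthEnd(1)   # Timestamp('2024-01-31')
pd.Timestamp("2024-01-06") + BDay(1)       # 2024-01-08 -- skips the weekend
to_offset("B") == BDay()                   # True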

View file

@ -0,0 +1,38 @@
from datetime import datetime
import numpy as np
from pandas._typing import npt
class DateParseError(ValueError): ...
def py_parse_datetime_string(
date_string: str,
dayfirst: bool = ...,
yearfirst: bool = ...,
) -> datetime: ...
def parse_datetime_string_with_reso(
date_string: str,
freq: str | None = ...,
dayfirst: bool | None = ...,
yearfirst: bool | None = ...,
) -> tuple[datetime, str]: ...
def _does_string_look_like_datetime(py_string: str) -> bool: ...
def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ...
def try_parse_dates(
values: npt.NDArray[np.object_], # object[:]
parser,
) -> npt.NDArray[np.object_]: ...
def try_parse_year_month_day(
years: npt.NDArray[np.object_], # object[:]
months: npt.NDArray[np.object_], # object[:]
days: npt.NDArray[np.object_], # object[:]
) -> npt.NDArray[np.object_]: ...
def guess_datetime_format(
dt_str,
dayfirst: bool | None = ...,
) -> str | None: ...
def concat_date_cols(
date_cols: tuple,
) -> npt.NDArray[np.object_]: ...
def get_rule_month(source: str) -> str: ...
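For context, guess_datetime_format above backs format inference in pd.to_datetime. A short sketch, assuming pandas>=2.0 where it is re-exported via pandas.tseries.api:

from pandas.tseries.api import guess_datetime_format

# expected outputs shown as comments; exact format strings may vary by version
print(guess_datetime_format("2023-01-13 10:00"))           # '%Y-%m-%d %H:%M'
print(guess_datetime_format("13/01/2023", dayfirst=True))  # '%d/%m/%Y'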

View file

@@ -0,0 +1,135 @@
from datetime import timedelta
from typing import Literal
import numpy as np
from pandas._libs.tslibs.dtypes import PeriodDtypeBase
from pandas._libs.tslibs.nattype import NaTType
from pandas._libs.tslibs.offsets import BaseOffset
from pandas._libs.tslibs.timestamps import Timestamp
from pandas._typing import (
Frequency,
npt,
)
INVALID_FREQ_ERR_MSG: str
DIFFERENT_FREQ: str
class IncompatibleFrequency(ValueError): ...
def periodarr_to_dt64arr(
periodarr: npt.NDArray[np.int64], # const int64_t[:]
freq: int,
) -> npt.NDArray[np.int64]: ...
def period_asfreq_arr(
arr: npt.NDArray[np.int64],
freq1: int,
freq2: int,
end: bool,
) -> npt.NDArray[np.int64]: ...
def get_period_field_arr(
field: str,
arr: npt.NDArray[np.int64], # const int64_t[:]
freq: int,
) -> npt.NDArray[np.int64]: ...
def from_ordinals(
values: npt.NDArray[np.int64], # const int64_t[:]
freq: timedelta | BaseOffset | str,
) -> npt.NDArray[np.int64]: ...
def extract_ordinals(
values: npt.NDArray[np.object_],
freq: Frequency | int,
) -> npt.NDArray[np.int64]: ...
def extract_freq(
values: npt.NDArray[np.object_],
) -> BaseOffset: ...
def period_array_strftime(
values: npt.NDArray[np.int64],
dtype_code: int,
na_rep,
date_format: str | None,
) -> npt.NDArray[np.object_]: ...
# exposed for tests
def period_asfreq(ordinal: int, freq1: int, freq2: int, end: bool) -> int: ...
def period_ordinal(
y: int, m: int, d: int, h: int, min: int, s: int, us: int, ps: int, freq: int
) -> int: ...
def freq_to_dtype_code(freq: BaseOffset) -> int: ...
def validate_end_alias(how: str) -> Literal["E", "S"]: ...
class PeriodMixin:
@property
def end_time(self) -> Timestamp: ...
@property
def start_time(self) -> Timestamp: ...
def _require_matching_freq(self, other, base: bool = ...) -> None: ...
class Period(PeriodMixin):
ordinal: int # int64_t
freq: BaseOffset
_dtype: PeriodDtypeBase
# error: "__new__" must return a class instance (got "Union[Period, NaTType]")
def __new__( # type: ignore[misc]
cls,
value=...,
freq: int | str | BaseOffset | None = ...,
ordinal: int | None = ...,
year: int | None = ...,
month: int | None = ...,
quarter: int | None = ...,
day: int | None = ...,
hour: int | None = ...,
minute: int | None = ...,
second: int | None = ...,
) -> Period | NaTType: ...
@classmethod
def _maybe_convert_freq(cls, freq) -> BaseOffset: ...
@classmethod
def _from_ordinal(cls, ordinal: int, freq) -> Period: ...
@classmethod
def now(cls, freq: BaseOffset = ...) -> Period: ...
def strftime(self, fmt: str) -> str: ...
def to_timestamp(
self,
freq: str | BaseOffset | None = ...,
how: str = ...,
) -> Timestamp: ...
def asfreq(self, freq: str | BaseOffset, how: str = ...) -> Period: ...
@property
def freqstr(self) -> str: ...
@property
def is_leap_year(self) -> bool: ...
@property
def daysinmonth(self) -> int: ...
@property
def days_in_month(self) -> int: ...
@property
def qyear(self) -> int: ...
@property
def quarter(self) -> int: ...
@property
def day_of_year(self) -> int: ...
@property
def weekday(self) -> int: ...
@property
def day_of_week(self) -> int: ...
@property
def week(self) -> int: ...
@property
def weekofyear(self) -> int: ...
@property
def second(self) -> int: ...
@property
def minute(self) -> int: ...
@property
def hour(self) -> int: ...
@property
def day(self) -> int: ...
@property
def month(self) -> int: ...
@property
def year(self) -> int: ...
def __sub__(self, other) -> Period | BaseOffset: ...
def __add__(self, other) -> Period: ...
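A short sketch of the Period API declared above (illustrative only, standard pandas):

import pandas as pd

p = pd.Period("2023Q1", freq="Q")
print(p.start_time, p.end_time)  # quarter boundaries as Timestamps
print(p.asfreq("M", how="end"))  # 2023-03, the last month of the quarter
print(p + 1)                     # 2023Q2, ordinal arithmetic via __add__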

View file

@@ -0,0 +1,13 @@
import numpy as np
from pandas._typing import npt
def array_strptime(
values: npt.NDArray[np.object_],
fmt: str | None,
exact: bool = ...,
errors: str = ...,
utc: bool = ...,
) -> tuple[np.ndarray, np.ndarray]: ...
# first ndarray is M8[ns], second is object ndarray of tzinfo | None

View file

@@ -0,0 +1,169 @@
from datetime import timedelta
from typing import (
ClassVar,
Literal,
TypeAlias,
TypeVar,
overload,
)
import numpy as np
from pandas._libs.tslibs import (
NaTType,
Tick,
)
from pandas._typing import (
Self,
npt,
)
# This should be kept consistent with the keys in the dict timedelta_abbrevs
# in pandas/_libs/tslibs/timedeltas.pyx
UnitChoices: TypeAlias = Literal[
"Y",
"y",
"M",
"W",
"w",
"D",
"d",
"days",
"day",
"hours",
"hour",
"hr",
"h",
"m",
"minute",
"min",
"minutes",
"T",
"t",
"s",
"seconds",
"sec",
"second",
"ms",
"milliseconds",
"millisecond",
"milli",
"millis",
"L",
"l",
"us",
"microseconds",
"microsecond",
"µs",
"micro",
"micros",
"u",
"ns",
"nanoseconds",
"nano",
"nanos",
"nanosecond",
"n",
]
_S = TypeVar("_S", bound=timedelta)
def ints_to_pytimedelta(
arr: npt.NDArray[np.timedelta64],
box: bool = ...,
) -> npt.NDArray[np.object_]: ...
def array_to_timedelta64(
values: npt.NDArray[np.object_],
unit: str | None = ...,
errors: str = ...,
) -> np.ndarray: ... # np.ndarray[m8ns]
def parse_timedelta_unit(unit: str | None) -> UnitChoices: ...
def delta_to_nanoseconds(
delta: np.timedelta64 | timedelta | Tick,
reso: int = ..., # NPY_DATETIMEUNIT
round_ok: bool = ...,
) -> int: ...
def floordiv_object_array(
left: np.ndarray, right: npt.NDArray[np.object_]
) -> np.ndarray: ...
def truediv_object_array(
left: np.ndarray, right: npt.NDArray[np.object_]
) -> np.ndarray: ...
class Timedelta(timedelta):
_creso: int
min: ClassVar[Timedelta]
max: ClassVar[Timedelta]
resolution: ClassVar[Timedelta]
value: int # np.int64
_value: int # np.int64
# error: "__new__" must return a class instance (got "Union[Timedelta, NaTType]")
def __new__( # type: ignore[misc]
cls: type[_S],
value=...,
unit: str | None = ...,
**kwargs: float | np.integer | np.floating,
) -> _S | NaTType: ...
@classmethod
def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ...
@property
def days(self) -> int: ...
@property
def seconds(self) -> int: ...
@property
def microseconds(self) -> int: ...
def total_seconds(self) -> float: ...
def to_pytimedelta(self) -> timedelta: ...
def to_timedelta64(self) -> np.timedelta64: ...
@property
def asm8(self) -> np.timedelta64: ...
# TODO: round/floor/ceil could return NaT?
def round(self, freq: str) -> Self: ...
def floor(self, freq: str) -> Self: ...
def ceil(self, freq: str) -> Self: ...
@property
def resolution_string(self) -> str: ...
def __add__(self, other: timedelta) -> Timedelta: ...
def __radd__(self, other: timedelta) -> Timedelta: ...
def __sub__(self, other: timedelta) -> Timedelta: ...
def __rsub__(self, other: timedelta) -> Timedelta: ...
def __neg__(self) -> Timedelta: ...
def __pos__(self) -> Timedelta: ...
def __abs__(self) -> Timedelta: ...
def __mul__(self, other: float) -> Timedelta: ...
def __rmul__(self, other: float) -> Timedelta: ...
# error: Signature of "__floordiv__" incompatible with supertype "timedelta"
@overload # type: ignore[override]
def __floordiv__(self, other: timedelta) -> int: ...
@overload
def __floordiv__(self, other: float) -> Timedelta: ...
@overload
def __floordiv__(
self, other: npt.NDArray[np.timedelta64]
) -> npt.NDArray[np.intp]: ...
@overload
def __floordiv__(
self, other: npt.NDArray[np.number]
) -> npt.NDArray[np.timedelta64] | Timedelta: ...
@overload
def __rfloordiv__(self, other: timedelta | str) -> int: ...
@overload
def __rfloordiv__(self, other: None | NaTType) -> NaTType: ...
@overload
def __rfloordiv__(self, other: np.ndarray) -> npt.NDArray[np.timedelta64]: ...
@overload
def __truediv__(self, other: timedelta) -> float: ...
@overload
def __truediv__(self, other: float) -> Timedelta: ...
def __mod__(self, other: timedelta) -> Timedelta: ...
def __divmod__(self, other: timedelta) -> tuple[int, Timedelta]: ...
def __le__(self, other: timedelta) -> bool: ...
def __lt__(self, other: timedelta) -> bool: ...
def __ge__(self, other: timedelta) -> bool: ...
def __gt__(self, other: timedelta) -> bool: ...
def __hash__(self) -> int: ...
def isoformat(self) -> str: ...
def to_numpy(self) -> np.timedelta64: ...
def view(self, dtype: npt.DTypeLike = ...) -> object: ...
@property
def unit(self) -> str: ...
def as_unit(self, unit: str, round_ok: bool = ...) -> Timedelta: ...
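A short sketch of the Timedelta API declared above (illustrative only):

import pandas as pd

td = pd.Timedelta("1 days 2 hours 30 minutes")
print(td.total_seconds())       # 95400.0
print(td.floor("h"))            # 1 days 02:00:00
print(td / pd.Timedelta("1h"))  # 26.5, true division returns a float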

View file

@@ -0,0 +1,240 @@
from datetime import (
date as _date,
datetime,
time as _time,
timedelta,
tzinfo as _tzinfo,
)
from time import struct_time
from typing import (
ClassVar,
TypeVar,
overload,
)
import numpy as np
from pandas._libs.tslibs import (
BaseOffset,
NaTType,
Period,
Tick,
Timedelta,
)
from pandas._typing import (
Self,
TimestampNonexistent,
)
_DatetimeT = TypeVar("_DatetimeT", bound=datetime)
def integer_op_not_supported(obj: object) -> TypeError: ...
class Timestamp(datetime):
_creso: int
min: ClassVar[Timestamp]
max: ClassVar[Timestamp]
resolution: ClassVar[Timedelta]
_value: int # np.int64
# error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]")
def __new__( # type: ignore[misc]
cls: type[_DatetimeT],
ts_input: np.integer | float | str | _date | datetime | np.datetime64 = ...,
year: int | None = ...,
month: int | None = ...,
day: int | None = ...,
hour: int | None = ...,
minute: int | None = ...,
second: int | None = ...,
microsecond: int | None = ...,
tzinfo: _tzinfo | None = ...,
*,
nanosecond: int | None = ...,
tz: str | _tzinfo | None | int = ...,
unit: str | int | None = ...,
fold: int | None = ...,
) -> _DatetimeT | NaTType: ...
@classmethod
def _from_value_and_reso(
cls, value: int, reso: int, tz: _tzinfo | None
) -> Timestamp: ...
@property
def value(self) -> int: ... # np.int64
@property
def year(self) -> int: ...
@property
def month(self) -> int: ...
@property
def day(self) -> int: ...
@property
def hour(self) -> int: ...
@property
def minute(self) -> int: ...
@property
def second(self) -> int: ...
@property
def microsecond(self) -> int: ...
@property
def nanosecond(self) -> int: ...
@property
def tzinfo(self) -> _tzinfo | None: ...
@property
def tz(self) -> _tzinfo | None: ...
@property
def fold(self) -> int: ...
@classmethod
def fromtimestamp(cls, ts: float, tz: _tzinfo | None = ...) -> Self: ...
@classmethod
def utcfromtimestamp(cls, ts: float) -> Self: ...
@classmethod
def today(cls, tz: _tzinfo | str | None = ...) -> Self: ...
@classmethod
def fromordinal(
cls,
ordinal: int,
tz: _tzinfo | str | None = ...,
) -> Self: ...
@classmethod
def now(cls, tz: _tzinfo | str | None = ...) -> Self: ...
@classmethod
def utcnow(cls) -> Self: ...
# error: Signature of "combine" incompatible with supertype "datetime"
@classmethod
def combine( # type: ignore[override]
cls, date: _date, time: _time
) -> datetime: ...
@classmethod
def fromisoformat(cls, date_string: str) -> Self: ...
def strftime(self, format: str) -> str: ...
def __format__(self, fmt: str) -> str: ...
def toordinal(self) -> int: ...
def timetuple(self) -> struct_time: ...
def timestamp(self) -> float: ...
def utctimetuple(self) -> struct_time: ...
def date(self) -> _date: ...
def time(self) -> _time: ...
def timetz(self) -> _time: ...
# LSP violation: nanosecond is not present in datetime.datetime.replace
# and has positional args following it
def replace( # type: ignore[override]
self,
year: int | None = ...,
month: int | None = ...,
day: int | None = ...,
hour: int | None = ...,
minute: int | None = ...,
second: int | None = ...,
microsecond: int | None = ...,
nanosecond: int | None = ...,
tzinfo: _tzinfo | type[object] | None = ...,
fold: int | None = ...,
) -> Self: ...
# LSP violation: datetime.datetime.astimezone has a default value for tz
def astimezone(self, tz: _tzinfo | None) -> Self: ... # type: ignore[override]
def ctime(self) -> str: ...
def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ...
@classmethod
def strptime(
# Note: strptime is actually disabled and raises NotImplementedError
cls,
date_string: str,
format: str,
) -> Self: ...
def utcoffset(self) -> timedelta | None: ...
def tzname(self) -> str | None: ...
def dst(self) -> timedelta | None: ...
def __le__(self, other: datetime) -> bool: ... # type: ignore[override]
def __lt__(self, other: datetime) -> bool: ... # type: ignore[override]
def __ge__(self, other: datetime) -> bool: ... # type: ignore[override]
def __gt__(self, other: datetime) -> bool: ... # type: ignore[override]
# error: Signature of "__add__" incompatible with supertype "date"/"datetime"
@overload # type: ignore[override]
def __add__(self, other: np.ndarray) -> np.ndarray: ...
@overload
def __add__(self, other: timedelta | np.timedelta64 | Tick) -> Self: ...
def __radd__(self, other: timedelta) -> Self: ...
@overload # type: ignore[override]
def __sub__(self, other: datetime) -> Timedelta: ...
@overload
def __sub__(self, other: timedelta | np.timedelta64 | Tick) -> Self: ...
def __hash__(self) -> int: ...
def weekday(self) -> int: ...
def isoweekday(self) -> int: ...
# Return type "Tuple[int, int, int]" of "isocalendar" incompatible with return
# type "_IsoCalendarDate" in supertype "date"
def isocalendar(self) -> tuple[int, int, int]: ... # type: ignore[override]
@property
def is_leap_year(self) -> bool: ...
@property
def is_month_start(self) -> bool: ...
@property
def is_quarter_start(self) -> bool: ...
@property
def is_year_start(self) -> bool: ...
@property
def is_month_end(self) -> bool: ...
@property
def is_quarter_end(self) -> bool: ...
@property
def is_year_end(self) -> bool: ...
def to_pydatetime(self, warn: bool = ...) -> datetime: ...
def to_datetime64(self) -> np.datetime64: ...
def to_period(self, freq: BaseOffset | str = ...) -> Period: ...
def to_julian_date(self) -> np.float64: ...
@property
def asm8(self) -> np.datetime64: ...
def tz_convert(self, tz: _tzinfo | str | None) -> Self: ...
# TODO: could return NaT?
def tz_localize(
self,
tz: _tzinfo | str | None,
ambiguous: str = ...,
nonexistent: TimestampNonexistent = ...,
) -> Self: ...
def normalize(self) -> Self: ...
# TODO: round/floor/ceil could return NaT?
def round(
self,
freq: str,
ambiguous: bool | str = ...,
nonexistent: TimestampNonexistent = ...,
) -> Self: ...
def floor(
self,
freq: str,
ambiguous: bool | str = ...,
nonexistent: TimestampNonexistent = ...,
) -> Self: ...
def ceil(
self,
freq: str,
ambiguous: bool | str = ...,
nonexistent: TimestampNonexistent = ...,
) -> Self: ...
def day_name(self, locale: str | None = ...) -> str: ...
def month_name(self, locale: str | None = ...) -> str: ...
@property
def day_of_week(self) -> int: ...
@property
def dayofweek(self) -> int: ...
@property
def day_of_year(self) -> int: ...
@property
def dayofyear(self) -> int: ...
@property
def quarter(self) -> int: ...
@property
def week(self) -> int: ...
def to_numpy(
self, dtype: np.dtype | None = ..., copy: bool = ...
) -> np.datetime64: ...
@property
def _date_repr(self) -> str: ...
@property
def days_in_month(self) -> int: ...
@property
def daysinmonth(self) -> int: ...
@property
def unit(self) -> str: ...
def as_unit(self, unit: str, round_ok: bool = ...) -> Timestamp: ...
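A short sketch of the Timestamp API declared above (illustrative only):

import pandas as pd

ts = pd.Timestamp("2023-03-26 01:30")
print(ts.tz_localize("UTC").tz_convert("Europe/Bucharest"))  # 04:30+03:00
print(ts.floor("15min"))  # already on a 15-minute boundary, unchanged
print(ts.to_period("M"))  # Period('2023-03', 'M')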

View file

@@ -0,0 +1,21 @@
from datetime import (
datetime,
tzinfo,
)
from typing import Callable
import numpy as np
# imported from dateutil.tz
dateutil_gettz: Callable[[str], tzinfo]
def tz_standardize(tz: tzinfo) -> tzinfo: ...
def tz_compare(start: tzinfo | None, end: tzinfo | None) -> bool: ...
def infer_tzinfo(
start: datetime | None,
end: datetime | None,
) -> tzinfo | None: ...
def maybe_get_tz(tz: str | int | np.int64 | tzinfo | None) -> tzinfo | None: ...
def get_timezone(tz: tzinfo) -> tzinfo | str: ...
def is_utc(tz: tzinfo | None) -> bool: ...
def is_fixed_offset(tz: tzinfo) -> bool: ...

View file

@@ -0,0 +1,21 @@
from datetime import (
timedelta,
tzinfo,
)
from typing import Iterable
import numpy as np
from pandas._typing import npt
# tz_convert_from_utc_single exposed for testing
def tz_convert_from_utc_single(
val: np.int64, tz: tzinfo, creso: int = ...
) -> np.int64: ...
def tz_localize_to_utc(
vals: npt.NDArray[np.int64],
tz: tzinfo | None,
ambiguous: str | bool | Iterable[bool] | None = ...,
nonexistent: str | timedelta | np.timedelta64 | None = ...,
creso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...

View file

@@ -0,0 +1,43 @@
"""
For cython types that cannot be represented precisely, closest-available
python equivalents are used, and the precise types kept as adjacent comments.
"""
from datetime import tzinfo
import numpy as np
from pandas._libs.tslibs.dtypes import Resolution
from pandas._typing import npt
def dt64arr_to_periodarr(
stamps: npt.NDArray[np.int64],
freq: int,
tz: tzinfo | None,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...
def is_date_array_normalized(
stamps: npt.NDArray[np.int64],
tz: tzinfo | None,
reso: int, # NPY_DATETIMEUNIT
) -> bool: ...
def normalize_i8_timestamps(
stamps: npt.NDArray[np.int64],
tz: tzinfo | None,
reso: int, # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...
def get_resolution(
stamps: npt.NDArray[np.int64],
tz: tzinfo | None = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> Resolution: ...
def ints_to_pydatetime(
arr: npt.NDArray[np.int64],
tz: tzinfo | None = ...,
box: str = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.object_]: ...
def tz_convert_from_utc(
stamps: npt.NDArray[np.int64],
tz: tzinfo | None,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...

View file

@@ -0,0 +1,127 @@
from typing import (
Any,
Callable,
Literal,
)
import numpy as np
from pandas._typing import (
WindowingRankType,
npt,
)
def roll_sum(
values: np.ndarray, # const float64_t[:]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_mean(
values: np.ndarray, # const float64_t[:]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_var(
values: np.ndarray, # const float64_t[:]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
ddof: int = ...,
) -> np.ndarray: ... # np.ndarray[float]
def roll_skew(
values: np.ndarray, # np.ndarray[np.float64]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_kurt(
values: np.ndarray, # np.ndarray[np.float64]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_median_c(
values: np.ndarray, # np.ndarray[np.float64]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_max(
values: np.ndarray, # np.ndarray[np.float64]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_min(
values: np.ndarray, # np.ndarray[np.float64]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_quantile(
values: np.ndarray, # const float64_t[:]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
quantile: float, # float64_t
interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"],
) -> np.ndarray: ... # np.ndarray[float]
def roll_rank(
values: np.ndarray,
start: np.ndarray,
end: np.ndarray,
minp: int,
percentile: bool,
method: WindowingRankType,
ascending: bool,
) -> np.ndarray: ... # np.ndarray[float]
def roll_apply(
obj: object,
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
function: Callable[..., Any],
raw: bool,
args: tuple[Any, ...],
kwargs: dict[str, Any],
) -> npt.NDArray[np.float64]: ...
def roll_weighted_sum(
values: np.ndarray, # const float64_t[:]
weights: np.ndarray, # const float64_t[:]
minp: int,
) -> np.ndarray: ... # np.ndarray[np.float64]
def roll_weighted_mean(
values: np.ndarray, # const float64_t[:]
weights: np.ndarray, # const float64_t[:]
minp: int,
) -> np.ndarray: ... # np.ndarray[np.float64]
def roll_weighted_var(
values: np.ndarray, # const float64_t[:]
weights: np.ndarray, # const float64_t[:]
minp: int, # int64_t
ddof: int, # unsigned int
) -> np.ndarray: ... # np.ndarray[np.float64]
def ewm(
vals: np.ndarray, # const float64_t[:]
start: np.ndarray, # const int64_t[:]
end: np.ndarray, # const int64_t[:]
minp: int,
com: float, # float64_t
adjust: bool,
ignore_na: bool,
deltas: np.ndarray, # const float64_t[:]
normalize: bool,
) -> np.ndarray: ... # np.ndarray[np.float64]
def ewmcov(
input_x: np.ndarray, # const float64_t[:]
start: np.ndarray, # const int64_t[:]
end: np.ndarray, # const int64_t[:]
minp: int,
input_y: np.ndarray, # const float64_t[:]
com: float, # float64_t
adjust: bool,
ignore_na: bool,
bias: bool,
) -> np.ndarray: ... # np.ndarray[np.float64]
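These declarations cover the Cython kernels behind the public rolling/ewm API; a sketch of the user-facing calls that dispatch to them (illustrative only):

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
print(s.rolling(window=2).mean())                # backed by roll_mean
print(s.rolling(window=3, min_periods=1).sum())  # roll_sum with minp=1
print(s.ewm(com=0.5).mean())                     # backed by ewm()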

View file

@@ -0,0 +1,12 @@
import numpy as np
from pandas._typing import npt
def calculate_variable_window_bounds(
num_values: int, # int64_t
window_size: int, # int64_t
min_periods,
center: bool,
closed: str | None,
index: np.ndarray, # const int64_t[:]
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...

View file

@@ -0,0 +1,20 @@
import numpy as np
from pandas._typing import ArrayLike
def write_csv_rows(
data: list[ArrayLike],
data_index: np.ndarray,
nlevels: int,
cols: np.ndarray,
writer: object, # _csv.writer
) -> None: ...
def convert_json_to_lines(arr: str) -> str: ...
def max_len_string_array(
arr: np.ndarray, # pandas_string[:]
) -> int: ...
def word_len(val: object) -> int: ...
def string_array_replace_from_nan_rep(
arr: np.ndarray, # np.ndarray[object, ndim=1]
nan_rep: object,
) -> None: ...

File diff suppressed because it is too large

View file

@@ -0,0 +1,89 @@
"""
Hypothesis data generator helpers.
"""
from datetime import datetime
from hypothesis import strategies as st
from hypothesis.extra.dateutil import timezones as dateutil_timezones
from hypothesis.extra.pytz import timezones as pytz_timezones
from pandas.compat import is_platform_windows
import pandas as pd
from pandas.tseries.offsets import (
BMonthBegin,
BMonthEnd,
BQuarterBegin,
BQuarterEnd,
BYearBegin,
BYearEnd,
MonthBegin,
MonthEnd,
QuarterBegin,
QuarterEnd,
YearBegin,
YearEnd,
)
OPTIONAL_INTS = st.lists(st.one_of(st.integers(), st.none()), max_size=10, min_size=3)
OPTIONAL_FLOATS = st.lists(st.one_of(st.floats(), st.none()), max_size=10, min_size=3)
OPTIONAL_TEXT = st.lists(st.one_of(st.none(), st.text()), max_size=10, min_size=3)
OPTIONAL_DICTS = st.lists(
st.one_of(st.none(), st.dictionaries(st.text(), st.integers())),
max_size=10,
min_size=3,
)
OPTIONAL_LISTS = st.lists(
st.one_of(st.none(), st.lists(st.text(), max_size=10, min_size=3)),
max_size=10,
min_size=3,
)
OPTIONAL_ONE_OF_ALL = st.one_of(
OPTIONAL_DICTS, OPTIONAL_FLOATS, OPTIONAL_INTS, OPTIONAL_LISTS, OPTIONAL_TEXT
)
if is_platform_windows():
DATETIME_NO_TZ = st.datetimes(min_value=datetime(1900, 1, 1))
else:
DATETIME_NO_TZ = st.datetimes()
DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes(
min_value=pd.Timestamp(1900, 1, 1).to_pydatetime(),
max_value=pd.Timestamp(1900, 1, 1).to_pydatetime(),
timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
)
DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ = st.datetimes(
min_value=pd.Timestamp.min.to_pydatetime(warn=False),
max_value=pd.Timestamp.max.to_pydatetime(warn=False),
)
INT_NEG_999_TO_POS_999 = st.integers(-999, 999)
# The strategy for each offset type is registered in conftest.py, as the classes
# don't carry enough runtime information (e.g. type hints) to infer how to build them.
YQM_OFFSET = st.one_of(
*map(
st.from_type,
[
MonthBegin,
MonthEnd,
BMonthBegin,
BMonthEnd,
QuarterBegin,
QuarterEnd,
BQuarterBegin,
BQuarterEnd,
YearBegin,
YearEnd,
BYearBegin,
BYearEnd,
],
)
)
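A sketch of how YQM_OFFSET is typically consumed in a test, assuming the bounded strategies registered in conftest.py (hypothetical test, illustrative only):

import pandas as pd
from hypothesis import given

@given(offset=YQM_OFFSET)
def test_offset_applies_cleanly(offset):
    # a mid-range anchor keeps the result inside the Timestamp bounds
    result = pd.Timestamp("2020-06-15") + offset
    assert isinstance(result, pd.Timestamp)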

View file

@@ -0,0 +1,170 @@
from __future__ import annotations
import gzip
import io
import pathlib
import tarfile
from typing import (
TYPE_CHECKING,
Any,
Callable,
)
import uuid
import zipfile
from pandas.compat import (
get_bz2_file,
get_lzma_file,
)
from pandas.compat._optional import import_optional_dependency
import pandas as pd
from pandas._testing.contexts import ensure_clean
if TYPE_CHECKING:
from pandas._typing import (
FilePath,
ReadPickleBuffer,
)
from pandas import (
DataFrame,
Series,
)
# ------------------------------------------------------------------
# File-IO
def round_trip_pickle(
obj: Any, path: FilePath | ReadPickleBuffer | None = None
) -> DataFrame | Series:
"""
Pickle an object and then read it again.
Parameters
----------
obj : any object
The object to pickle and then re-read.
path : str, path object or file-like object, default None
The path where the pickled object is written and then read.
Returns
-------
pandas object
The original object that was pickled and then re-read.
"""
_path = path
if _path is None:
_path = f"__{uuid.uuid4()}__.pickle"
with ensure_clean(_path) as temp_path:
pd.to_pickle(obj, temp_path)
return pd.read_pickle(temp_path)
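A minimal usage sketch for round_trip_pickle; assert_series_equal is the standard pandas testing helper (sketch only, not part of this module):

import pandas as pd
from pandas._testing import assert_series_equal

s = pd.Series([1, 2, 3], name="x")
result = round_trip_pickle(s)
assert_series_equal(result, s)  # the round trip preserves values and name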
def round_trip_pathlib(writer, reader, path: str | None = None):
"""
Write an object to file specified by a pathlib.Path and read it back
Parameters
----------
writer : callable bound to pandas object
IO writing function (e.g. DataFrame.to_csv)
reader : callable
IO reading function (e.g. pd.read_csv)
path : str, default None
The path where the object is written and then read.
Returns
-------
pandas object
The original object that was serialized and then re-read.
"""
Path = pathlib.Path
if path is None:
path = "___pathlib___"
with ensure_clean(path) as path:
writer(Path(path)) # type: ignore[arg-type]
obj = reader(Path(path)) # type: ignore[arg-type]
return obj
def round_trip_localpath(writer, reader, path: str | None = None):
"""
Write an object to file specified by a py.path LocalPath and read it back.
Parameters
----------
writer : callable bound to pandas object
IO writing function (e.g. DataFrame.to_csv)
reader : callable
IO reading function (e.g. pd.read_csv)
path : str, default None
The path where the object is written and then read.
Returns
-------
pandas object
The original object that was serialized and then re-read.
"""
import pytest
LocalPath = pytest.importorskip("py.path").local
if path is None:
path = "___localpath___"
with ensure_clean(path) as path:
writer(LocalPath(path))
obj = reader(LocalPath(path))
return obj
def write_to_compressed(compression, path, data, dest: str = "test"):
"""
Write data to a compressed file.
Parameters
----------
compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd'}
The compression type to use.
path : str
The file path to write the data.
data : str
The data to write.
dest : str, default "test"
The destination file name inside the archive (used for 'zip' and 'tar')
Raises
------
ValueError : An invalid compression value was passed in.
"""
args: tuple[Any, ...] = (data,)
mode = "wb"
method = "write"
compress_method: Callable
if compression == "zip":
compress_method = zipfile.ZipFile
mode = "w"
args = (dest, data)
method = "writestr"
elif compression == "tar":
compress_method = tarfile.TarFile
mode = "w"
file = tarfile.TarInfo(name=dest)
buf = io.BytesIO(data)  # avoid shadowing the builtin ``bytes``
file.size = len(data)
args = (file, buf)
method = "addfile"
elif compression == "gzip":
compress_method = gzip.GzipFile
elif compression == "bz2":
compress_method = get_bz2_file()
elif compression == "zstd":
compress_method = import_optional_dependency("zstandard").open
elif compression == "xz":
compress_method = get_lzma_file()
else:
raise ValueError(f"Unrecognized compression type: {compression}")
with compress_method(path, mode=mode) as f:
getattr(f, method)(*args)
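A round-trip sketch for write_to_compressed, reusing ensure_clean from this module (illustrative only):

import pandas as pd
from pandas._testing.contexts import ensure_clean

with ensure_clean("data.csv.gz") as path:
    write_to_compressed("gzip", path, b"a,b\n1,2\n")
    df = pd.read_csv(path, compression="gzip")
    assert list(df.columns) == ["a", "b"]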

View file

@@ -0,0 +1,227 @@
from __future__ import annotations
from contextlib import (
contextmanager,
nullcontext,
)
import re
import sys
from typing import (
TYPE_CHECKING,
Literal,
cast,
)
import warnings
from pandas.compat import PY311
if TYPE_CHECKING:
from collections.abc import (
Generator,
Sequence,
)
@contextmanager
def assert_produces_warning(
expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None = Warning,
filter_level: Literal[
"error", "ignore", "always", "default", "module", "once"
] = "always",
check_stacklevel: bool = True,
raise_on_extra_warnings: bool = True,
match: str | None = None,
) -> Generator[list[warnings.WarningMessage], None, None]:
"""
Context manager for running code expected to either raise a specific warning,
multiple specific warnings, or not raise any warnings. Verifies that the code
raises the expected warning(s), and that it does not raise any other unexpected
warnings. It is basically a wrapper around ``warnings.catch_warnings``.
Parameters
----------
expected_warning : {Warning, False, tuple[Warning, ...], None}, default Warning
The type of warning expected. ``Warning`` is the base
class for all warnings. To expect multiple warning types,
pass them as a tuple. To check that no warning is emitted,
specify ``False`` or ``None``.
filter_level : str or None, default "always"
Specifies whether warnings are ignored, displayed, or turned
into errors.
Valid values are:
* "error" - turns matching warnings into exceptions
* "ignore" - discard the warning
* "always" - always emit a warning
* "default" - print the warning the first time it is generated
from each location
* "module" - print the warning the first time it is generated
from each module
* "once" - print the warning the first time it is generated
check_stacklevel : bool, default True
If True, displays the line that called the function containing
the warning to show where the function is called. Otherwise, the
line that implements the function is displayed.
raise_on_extra_warnings : bool, default True
Whether extra warnings not of the type `expected_warning` should
cause the test to fail.
match : str, optional
Match warning message.
Examples
--------
>>> import warnings
>>> with assert_produces_warning():
... warnings.warn(UserWarning())
...
>>> with assert_produces_warning(False):
... warnings.warn(RuntimeWarning())
...
Traceback (most recent call last):
...
AssertionError: Caused unexpected warning(s): ['RuntimeWarning'].
>>> with assert_produces_warning(UserWarning):
... warnings.warn(RuntimeWarning())
Traceback (most recent call last):
...
AssertionError: Did not see expected warning of class 'UserWarning'.
.. warning:: This is *not* thread-safe.
"""
__tracebackhide__ = True
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter(filter_level)
try:
yield w
finally:
if expected_warning:
expected_warning = cast(type[Warning], expected_warning)
_assert_caught_expected_warning(
caught_warnings=w,
expected_warning=expected_warning,
match=match,
check_stacklevel=check_stacklevel,
)
if raise_on_extra_warnings:
_assert_caught_no_extra_warnings(
caught_warnings=w,
expected_warning=expected_warning,
)
def maybe_produces_warning(warning: type[Warning], condition: bool, **kwargs):
"""
Return a context manager that possibly checks a warning based on the condition
"""
if condition:
return assert_produces_warning(warning, **kwargs)
else:
return nullcontext()
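A small sketch of maybe_produces_warning, gating the check on a hypothetical condition such as a dependency version flag:

import warnings

def emit():
    warnings.warn("deprecated", FutureWarning)

check_warning = True  # hypothetical condition
with maybe_produces_warning(FutureWarning, check_warning, match="deprecated"):
    emit()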
def _assert_caught_expected_warning(
*,
caught_warnings: Sequence[warnings.WarningMessage],
expected_warning: type[Warning],
match: str | None,
check_stacklevel: bool,
) -> None:
"""Assert that there was the expected warning among the caught warnings."""
saw_warning = False
matched_message = False
unmatched_messages = []
for actual_warning in caught_warnings:
if issubclass(actual_warning.category, expected_warning):
saw_warning = True
if check_stacklevel:
_assert_raised_with_correct_stacklevel(actual_warning)
if match is not None:
if re.search(match, str(actual_warning.message)):
matched_message = True
else:
unmatched_messages.append(actual_warning.message)
if not saw_warning:
raise AssertionError(
f"Did not see expected warning of class "
f"{repr(expected_warning.__name__)}"
)
if match and not matched_message:
raise AssertionError(
f"Did not see warning {repr(expected_warning.__name__)} "
f"matching '{match}'. The emitted warning messages are "
f"{unmatched_messages}"
)
def _assert_caught_no_extra_warnings(
*,
caught_warnings: Sequence[warnings.WarningMessage],
expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None,
) -> None:
"""Assert that no extra warnings apart from the expected ones are caught."""
extra_warnings = []
for actual_warning in caught_warnings:
if _is_unexpected_warning(actual_warning, expected_warning):
# GH#38630 pytest.filterwarnings does not suppress these.
if actual_warning.category == ResourceWarning:
# GH 44732: Don't make the CI flaky by filtering SSL-related
# ResourceWarning from dependencies
if "unclosed <ssl.SSLSocket" in str(actual_warning.message):
continue
# GH 44844: Matplotlib leaves font files open during the entire process
# upon import. Don't make CI flaky if ResourceWarning raised
# due to these open files.
if any("matplotlib" in mod for mod in sys.modules):
continue
if PY311 and actual_warning.category == EncodingWarning:
# EncodingWarnings are checked in the CI
# pyproject.toml errors on EncodingWarnings in pandas
# Ignore EncodingWarnings from other libraries
continue
extra_warnings.append(
(
actual_warning.category.__name__,
actual_warning.message,
actual_warning.filename,
actual_warning.lineno,
)
)
if extra_warnings:
raise AssertionError(f"Caused unexpected warning(s): {repr(extra_warnings)}")
def _is_unexpected_warning(
actual_warning: warnings.WarningMessage,
expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None,
) -> bool:
"""Check if the actual warning issued is unexpected."""
if actual_warning and not expected_warning:
return True
expected_warning = cast(type[Warning], expected_warning)
return bool(not issubclass(actual_warning.category, expected_warning))
def _assert_raised_with_correct_stacklevel(
actual_warning: warnings.WarningMessage,
) -> None:
from inspect import (
getframeinfo,
stack,
)
caller = getframeinfo(stack()[4][0])
msg = (
"Warning not set with correct stacklevel. "
f"File where warning is raised: {actual_warning.filename} != "
f"{caller.filename}. Warning message: {actual_warning.message}"
)
assert actual_warning.filename == caller.filename, msg

File diff suppressed because it is too large

View file

@@ -0,0 +1,29 @@
"""
Helpers for sharing tests between DataFrame/Series
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from pandas import DataFrame
if TYPE_CHECKING:
from pandas._typing import DtypeObj
def get_dtype(obj) -> DtypeObj:
if isinstance(obj, DataFrame):
# Note: we are assuming only one column
return obj.dtypes.iat[0]
else:
return obj.dtype
def get_obj(df: DataFrame, klass):
"""
For sharing tests using frame_or_series, either return the DataFrame
unchanged or return its first column as a Series.
"""
if klass is DataFrame:
return df
return df._ixs(0, axis=1)

View file

@@ -0,0 +1,216 @@
from __future__ import annotations
from contextlib import contextmanager
import os
from pathlib import Path
import tempfile
from typing import (
IO,
TYPE_CHECKING,
Any,
)
import uuid
from pandas.compat import PYPY
from pandas.errors import ChainedAssignmentError
from pandas import set_option
from pandas.io.common import get_handle
if TYPE_CHECKING:
from collections.abc import Generator
from pandas._typing import (
BaseBuffer,
CompressionOptions,
FilePath,
)
@contextmanager
def decompress_file(
path: FilePath | BaseBuffer, compression: CompressionOptions
) -> Generator[IO[bytes], None, None]:
"""
Open a compressed file and return a file object.
Parameters
----------
path : str
The path where the file is read from.
compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd', None}
Name of the compression with which the file was written
Returns
-------
file object
"""
with get_handle(path, "rb", compression=compression, is_text=False) as handle:
yield handle.handle
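A minimal round-trip sketch for decompress_file, using a hypothetical scratch file:

import gzip
import os
import tempfile

fd, path = tempfile.mkstemp(suffix=".gz")  # hypothetical scratch path
os.close(fd)
with open(path, "wb") as f:
    f.write(gzip.compress(b"payload"))
with decompress_file(path, compression="gzip") as handle:
    assert handle.read() == b"payload"
os.unlink(path)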
@contextmanager
def set_timezone(tz: str) -> Generator[None, None, None]:
"""
Context manager for temporarily setting a timezone.
Parameters
----------
tz : str
A string representing a valid timezone.
Examples
--------
>>> from datetime import datetime
>>> from dateutil.tz import tzlocal
>>> tzlocal().tzname(datetime(2021, 1, 1)) # doctest: +SKIP
'IST'
>>> with set_timezone('US/Eastern'):
... tzlocal().tzname(datetime(2021, 1, 1))
...
'EST'
"""
import time
def setTZ(tz) -> None:
if tz is None:
try:
del os.environ["TZ"]
except KeyError:
pass
else:
os.environ["TZ"] = tz
time.tzset()
orig_tz = os.environ.get("TZ")
setTZ(tz)
try:
yield
finally:
setTZ(orig_tz)
@contextmanager
def ensure_clean(
filename=None, return_filelike: bool = False, **kwargs: Any
) -> Generator[Any, None, None]:
"""
Get a temporary path and agree to remove it on close.
This implementation does not use tempfile.mkstemp to avoid having a file handle.
If the code using the returned path wants to delete the file itself, Windows
requires that no program has a file handle to it.
Parameters
----------
filename : str, optional
Suffix of the created file.
return_filelike : bool, default False
If True, returns a file-like which is *always* cleaned. Necessary for
savefig and other functions which want to append extensions.
**kwargs
Additional keywords are passed to open().
"""
folder = Path(tempfile.gettempdir())
if filename is None:
filename = ""
filename = str(uuid.uuid4()) + filename
path = folder / filename
path.touch()
handle_or_str: str | IO = str(path)
encoding = kwargs.pop("encoding", None)
if return_filelike:
kwargs.setdefault("mode", "w+b")
if encoding is None and "b" not in kwargs["mode"]:
encoding = "utf-8"
handle_or_str = open(path, encoding=encoding, **kwargs)
try:
yield handle_or_str
finally:
if not isinstance(handle_or_str, str):
handle_or_str.close()
if path.is_file():
path.unlink()
@contextmanager
def with_csv_dialect(name: str, **kwargs) -> Generator[None, None, None]:
"""
Context manager to temporarily register a CSV dialect for parsing CSV.
Parameters
----------
name : str
The name of the dialect.
kwargs : mapping
The parameters for the dialect.
Raises
------
ValueError : the name of the dialect conflicts with a builtin one.
See Also
--------
csv : Python's CSV library.
"""
import csv
_BUILTIN_DIALECTS = {"excel", "excel-tab", "unix"}
if name in _BUILTIN_DIALECTS:
raise ValueError("Cannot override builtin dialect.")
csv.register_dialect(name, **kwargs)
try:
yield
finally:
csv.unregister_dialect(name)
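A short sketch: register a temporary dialect and hand its name to read_csv (illustrative only):

from io import StringIO
import pandas as pd

with with_csv_dialect("piped", delimiter="|"):
    df = pd.read_csv(StringIO("a|b\n1|2\n"), dialect="piped")
assert list(df.columns) == ["a", "b"]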
@contextmanager
def use_numexpr(use, min_elements=None) -> Generator[None, None, None]:
from pandas.core.computation import expressions as expr
if min_elements is None:
min_elements = expr._MIN_ELEMENTS
olduse = expr.USE_NUMEXPR
oldmin = expr._MIN_ELEMENTS
set_option("compute.use_numexpr", use)
expr._MIN_ELEMENTS = min_elements
try:
yield
finally:
expr._MIN_ELEMENTS = oldmin
set_option("compute.use_numexpr", olduse)
def raises_chained_assignment_error(extra_warnings=(), extra_match=()):
from pandas._testing import assert_produces_warning
if PYPY and not extra_warnings:
from contextlib import nullcontext
return nullcontext()
elif PYPY and extra_warnings:
return assert_produces_warning(
extra_warnings,
match="|".join(extra_match),
)
else:
match = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment"
)
return assert_produces_warning(
(ChainedAssignmentError, *extra_warnings),
match="|".join((match, *extra_match)),
)

View file

@@ -0,0 +1,476 @@
from __future__ import annotations
from collections.abc import (
Hashable,
Iterator,
Mapping,
Sequence,
)
from datetime import (
date,
datetime,
timedelta,
tzinfo,
)
from os import PathLike
import sys
from typing import (
TYPE_CHECKING,
Any,
Callable,
Literal,
Optional,
Protocol,
Type as type_t,
TypeVar,
Union,
)
import numpy as np
# To prevent import cycles place any internal imports in the branch below
# and use a string literal forward reference to it in subsequent types
# https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
if TYPE_CHECKING:
import numpy.typing as npt
from pandas._libs import (
NaTType,
Period,
Timedelta,
Timestamp,
)
from pandas._libs.tslibs import BaseOffset
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas import Interval
from pandas.arrays import (
DatetimeArray,
TimedeltaArray,
)
from pandas.core.arrays.base import ExtensionArray
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame
from pandas.core.groupby.generic import (
DataFrameGroupBy,
GroupBy,
SeriesGroupBy,
)
from pandas.core.indexes.base import Index
from pandas.core.internals import (
ArrayManager,
BlockManager,
SingleArrayManager,
SingleBlockManager,
)
from pandas.core.resample import Resampler
from pandas.core.series import Series
from pandas.core.window.rolling import BaseWindow
from pandas.io.formats.format import EngFormatter
from pandas.tseries.holiday import AbstractHolidayCalendar
ScalarLike_co = Union[
int,
float,
complex,
str,
bytes,
np.generic,
]
# numpy compatible types
NumpyValueArrayLike = Union[ScalarLike_co, npt.ArrayLike]
# Name "npt._ArrayLikeInt_co" is not defined [name-defined]
NumpySorter = Optional[npt._ArrayLikeInt_co] # type: ignore[name-defined]
if sys.version_info >= (3, 10):
from typing import TypeGuard # pyright: ignore[reportUnusedImport]
else:
from typing_extensions import TypeGuard # pyright: ignore[reportUnusedImport]
if sys.version_info >= (3, 11):
from typing import Self # pyright: ignore[reportUnusedImport]
else:
from typing_extensions import Self # pyright: ignore[reportUnusedImport]
else:
npt: Any = None
Self: Any = None
TypeGuard: Any = None
HashableT = TypeVar("HashableT", bound=Hashable)
# array-like
ArrayLike = Union["ExtensionArray", np.ndarray]
AnyArrayLike = Union[ArrayLike, "Index", "Series"]
TimeArrayLike = Union["DatetimeArray", "TimedeltaArray"]
# list-like
# Cannot use `Sequence` because a string is a sequence, and we don't want to
# accept that. Could refine if https://github.com/python/typing/issues/256 is
# resolved to differentiate between Sequence[str] and str
ListLike = Union[AnyArrayLike, list, range]
# scalars
PythonScalar = Union[str, float, bool]
DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, date]
IntStrT = TypeVar("IntStrT", int, str)
# timestamp and timedelta convertible types
TimestampConvertibleTypes = Union[
"Timestamp", date, np.datetime64, np.int64, float, str
]
TimestampNonexistent = Union[
Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta
]
TimedeltaConvertibleTypes = Union[
"Timedelta", timedelta, np.timedelta64, np.int64, float, str
]
Timezone = Union[str, tzinfo]
ToTimestampHow = Literal["s", "e", "start", "end"]
# NDFrameT is stricter and ensures that the same subclass of NDFrame always is
# used. E.g. `def func(a: NDFrameT) -> NDFrameT: ...` means that if a
# Series is passed into a function, a Series is always returned and if a DataFrame is
# passed in, a DataFrame is always returned.
NDFrameT = TypeVar("NDFrameT", bound="NDFrame")
NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index")
AxisInt = int
Axis = Union[AxisInt, Literal["index", "columns", "rows"]]
IndexLabel = Union[Hashable, Sequence[Hashable]]
Level = Hashable
Shape = tuple[int, ...]
Suffixes = tuple[Optional[str], Optional[str]]
Ordered = Optional[bool]
JSONSerializable = Optional[Union[PythonScalar, list, dict]]
Frequency = Union[str, "BaseOffset"]
Axes = ListLike
RandomState = Union[
int,
np.ndarray,
np.random.Generator,
np.random.BitGenerator,
np.random.RandomState,
]
# dtypes
NpDtype = Union[str, np.dtype, type_t[Union[str, complex, bool, object]]]
Dtype = Union["ExtensionDtype", NpDtype]
AstypeArg = Union["ExtensionDtype", "npt.DTypeLike"]
# DtypeArg specifies all allowable dtypes in a function's dtype argument
DtypeArg = Union[Dtype, dict[Hashable, Dtype]]
DtypeObj = Union[np.dtype, "ExtensionDtype"]
# converters
ConvertersArg = dict[Hashable, Callable[[Dtype], Dtype]]
# parse_dates
ParseDatesArg = Union[
bool, list[Hashable], list[list[Hashable]], dict[Hashable, list[Hashable]]
]
# For functions like rename that convert one label to another
Renamer = Union[Mapping[Any, Hashable], Callable[[Any], Hashable]]
# to maintain type information across generic functions and parametrization
T = TypeVar("T")
# used in decorators to preserve the signature of the function it decorates
# see https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
FuncType = Callable[..., Any]
F = TypeVar("F", bound=FuncType)
# types of vectorized key functions for DataFrame::sort_values and
# DataFrame::sort_index, among others
ValueKeyFunc = Optional[Callable[["Series"], Union["Series", AnyArrayLike]]]
IndexKeyFunc = Optional[Callable[["Index"], Union["Index", AnyArrayLike]]]
# types of `func` kwarg for DataFrame.aggregate and Series.aggregate
AggFuncTypeBase = Union[Callable, str]
AggFuncTypeDict = dict[Hashable, Union[AggFuncTypeBase, list[AggFuncTypeBase]]]
AggFuncType = Union[
AggFuncTypeBase,
list[AggFuncTypeBase],
AggFuncTypeDict,
]
AggObjType = Union[
"Series",
"DataFrame",
"GroupBy",
"SeriesGroupBy",
"DataFrameGroupBy",
"BaseWindow",
"Resampler",
]
PythonFuncType = Callable[[Any], Any]
# filenames and file-like-objects
AnyStr_co = TypeVar("AnyStr_co", str, bytes, covariant=True)
AnyStr_contra = TypeVar("AnyStr_contra", str, bytes, contravariant=True)
class BaseBuffer(Protocol):
@property
def mode(self) -> str:
# for _get_filepath_or_buffer
...
def seek(self, __offset: int, __whence: int = ...) -> int:
# with one argument: gzip.GzipFile, bz2.BZ2File
# with two arguments: zip.ZipFile, read_sas
...
def seekable(self) -> bool:
# for bz2.BZ2File
...
def tell(self) -> int:
# for zip.ZipFile, read_stata, to_stata
...
class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]):
def read(self, __n: int = ...) -> AnyStr_co:
# for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File
...
class WriteBuffer(BaseBuffer, Protocol[AnyStr_contra]):
def write(self, __b: AnyStr_contra) -> Any:
# for gzip.GzipFile, bz2.BZ2File
...
def flush(self) -> Any:
# for gzip.GzipFile, bz2.BZ2File
...
class ReadPickleBuffer(ReadBuffer[bytes], Protocol):
def readline(self) -> bytes:
...
class WriteExcelBuffer(WriteBuffer[bytes], Protocol):
def truncate(self, size: int | None = ...) -> int:
...
class ReadCsvBuffer(ReadBuffer[AnyStr_co], Protocol):
def __iter__(self) -> Iterator[AnyStr_co]:
# for engine=python
...
def fileno(self) -> int:
# for _MMapWrapper
...
def readline(self) -> AnyStr_co:
# for engine=python
...
@property
def closed(self) -> bool:
# for engine=pyarrow
...
FilePath = Union[str, "PathLike[str]"]
# for arbitrary kwargs passed during reading/writing files
StorageOptions = Optional[dict[str, Any]]
# compression keywords and compression
CompressionDict = dict[str, Any]
CompressionOptions = Optional[
Union[Literal["infer", "gzip", "bz2", "zip", "xz", "zstd", "tar"], CompressionDict]
]
# types in DataFrameFormatter
FormattersType = Union[
list[Callable], tuple[Callable, ...], Mapping[Union[str, int], Callable]
]
ColspaceType = Mapping[Hashable, Union[str, int]]
FloatFormatType = Union[str, Callable, "EngFormatter"]
ColspaceArgType = Union[
str, int, Sequence[Union[str, int]], Mapping[Hashable, Union[str, int]]
]
# Arguments for fillna()
FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"]
InterpolateOptions = Literal[
"linear",
"time",
"index",
"values",
"nearest",
"zero",
"slinear",
"quadratic",
"cubic",
"barycentric",
"polynomial",
"krogh",
"piecewise_polynomial",
"spline",
"pchip",
"akima",
"cubicspline",
"from_derivatives",
]
# internals
Manager = Union[
"ArrayManager", "SingleArrayManager", "BlockManager", "SingleBlockManager"
]
SingleManager = Union["SingleArrayManager", "SingleBlockManager"]
Manager2D = Union["ArrayManager", "BlockManager"]
# indexing
# PositionalIndexer -> valid 1D positional indexer, e.g. can pass
# to ndarray.__getitem__
# ScalarIndexer is for a single value as the index
# SequenceIndexer is for list like or slices (but not tuples)
# PositionalIndexerTuple extends PositionalIndexer for 2D arrays
# These are used in various __getitem__ overloads
# TODO(typing#684): add Ellipsis, see
# https://github.com/python/typing/issues/684#issuecomment-548203158
# https://bugs.python.org/issue41810
# Using List[int] here rather than Sequence[int] to disallow tuples.
ScalarIndexer = Union[int, np.integer]
SequenceIndexer = Union[slice, list[int], np.ndarray]
PositionalIndexer = Union[ScalarIndexer, SequenceIndexer]
PositionalIndexerTuple = tuple[PositionalIndexer, PositionalIndexer]
PositionalIndexer2D = Union[PositionalIndexer, PositionalIndexerTuple]
if TYPE_CHECKING:
TakeIndexer = Union[Sequence[int], Sequence[np.integer], npt.NDArray[np.integer]]
else:
TakeIndexer = Any
# Shared by functions such as drop and astype
IgnoreRaise = Literal["ignore", "raise"]
# Windowing rank methods
WindowingRankType = Literal["average", "min", "max"]
# read_csv engines
CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]
# read_json engines
JSONEngine = Literal["ujson", "pyarrow"]
# read_xml parsers
XMLParsers = Literal["lxml", "etree"]
# Interval closed type
IntervalLeftRight = Literal["left", "right"]
IntervalClosedType = Union[IntervalLeftRight, Literal["both", "neither"]]
# datetime and NaTType
DatetimeNaTType = Union[datetime, "NaTType"]
DateTimeErrorChoices = Union[IgnoreRaise, Literal["coerce"]]
# sort_index
SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
NaPosition = Literal["first", "last"]
# Arguments for nsmallest and nlargest
NsmallestNlargestKeep = Literal["first", "last", "all"]
# quantile interpolation
QuantileInterpolation = Literal["linear", "lower", "higher", "midpoint", "nearest"]
# plotting
PlottingOrientation = Literal["horizontal", "vertical"]
# dropna
AnyAll = Literal["any", "all"]
# merge
MergeHow = Literal["left", "right", "inner", "outer", "cross"]
MergeValidate = Literal[
"one_to_one",
"1:1",
"one_to_many",
"1:m",
"many_to_one",
"m:1",
"many_to_many",
"m:m",
]
# join
JoinHow = Literal["left", "right", "inner", "outer"]
JoinValidate = Literal[
"one_to_one",
"1:1",
"one_to_many",
"1:m",
"many_to_one",
"m:1",
"many_to_many",
"m:m",
]
# reindex
ReindexMethod = Union[FillnaOptions, Literal["nearest"]]
MatplotlibColor = Union[str, Sequence[float]]
TimeGrouperOrigin = Union[
"Timestamp", Literal["epoch", "start", "start_day", "end", "end_day"]
]
TimeAmbiguous = Union[Literal["infer", "NaT", "raise"], "npt.NDArray[np.bool_]"]
TimeNonexistent = Union[
Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta
]
DropKeep = Literal["first", "last", False]
CorrelationMethod = Union[
Literal["pearson", "kendall", "spearman"], Callable[[np.ndarray, np.ndarray], float]
]
AlignJoin = Literal["outer", "inner", "left", "right"]
DtypeBackend = Literal["pyarrow", "numpy_nullable"]
TimeUnit = Literal["s", "ms", "us", "ns"]
OpenFileErrors = Literal[
"strict",
"ignore",
"replace",
"surrogateescape",
"xmlcharrefreplace",
"backslashreplace",
"namereplace",
]
# update
UpdateJoin = Literal["left"]
# applymap
NaAction = Literal["ignore"]
# from_dict
FromDictOrient = Literal["columns", "index", "tight"]
# to_gbq
ToGbqIfexist = Literal["fail", "replace", "append"]
# to_stata
ToStataByteorder = Literal[">", "<", "little", "big"]
# ExcelWriter
ExcelWriterIfSheetExists = Literal["error", "new", "replace", "overlay"]
# Offsets
OffsetCalendar = Union[np.busdaycalendar, "AbstractHolidayCalendar"]
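A sketch of how these aliases annotate pandas-facing helpers (hypothetical function, shown only to illustrate the aliases):

from pandas._typing import CompressionOptions, FilePath

def sniff_format(path: FilePath, compression: CompressionOptions = "infer") -> None:
    # hypothetical helper: the aliases keep signatures consistent with pandas IO
    ...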

View file

@@ -0,0 +1,692 @@
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by GitHub's download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain.
# Generated by versioneer-0.28
# https://github.com/python-versioneer/python-versioneer
"""Git implementation of _version.py."""
import errno
import functools
import os
import re
import subprocess
import sys
from typing import Callable
def get_keywords():
"""Get the keywords needed to look up the version information."""
# these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
git_refnames = "$Format:%d$"
git_full = "$Format:%H$"
git_date = "$Format:%ci$"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
class VersioneerConfig:
"""Container for Versioneer configuration parameters."""
def get_config():
"""Create, populate and return the VersioneerConfig() object."""
# these strings are filled in when 'setup.py versioneer' creates
# _version.py
cfg = VersioneerConfig()
cfg.VCS = "git"
cfg.style = "pep440"
cfg.tag_prefix = "v"
cfg.parentdir_prefix = "pandas-"
cfg.versionfile_source = "pandas/_version.py"
cfg.verbose = False
return cfg
class NotThisMethod(Exception):
"""Exception raised if a method is not valid for the current scenario."""
LONG_VERSION_PY: dict[str, str] = {}
HANDLERS: dict[str, dict[str, Callable]] = {}
def register_vcs_handler(vcs, method): # decorator
"""Create decorator to mark a method as the handler of a VCS."""
def decorate(f):
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
HANDLERS[vcs][method] = f
return f
return decorate
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None):
"""Call the given command(s)."""
assert isinstance(commands, list)
process = None
popen_kwargs = {}
if sys.platform == "win32":
# This hides the console window if pythonw.exe is used
startupinfo = subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
popen_kwargs["startupinfo"] = startupinfo
for command in commands:
dispcmd = str([command] + args)
try:
# remember shell=False, so use git.cmd on windows, not just git
process = subprocess.Popen(
[command] + args,
cwd=cwd,
env=env,
stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr else None),
**popen_kwargs,
)
break
except OSError:
e = sys.exc_info()[1]
if e.errno == errno.ENOENT:
continue
if verbose:
print(f"unable to run {dispcmd}")
print(e)
return None, None
else:
if verbose:
print(f"unable to find command, tried {commands}")
return None, None
stdout = process.communicate()[0].strip().decode()
if process.returncode != 0:
if verbose:
print(f"unable to run {dispcmd} (error)")
print(f"stdout was {stdout}")
return None, process.returncode
return stdout, process.returncode
def versions_from_parentdir(parentdir_prefix, root, verbose):
"""Try to determine the version from the parent directory name.
Source tarballs conventionally unpack into a directory that includes both
the project name and a version string. We will also support searching up
two directory levels for an appropriately named parent directory
"""
rootdirs = []
for _ in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
return {
"version": dirname[len(parentdir_prefix) :],
"full-revisionid": None,
"dirty": False,
"error": None,
"date": None,
}
rootdirs.append(root)
root = os.path.dirname(root) # up a level
if verbose:
print(
f"Tried directories {str(rootdirs)} \
but none started with prefix {parentdir_prefix}"
)
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
"""Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these
# keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from
# _version.py.
keywords = {}
try:
with open(versionfile_abs, encoding="utf-8") as fobj:
for line in fobj:
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["refnames"] = mo.group(1)
if line.strip().startswith("git_full ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["full"] = mo.group(1)
if line.strip().startswith("git_date ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["date"] = mo.group(1)
except OSError:
pass
return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
"""Get version information from git keywords."""
if "refnames" not in keywords:
raise NotThisMethod("Short version file found")
date = keywords.get("date")
if date is not None:
# Use only the last line. Previous lines may contain GPG signature
# information.
date = date.splitlines()[-1]
# git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
# datestamp. However we prefer "%ci" (which expands to an "ISO-8601
# -like" string, which we must then edit to make compliant), because
# it's been around since git-1.5.3, and it's too difficult to
# discover which version we're using, or to work around using an
# older one.
date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
refnames = keywords["refnames"].strip()
if refnames.startswith("$Format"):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
# expansion behaves like git log --decorate=short and strips out the
# refs/heads/ and refs/tags/ prefixes that would let us distinguish
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = {r for r in refs if re.search(r"\d", r)}
if verbose:
print(f"discarding '{','.join(refs - tags)}', no digits")
if verbose:
print(f"likely tags: {','.join(sorted(tags))}")
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix) :]
# Filter out refs that exactly match prefix or that don't start
# with a number once the prefix is stripped (mostly a concern
# when prefix is '')
if not re.match(r"\d", r):
continue
if verbose:
print(f"picking {r}")
return {
"version": r,
"full-revisionid": keywords["full"].strip(),
"dirty": False,
"error": None,
"date": date,
}
# no suitable tags, so version is "0+unknown", but full hex is still there
if verbose:
print("no suitable tags, using unknown + full revision id")
return {
"version": "0+unknown",
"full-revisionid": keywords["full"].strip(),
"dirty": False,
"error": "no suitable tags",
"date": None,
}
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
"""Get version from 'git describe' in the root of the source tree.
This only gets called if the git-archive 'subst' keywords were *not*
expanded, and _version.py hasn't already been rewritten with a short
version string, meaning we're inside a checked out source tree.
"""
GITS = ["git"]
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
# GIT_DIR can interfere with correct operation of Versioneer.
# It may be intended to be passed to the Versioneer-versioned project,
# but that should not change where we get our version from.
env = os.environ.copy()
env.pop("GIT_DIR", None)
runner = functools.partial(runner, env=env)
_, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose)
if rc != 0:
if verbose:
print(f"Directory {root} not under git control")
raise NotThisMethod("'git rev-parse --git-dir' returned error")
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
describe_out, rc = runner(
GITS,
[
"describe",
"--tags",
"--dirty",
"--always",
"--long",
"--match",
f"{tag_prefix}[[:digit:]]*",
],
cwd=root,
)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip()
full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None:
raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip()
pieces = {}
pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root)
# --abbrev-ref was added in git-1.6.3
if rc != 0 or branch_name is None:
raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
branch_name = branch_name.strip()
if branch_name == "HEAD":
# If we aren't exactly on a branch, pick a branch which represents
# the current commit. If all else fails, we are on a branchless
# commit.
branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
# --contains was added in git-1.5.4
if rc != 0 or branches is None:
raise NotThisMethod("'git branch --contains' returned error")
branches = branches.split("\n")
# Remove the first line if we're running detached
if "(" in branches[0]:
branches.pop(0)
# Strip off the leading "* " from the list of branches.
branches = [branch[2:] for branch in branches]
if "master" in branches:
branch_name = "master"
elif not branches:
branch_name = None
else:
# Pick the first branch that is returned. Good or bad.
branch_name = branches[0]
pieces["branch"] = branch_name
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens.
git_describe = describe_out
# look for -dirty suffix
dirty = git_describe.endswith("-dirty")
pieces["dirty"] = dirty
if dirty:
git_describe = git_describe[: git_describe.rindex("-dirty")]
# now we have TAG-NUM-gHEX or HEX
if "-" in git_describe:
# TAG-NUM-gHEX
mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
if not mo:
# unparsable. Maybe git-describe is misbehaving?
pieces["error"] = f"unable to parse git-describe output: '{describe_out}'"
return pieces
# tag
full_tag = mo.group(1)
if not full_tag.startswith(tag_prefix):
if verbose:
fmt = "tag '%s' doesn't start with prefix '%s'"
print(fmt % (full_tag, tag_prefix))
pieces[
"error"
] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'"
return pieces
pieces["closest-tag"] = full_tag[len(tag_prefix) :]
# distance: number of commits since tag
pieces["distance"] = int(mo.group(2))
# commit: short hex revision ID
pieces["short"] = mo.group(3)
else:
# HEX: no tags
pieces["closest-tag"] = None
out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
pieces["distance"] = len(out.split()) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords()
date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
# Use only the last line. Previous lines may contain GPG signature
# information.
date = date.splitlines()[-1]
pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
return pieces
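# For illustration, a checkout three commits past tag "v1.2" with local
# modifications, on branch "feature" (all values hypothetical), yields:
#   {"long": "<40-char hex>", "short": "abc1234", "error": None,
#    "branch": "feature", "dirty": True, "closest-tag": "1.2",
#    "distance": 3, "date": "2023-12-08T14:21:40+0100"}
# assuming tag_prefix="v", since the prefix is stripped from "closest-tag".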
def plus_or_dot(pieces):
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
return "+"
def render_pep440(pieces):
"""Build up version string, with post-release "local version identifier".
Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
Exceptions:
1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += plus_or_dot(pieces)
rendered += f"{pieces['distance']}.g{pieces['short']}"
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = f"0+untagged.{pieces['distance']}.g{pieces['short']}"
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def render_pep440_branch(pieces):
"""TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
The ".dev0" means not master branch. Note that .dev0 sorts backwards
(a feature branch will appear "older" than the master branch).
Exceptions:
1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += f"{pieces['distance']}.g{pieces['short']}"
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0"
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += f"+untagged.{pieces['distance']}.g{pieces['short']}"
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def pep440_split_post(ver):
"""Split pep440 version string at the post-release segment.
Returns the release segments before the post-release and the
post-release version number (or None if no post-release segment is present).
"""
vc = ver.split(".post")
return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
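# For reference, given the splitting rules above:
#   pep440_split_post("1.2.post3") -> ("1.2", 3)
#   pep440_split_post("1.2.post")  -> ("1.2", 0)
#   pep440_split_post("1.2")       -> ("1.2", None)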
def render_pep440_pre(pieces):
"""TAG[.postN.devDISTANCE] -- No -dirty.
Exceptions:
1: no tags. 0.post0.devDISTANCE
"""
if pieces["closest-tag"]:
if pieces["distance"]:
# update the post release segment
tag_version, post_version = pep440_split_post(pieces["closest-tag"])
rendered = tag_version
if post_version is not None:
rendered += f".post{post_version + 1}.dev{pieces['distance']}"
else:
rendered += f".post0.dev{pieces['distance']}"
else:
# no commits, use the tag as the version
rendered = pieces["closest-tag"]
else:
# exception #1
rendered = f"0.post0.dev{pieces['distance']}"
return rendered
def render_pep440_post(pieces):
"""TAG[.postDISTANCE[.dev0]+gHEX] .
The ".dev0" means dirty. Note that .dev0 sorts backwards
(a dirty tree will appear "older" than the corresponding clean one),
but you shouldn't be releasing software with -dirty anyways.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += f".post{pieces['distance']}"
if pieces["dirty"]:
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += f"g{pieces['short']}"
else:
# exception #1
rendered = f"0.post{pieces['distance']}"
if pieces["dirty"]:
rendered += ".dev0"
rendered += f"+g{pieces['short']}"
return rendered
def render_pep440_post_branch(pieces):
"""TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
The ".dev0" means not master branch.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += f".post{pieces['distance']}"
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += f"g{pieces['short']}"
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = f"0.post{pieces['distance']}"
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += f"+g{pieces['short']}"
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def render_pep440_old(pieces):
"""TAG[.postDISTANCE[.dev0]] .
The ".dev0" means dirty.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += f"0.post{pieces['distance']}"
if pieces["dirty"]:
rendered += ".dev0"
else:
# exception #1
rendered = f"0.post{pieces['distance']}"
if pieces["dirty"]:
rendered += ".dev0"
return rendered
def render_git_describe(pieces):
"""TAG[-DISTANCE-gHEX][-dirty].
Like 'git describe --tags --dirty --always'.
Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
rendered += f"-{pieces['distance']}-g{pieces['short']}"
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered
def render_git_describe_long(pieces):
"""TAG-DISTANCE-gHEX[-dirty].
Like 'git describe --tags --dirty --always --long'.
The distance/hash is unconditional.
Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
rendered += f"-{pieces['distance']}-g{pieces['short']}"
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered
def render(pieces, style):
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {
"version": "unknown",
"full-revisionid": pieces.get("long"),
"dirty": None,
"error": pieces["error"],
"date": None,
}
if not style or style == "default":
style = "pep440" # the default
if style == "pep440":
rendered = render_pep440(pieces)
elif style == "pep440-branch":
rendered = render_pep440_branch(pieces)
elif style == "pep440-pre":
rendered = render_pep440_pre(pieces)
elif style == "pep440-post":
rendered = render_pep440_post(pieces)
elif style == "pep440-post-branch":
rendered = render_pep440_post_branch(pieces)
elif style == "pep440-old":
rendered = render_pep440_old(pieces)
elif style == "git-describe":
rendered = render_git_describe(pieces)
elif style == "git-describe-long":
rendered = render_git_describe_long(pieces)
else:
raise ValueError(f"unknown style '{style}'")
return {
"version": rendered,
"full-revisionid": pieces["long"],
"dirty": pieces["dirty"],
"error": None,
"date": pieces.get("date"),
}
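# Worked example, derived from the renderers above. For
#   pieces = {"closest-tag": "1.2", "distance": 3, "short": "abc1234",
#             "dirty": True, "branch": "feature", "long": "...", "error": None}
# the styles render as:
#   pep440           -> "1.2+3.gabc1234.dirty"
#   pep440-branch    -> "1.2.dev0+3.gabc1234.dirty"
#   pep440-post      -> "1.2.post3.dev0+gabc1234"
#   git-describe     -> "1.2-3-gabc1234-dirty"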
def get_versions():
"""Get version information or return default if unable to do so."""
# I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
# __file__, we can work backwards from there to the root. Some
# py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
# case we can only use expanded keywords.
cfg = get_config()
verbose = cfg.verbose
try:
return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose)
except NotThisMethod:
pass
try:
root = os.path.realpath(__file__)
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
for _ in cfg.versionfile_source.split("/"):
root = os.path.dirname(root)
except NameError:
return {
"version": "0+unknown",
"full-revisionid": None,
"dirty": None,
"error": "unable to find root of source tree",
"date": None,
}
try:
pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
return render(pieces, cfg.style)
except NotThisMethod:
pass
try:
if cfg.parentdir_prefix:
return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
except NotThisMethod:
pass
return {
"version": "0+unknown",
"full-revisionid": None,
"dirty": None,
"error": "unable to compute version",
"date": None,
}

View file

@@ -0,0 +1,2 @@
__version__="2.1.4"
__git_version__="a671b5a8bf5dd13fb19f0e88edc679bc9e15c673"

View file

@@ -0,0 +1,16 @@
""" public toolkit API """
from pandas.api import (
extensions,
indexers,
interchange,
types,
typing,
)
__all__ = [
"interchange",
"extensions",
"indexers",
"types",
"typing",
]

View file

@@ -0,0 +1,33 @@
"""
Public API for extending pandas objects.
"""
from pandas._libs.lib import no_default
from pandas.core.dtypes.base import (
ExtensionDtype,
register_extension_dtype,
)
from pandas.core.accessor import (
register_dataframe_accessor,
register_index_accessor,
register_series_accessor,
)
from pandas.core.algorithms import take
from pandas.core.arrays import (
ExtensionArray,
ExtensionScalarOpsMixin,
)
__all__ = [
"no_default",
"ExtensionDtype",
"register_extension_dtype",
"register_dataframe_accessor",
"register_index_accessor",
"register_series_accessor",
"take",
"ExtensionArray",
"ExtensionScalarOpsMixin",
]

View file

@@ -0,0 +1,17 @@
"""
Public API for Rolling Window Indexers.
"""
from pandas.core.indexers import check_array_indexer
from pandas.core.indexers.objects import (
BaseIndexer,
FixedForwardWindowIndexer,
VariableOffsetWindowIndexer,
)
__all__ = [
"check_array_indexer",
"BaseIndexer",
"FixedForwardWindowIndexer",
"VariableOffsetWindowIndexer",
]

View file

@@ -0,0 +1,8 @@
"""
Public API for DataFrame interchange protocol.
"""
from pandas.core.interchange.dataframe_protocol import DataFrame
from pandas.core.interchange.from_dataframe import from_dataframe
__all__ = ["from_dataframe", "DataFrame"]

View file

@@ -0,0 +1,23 @@
"""
Public toolkit API.
"""
from pandas._libs.lib import infer_dtype
from pandas.core.dtypes.api import * # noqa: F403
from pandas.core.dtypes.concat import union_categoricals
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
IntervalDtype,
PeriodDtype,
)
__all__ = [
"infer_dtype",
"union_categoricals",
"CategoricalDtype",
"DatetimeTZDtype",
"IntervalDtype",
"PeriodDtype",
]

View file

@@ -0,0 +1,55 @@
"""
Public API classes that store intermediate results useful for type-hinting.
"""
from pandas._libs import NaTType
from pandas._libs.missing import NAType
from pandas.core.groupby import (
DataFrameGroupBy,
SeriesGroupBy,
)
from pandas.core.resample import (
DatetimeIndexResamplerGroupby,
PeriodIndexResamplerGroupby,
Resampler,
TimedeltaIndexResamplerGroupby,
TimeGrouper,
)
from pandas.core.window import (
Expanding,
ExpandingGroupby,
ExponentialMovingWindow,
ExponentialMovingWindowGroupby,
Rolling,
RollingGroupby,
Window,
)
# TODO: Can't import Styler without importing jinja2
# from pandas.io.formats.style import Styler
from pandas.io.json._json import JsonReader
from pandas.io.stata import StataReader
__all__ = [
"DataFrameGroupBy",
"DatetimeIndexResamplerGroupby",
"Expanding",
"ExpandingGroupby",
"ExponentialMovingWindow",
"ExponentialMovingWindowGroupby",
"JsonReader",
"NaTType",
"NAType",
"PeriodIndexResamplerGroupby",
"Resampler",
"Rolling",
"RollingGroupby",
"SeriesGroupBy",
"StataReader",
# See TODO above
# "Styler",
"TimedeltaIndexResamplerGroupby",
"TimeGrouper",
"Window",
]

View file

@@ -0,0 +1,53 @@
"""
All of pandas' ExtensionArrays.
See :ref:`extending.extension-types` for more.
"""
from pandas.core.arrays import (
ArrowExtensionArray,
ArrowStringArray,
BooleanArray,
Categorical,
DatetimeArray,
FloatingArray,
IntegerArray,
IntervalArray,
NumpyExtensionArray,
PeriodArray,
SparseArray,
StringArray,
TimedeltaArray,
)
__all__ = [
"ArrowExtensionArray",
"ArrowStringArray",
"BooleanArray",
"Categorical",
"DatetimeArray",
"FloatingArray",
"IntegerArray",
"IntervalArray",
"NumpyExtensionArray",
"PeriodArray",
"SparseArray",
"StringArray",
"TimedeltaArray",
]
def __getattr__(name: str):
if name == "PandasArray":
# GH#53694
import warnings
from pandas.util._exceptions import find_stack_level
warnings.warn(
"PandasArray has been renamed NumpyExtensionArray. Use that "
"instead. This alias will be removed in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
return NumpyExtensionArray
raise AttributeError(f"module 'pandas.arrays' has no attribute '{name}'")

View file

@@ -0,0 +1,199 @@
"""
compat
======
Cross-compatible functions for different versions of Python.
Other items:
* platform checker
"""
from __future__ import annotations
import os
import platform
import sys
from typing import TYPE_CHECKING
from pandas.compat._constants import (
IS64,
ISMUSL,
PY310,
PY311,
PY312,
PYPY,
)
import pandas.compat.compressors
from pandas.compat.numpy import is_numpy_dev
from pandas.compat.pyarrow import (
pa_version_under7p0,
pa_version_under8p0,
pa_version_under9p0,
pa_version_under11p0,
pa_version_under13p0,
pa_version_under14p0,
pa_version_under14p1,
)
if TYPE_CHECKING:
from pandas._typing import F
def set_function_name(f: F, name: str, cls: type) -> F:
"""
Bind the name/qualname attributes of the function.
"""
f.__name__ = name
f.__qualname__ = f"{cls.__name__}.{name}"
f.__module__ = cls.__module__
return f
def is_platform_little_endian() -> bool:
"""
Checking if the running platform is little endian.
Returns
-------
bool
True if the running platform is little endian.
"""
return sys.byteorder == "little"
def is_platform_windows() -> bool:
"""
Checking if the running platform is windows.
Returns
-------
bool
True if the running platform is windows.
"""
return sys.platform in ["win32", "cygwin"]
def is_platform_linux() -> bool:
"""
Checking if the running platform is linux.
Returns
-------
bool
True if the running platform is linux.
"""
return sys.platform == "linux"
def is_platform_mac() -> bool:
"""
Checking if the running platform is mac.
Returns
-------
bool
True if the running platform is mac.
"""
return sys.platform == "darwin"
def is_platform_arm() -> bool:
"""
Checking if the running platform uses ARM architecture.
Returns
-------
bool
True if the running platform uses ARM architecture.
"""
return platform.machine() in ("arm64", "aarch64") or platform.machine().startswith(
"armv"
)
def is_platform_power() -> bool:
"""
Checking if the running platform uses Power architecture.
Returns
-------
bool
True if the running platform uses Power architecture.
"""
return platform.machine() in ("ppc64", "ppc64le")
def is_ci_environment() -> bool:
"""
Checking if running in a continuous integration environment by checking
the PANDAS_CI environment variable.
Returns
-------
bool
True if running in a continuous integration environment.
"""
return os.environ.get("PANDAS_CI", "0") == "1"
def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]:
"""
Importing the `LZMAFile` class from the `lzma` module.
Returns
-------
class
The `LZMAFile` class from the `lzma` module.
Raises
------
RuntimeError
If the `lzma` module was not imported correctly, or didn't exist.
"""
if not pandas.compat.compressors.has_lzma:
raise RuntimeError(
"lzma module not available. "
"A Python re-install with the proper dependencies, "
"might be required to solve this issue."
)
return pandas.compat.compressors.LZMAFile
def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
"""
Importing the `BZ2File` class from the `bz2` module.
Returns
-------
class
The `BZ2File` class from the `bz2` module.
Raises
------
RuntimeError
If the `bz2` module was not imported correctly, or didn't exist.
"""
if not pandas.compat.compressors.has_bz2:
raise RuntimeError(
"bz2 module not available. "
"A Python re-install with the proper dependencies, "
"might be required to solve this issue."
)
return pandas.compat.compressors.BZ2File
__all__ = [
"is_numpy_dev",
"pa_version_under7p0",
"pa_version_under8p0",
"pa_version_under9p0",
"pa_version_under11p0",
"pa_version_under13p0",
"pa_version_under14p0",
"pa_version_under14p1",
"IS64",
"ISMUSL",
"PY310",
"PY311",
"PY312",
"PYPY",
]

View file

@@ -0,0 +1,30 @@
"""
_constants
======
Constants relevant for the Python implementation.
"""
from __future__ import annotations
import platform
import sys
import sysconfig
IS64 = sys.maxsize > 2**32
PY310 = sys.version_info >= (3, 10)
PY311 = sys.version_info >= (3, 11)
PY312 = sys.version_info >= (3, 12)
PYPY = platform.python_implementation() == "PyPy"
ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "")
REF_COUNT = 2 if PY311 else 3
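# Note (editorial assumption): REF_COUNT is the expected sys.getrefcount()
# baseline for an object passed into a function; CPython 3.11's frame
# optimizations appear to drop one historical extra reference, hence 2 vs 3.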
__all__ = [
"IS64",
"ISMUSL",
"PY310",
"PY311",
"PY312",
"PYPY",
]

View file

@@ -0,0 +1,163 @@
from __future__ import annotations
import importlib
import sys
from typing import TYPE_CHECKING
import warnings
from pandas.util._exceptions import find_stack_level
from pandas.util.version import Version
if TYPE_CHECKING:
import types
# Update install.rst & setup.cfg when updating versions!
VERSIONS = {
"bs4": "4.11.1",
"blosc": "1.21.0",
"bottleneck": "1.3.4",
"dataframe-api-compat": "0.1.7",
"fastparquet": "0.8.1",
"fsspec": "2022.05.0",
"html5lib": "1.1",
"hypothesis": "6.46.1",
"gcsfs": "2022.05.0",
"jinja2": "3.1.2",
"lxml.etree": "4.8.0",
"matplotlib": "3.6.1",
"numba": "0.55.2",
"numexpr": "2.8.0",
"odfpy": "1.4.1",
"openpyxl": "3.0.10",
"pandas_gbq": "0.17.5",
"psycopg2": "2.9.3", # (dt dec pq3 ext lo64)
"pymysql": "1.0.2",
"pyarrow": "7.0.0",
"pyreadstat": "1.1.5",
"pytest": "7.3.2",
"pyxlsb": "1.0.9",
"s3fs": "2022.05.0",
"scipy": "1.8.1",
"sqlalchemy": "1.4.36",
"tables": "3.7.0",
"tabulate": "0.8.10",
"xarray": "2022.03.0",
"xlrd": "2.0.1",
"xlsxwriter": "3.0.3",
"zstandard": "0.17.0",
"tzdata": "2022.1",
"qtpy": "2.2.0",
"pyqt5": "5.15.6",
}
# A mapping from import name to package name (on PyPI) for packages where
# these two names are different.
INSTALL_MAPPING = {
"bs4": "beautifulsoup4",
"bottleneck": "Bottleneck",
"jinja2": "Jinja2",
"lxml.etree": "lxml",
"odf": "odfpy",
"pandas_gbq": "pandas-gbq",
"sqlalchemy": "SQLAlchemy",
"tables": "pytables",
}
def get_version(module: types.ModuleType) -> str:
version = getattr(module, "__version__", None)
if version is None:
raise ImportError(f"Can't determine version for {module.__name__}")
if module.__name__ == "psycopg2":
# psycopg2 appends " (dt dec pq3 ext lo64)" to its version
version = version.split()[0]
return version
def import_optional_dependency(
name: str,
extra: str = "",
errors: str = "raise",
min_version: str | None = None,
):
"""
Import an optional dependency.
By default, if a dependency is missing, an ImportError with a nice
message will be raised. If a dependency is present but too old,
we raise.
Parameters
----------
name : str
The module name.
extra : str
Additional text to include in the ImportError message.
errors : str {'raise', 'warn', 'ignore'}
What to do when a dependency is not found or its version is too old.
* raise : Raise an ImportError
* warn : Only applicable when a module's version is too old.
Warns that the version is too old and returns None.
* ignore: If the module is not installed, return None, otherwise,
return the module, even if the version is too old.
It's expected that users validate the version locally when
using ``errors="ignore"`` (see. ``io/html.py``)
min_version : str, default None
Specify a minimum version that is different from the global pandas
minimum version required.
Returns
-------
maybe_module : Optional[ModuleType]
The imported module, when found and the version is correct.
None is returned when the package is not found and `errors`
is ``'ignore'``, or when the package's version is too old and `errors`
is ``'warn'``.
"""
assert errors in {"warn", "raise", "ignore"}
package_name = INSTALL_MAPPING.get(name)
install_name = package_name if package_name is not None else name
msg = (
f"Missing optional dependency '{install_name}'. {extra} "
f"Use pip or conda to install {install_name}."
)
try:
module = importlib.import_module(name)
except ImportError:
if errors == "raise":
raise ImportError(msg)
return None
# Handle submodules: if we have a submodule, grab the parent module from sys.modules
parent = name.split(".")[0]
if parent != name:
install_name = parent
module_to_get = sys.modules[install_name]
else:
module_to_get = module
minimum_version = min_version if min_version is not None else VERSIONS.get(parent)
if minimum_version:
version = get_version(module_to_get)
if version and Version(version) < Version(minimum_version):
msg = (
f"Pandas requires version '{minimum_version}' or newer of '{parent}' "
f"(version '{version}' currently installed)."
)
if errors == "warn":
warnings.warn(
msg,
UserWarning,
stacklevel=find_stack_level(),
)
return None
elif errors == "raise":
raise ImportError(msg)
return module
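# Usage sketch (module names are examples):
#   etree = import_optional_dependency("lxml.etree")             # raises if absent
#   bs4 = import_optional_dependency("bs4", errors="warn")       # None if too old
#   zstd = import_optional_dependency("zstandard", errors="ignore")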

View file

@@ -0,0 +1,77 @@
"""
Patched ``BZ2File`` and ``LZMAFile`` to handle pickle protocol 5.
"""
from __future__ import annotations
from pickle import PickleBuffer
from pandas.compat._constants import PY310
try:
import bz2
has_bz2 = True
except ImportError:
has_bz2 = False
try:
import lzma
has_lzma = True
except ImportError:
has_lzma = False
def flatten_buffer(
b: bytes | bytearray | memoryview | PickleBuffer,
) -> bytes | bytearray | memoryview:
"""
Return some 1-D `uint8` typed buffer.
Coerces anything that does not match that description to one that does
without copying if possible (otherwise will copy).
"""
if isinstance(b, (bytes, bytearray)):
return b
if not isinstance(b, PickleBuffer):
b = PickleBuffer(b)
try:
# coerce to 1-D `uint8` C-contiguous `memoryview` zero-copy
return b.raw()
except BufferError:
# perform in-memory copy if buffer is not contiguous
return memoryview(b).tobytes("A")
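# For instance (illustrative):
#   flatten_buffer(b"abc")             # bytes pass through unchanged
#   flatten_buffer(memoryview(b"abc")) # contiguous: zero-copy via PickleBuffer.raw()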
if has_bz2:
class BZ2File(bz2.BZ2File):
if not PY310:
def write(self, b) -> int:
# Workaround issue where `bz2.BZ2File` expects `len`
# to return the number of bytes in `b` by converting
# `b` into something that meets that constraint with
# minimal copying.
#
# Note: This is fixed in Python 3.10.
return super().write(flatten_buffer(b))
if has_lzma:
class LZMAFile(lzma.LZMAFile):
if not PY310:
def write(self, b) -> int:
# Workaround issue where `lzma.LZMAFile` expects `len`
# to return the number of bytes in `b` by converting
# `b` into something that meets that constraint with
# minimal copying.
#
# Note: This is fixed in Python 3.10.
return super().write(flatten_buffer(b))

View file

@@ -0,0 +1,52 @@
""" support numpy compatibility across versions """
import warnings
import numpy as np
from pandas.util.version import Version
# numpy versioning
_np_version = np.__version__
_nlv = Version(_np_version)
np_version_gte1p24 = _nlv >= Version("1.24")
np_version_gte1p24p3 = _nlv >= Version("1.24.3")
np_version_gte1p25 = _nlv >= Version("1.25")
np_version_gt2 = _nlv >= Version("2.0.0.dev0")
is_numpy_dev = _nlv.dev is not None
_min_numpy_ver = "1.22.4"
if _nlv < Version(_min_numpy_ver):
raise ImportError(
f"this version of pandas is incompatible with numpy < {_min_numpy_ver}\n"
f"your numpy version is {_np_version}.\n"
f"Please upgrade numpy to >= {_min_numpy_ver} to use this pandas version"
)
np_long: type
np_ulong: type
if np_version_gt2:
try:
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
r".*In the future `np\.long` will be defined as.*",
FutureWarning,
)
np_long = np.long # type: ignore[attr-defined]
np_ulong = np.ulong # type: ignore[attr-defined]
except AttributeError:
np_long = np.int_
np_ulong = np.uint
else:
np_long = np.int_
np_ulong = np.uint
__all__ = [
"np",
"_np_version",
"is_numpy_dev",
]

View file

@@ -0,0 +1,416 @@
"""
For compatibility with numpy libraries, pandas functions or methods have to
accept '*args' and '**kwargs' parameters to accommodate numpy arguments that
are not actually used or respected in the pandas implementation.
To ensure that users do not abuse these parameters, validation is performed in
'validators.py' to make sure that any extra parameters passed correspond ONLY
to those in the numpy signature. Part of that validation includes whether or
not the user attempted to pass in non-default values for these extraneous
parameters. As we want to discourage users from relying on these parameters
when calling the pandas implementation, we want them only to pass in the
default values for these parameters.
This module provides a set of commonly used default arguments for functions and
methods that are spread throughout the codebase. This module will make it
easier to adjust to future upstream changes in the analogous numpy signatures.
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
TypeVar,
cast,
overload,
)
import numpy as np
from numpy import ndarray
from pandas._libs.lib import (
is_bool,
is_integer,
)
from pandas.errors import UnsupportedFunctionCall
from pandas.util._validators import (
validate_args,
validate_args_and_kwargs,
validate_kwargs,
)
if TYPE_CHECKING:
from pandas._typing import (
Axis,
AxisInt,
)
AxisNoneT = TypeVar("AxisNoneT", Axis, None)
class CompatValidator:
def __init__(
self,
defaults,
fname=None,
method: str | None = None,
max_fname_arg_count=None,
) -> None:
self.fname = fname
self.method = method
self.defaults = defaults
self.max_fname_arg_count = max_fname_arg_count
def __call__(
self,
args,
kwargs,
fname=None,
max_fname_arg_count=None,
method: str | None = None,
) -> None:
if not args and not kwargs:
return None
fname = self.fname if fname is None else fname
max_fname_arg_count = (
self.max_fname_arg_count
if max_fname_arg_count is None
else max_fname_arg_count
)
method = self.method if method is None else method
if method == "args":
validate_args(fname, args, max_fname_arg_count, self.defaults)
elif method == "kwargs":
validate_kwargs(fname, kwargs, self.defaults)
elif method == "both":
validate_args_and_kwargs(
fname, args, kwargs, max_fname_arg_count, self.defaults
)
else:
raise ValueError(f"invalid validation method '{method}'")
ARGMINMAX_DEFAULTS = {"out": None}
validate_argmin = CompatValidator(
ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1
)
validate_argmax = CompatValidator(
ARGMINMAX_DEFAULTS, fname="argmax", method="both", max_fname_arg_count=1
)
def process_skipna(skipna: bool | ndarray | None, args) -> tuple[bool, Any]:
if isinstance(skipna, ndarray) or skipna is None:
args = (skipna,) + args
skipna = True
return skipna, args
def validate_argmin_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
"""
If 'Series.argmin' is called via the 'numpy' library, the third parameter
in its signature is 'out', which takes either an ndarray or 'None', so
check if the 'skipna' parameter is either an instance of ndarray or is
None, since 'skipna' itself should be a boolean
"""
skipna, args = process_skipna(skipna, args)
validate_argmin(args, kwargs)
return skipna
def validate_argmax_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
"""
If 'Series.argmax' is called via the 'numpy' library, the third parameter
in its signature is 'out', which takes either an ndarray or 'None', so
check if the 'skipna' parameter is either an instance of ndarray or is
None, since 'skipna' itself should be a boolean
"""
skipna, args = process_skipna(skipna, args)
validate_argmax(args, kwargs)
return skipna
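# Illustrative call path (assumed): np.argmin(ser) invokes
# ser.argmin(axis=None, out=None), so numpy's positional 'out' lands in the
# 'skipna' slot; process_skipna() shifts it back into args and restores the
# boolean, e.g. validate_argmin_with_skipna(None, (), {}) returns True.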
ARGSORT_DEFAULTS: dict[str, int | str | None] = {}
ARGSORT_DEFAULTS["axis"] = -1
ARGSORT_DEFAULTS["kind"] = "quicksort"
ARGSORT_DEFAULTS["order"] = None
ARGSORT_DEFAULTS["kind"] = None
validate_argsort = CompatValidator(
ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both"
)
# argsort has two different signatures; this second validation is for when
# the `kind` param is supported
ARGSORT_DEFAULTS_KIND: dict[str, int | None] = {}
ARGSORT_DEFAULTS_KIND["axis"] = -1
ARGSORT_DEFAULTS_KIND["order"] = None
validate_argsort_kind = CompatValidator(
ARGSORT_DEFAULTS_KIND, fname="argsort", max_fname_arg_count=0, method="both"
)
def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs) -> bool:
"""
If 'Categorical.argsort' is called via the 'numpy' library, the first
parameter in its signature is 'axis', which takes either an integer or
'None', so check if the 'ascending' parameter has either integer type or is
None, since 'ascending' itself should be a boolean
"""
if is_integer(ascending) or ascending is None:
args = (ascending,) + args
ascending = True
validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
ascending = cast(bool, ascending)
return ascending
CLIP_DEFAULTS: dict[str, Any] = {"out": None}
validate_clip = CompatValidator(
CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
)
@overload
def validate_clip_with_axis(axis: ndarray, args, kwargs) -> None:
...
@overload
def validate_clip_with_axis(axis: AxisNoneT, args, kwargs) -> AxisNoneT:
...
def validate_clip_with_axis(
axis: ndarray | AxisNoneT, args, kwargs
) -> AxisNoneT | None:
"""
If 'NDFrame.clip' is called via the numpy library, the third parameter in
its signature is 'out', which can takes an ndarray, so check if the 'axis'
parameter is an instance of ndarray, since 'axis' itself should either be
an integer or None
"""
if isinstance(axis, ndarray):
args = (axis,) + args
# error: Incompatible types in assignment (expression has type "None",
# variable has type "Union[ndarray[Any, Any], str, int]")
axis = None # type: ignore[assignment]
validate_clip(args, kwargs)
# error: Incompatible return value type (got "Union[ndarray[Any, Any],
# str, int]", expected "Union[str, int, None]")
return axis # type: ignore[return-value]
CUM_FUNC_DEFAULTS: dict[str, Any] = {}
CUM_FUNC_DEFAULTS["dtype"] = None
CUM_FUNC_DEFAULTS["out"] = None
validate_cum_func = CompatValidator(
CUM_FUNC_DEFAULTS, method="both", max_fname_arg_count=1
)
validate_cumsum = CompatValidator(
CUM_FUNC_DEFAULTS, fname="cumsum", method="both", max_fname_arg_count=1
)
def validate_cum_func_with_skipna(skipna: bool, args, kwargs, name) -> bool:
"""
If this function is called via the 'numpy' library, the third parameter in
its signature is 'dtype', which takes either a 'numpy' dtype or 'None', so
check if the 'skipna' parameter is a boolean or not
"""
if not is_bool(skipna):
args = (skipna,) + args
skipna = True
elif isinstance(skipna, np.bool_):
skipna = bool(skipna)
validate_cum_func(args, kwargs, fname=name)
return skipna
ALLANY_DEFAULTS: dict[str, bool | None] = {}
ALLANY_DEFAULTS["dtype"] = None
ALLANY_DEFAULTS["out"] = None
ALLANY_DEFAULTS["keepdims"] = False
ALLANY_DEFAULTS["axis"] = None
validate_all = CompatValidator(
ALLANY_DEFAULTS, fname="all", method="both", max_fname_arg_count=1
)
validate_any = CompatValidator(
ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1
)
LOGICAL_FUNC_DEFAULTS = {"out": None, "keepdims": False}
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs")
MINMAX_DEFAULTS = {"axis": None, "dtype": None, "out": None, "keepdims": False}
validate_min = CompatValidator(
MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1
)
validate_max = CompatValidator(
MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1
)
RESHAPE_DEFAULTS: dict[str, str] = {"order": "C"}
validate_reshape = CompatValidator(
RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1
)
REPEAT_DEFAULTS: dict[str, Any] = {"axis": None}
validate_repeat = CompatValidator(
REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1
)
ROUND_DEFAULTS: dict[str, Any] = {"out": None}
validate_round = CompatValidator(
ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1
)
SORT_DEFAULTS: dict[str, int | str | None] = {}
SORT_DEFAULTS["axis"] = -1
SORT_DEFAULTS["kind"] = "quicksort"
SORT_DEFAULTS["order"] = None
validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs")
STAT_FUNC_DEFAULTS: dict[str, Any | None] = {}
STAT_FUNC_DEFAULTS["dtype"] = None
STAT_FUNC_DEFAULTS["out"] = None
SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
SUM_DEFAULTS["axis"] = None
SUM_DEFAULTS["keepdims"] = False
SUM_DEFAULTS["initial"] = None
PROD_DEFAULTS = SUM_DEFAULTS.copy()
MEAN_DEFAULTS = SUM_DEFAULTS.copy()
MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
MEDIAN_DEFAULTS["overwrite_input"] = False
MEDIAN_DEFAULTS["keepdims"] = False
STAT_FUNC_DEFAULTS["keepdims"] = False
validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs")
validate_sum = CompatValidator(
SUM_DEFAULTS, fname="sum", method="both", max_fname_arg_count=1
)
validate_prod = CompatValidator(
PROD_DEFAULTS, fname="prod", method="both", max_fname_arg_count=1
)
validate_mean = CompatValidator(
MEAN_DEFAULTS, fname="mean", method="both", max_fname_arg_count=1
)
validate_median = CompatValidator(
MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1
)
STAT_DDOF_FUNC_DEFAULTS: dict[str, bool | None] = {}
STAT_DDOF_FUNC_DEFAULTS["dtype"] = None
STAT_DDOF_FUNC_DEFAULTS["out"] = None
STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs")
TAKE_DEFAULTS: dict[str, str | None] = {}
TAKE_DEFAULTS["out"] = None
TAKE_DEFAULTS["mode"] = "raise"
validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
def validate_take_with_convert(convert: ndarray | bool | None, args, kwargs) -> bool:
"""
If this function is called via the 'numpy' library, the third parameter in
its signature is 'axis', which takes either an ndarray or 'None', so check
if the 'convert' parameter is either an instance of ndarray or is None
"""
if isinstance(convert, ndarray) or convert is None:
args = (convert,) + args
convert = True
validate_take(args, kwargs, max_fname_arg_count=3, method="both")
return convert
TRANSPOSE_DEFAULTS = {"axes": None}
validate_transpose = CompatValidator(
TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0
)
def validate_groupby_func(name: str, args, kwargs, allowed=None) -> None:
"""
'args' and 'kwargs' should be empty, except for allowed kwargs because all
of their necessary parameters are explicitly listed in the function
signature
"""
if allowed is None:
allowed = []
kwargs = set(kwargs) - set(allowed)
if len(args) + len(kwargs) > 0:
raise UnsupportedFunctionCall(
"numpy operations are not valid with groupby. "
f"Use .groupby(...).{name}() instead"
)
RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var")
def validate_resampler_func(method: str, args, kwargs) -> None:
"""
'args' and 'kwargs' should be empty because all of their necessary
parameters are explicitly listed in the function signature
"""
if len(args) + len(kwargs) > 0:
if method in RESAMPLER_NUMPY_OPS:
raise UnsupportedFunctionCall(
"numpy operations are not valid with resample. "
f"Use .resample(...).{method}() instead"
)
raise TypeError("too many arguments passed in")
def validate_minmax_axis(axis: AxisInt | None, ndim: int = 1) -> None:
"""
Ensure that the axis argument passed to min, max, argmin, or argmax is zero
or None, as otherwise it will be incorrectly ignored.
Parameters
----------
axis : int or None
ndim : int, default 1
Raises
------
ValueError
"""
if axis is None:
return
if axis >= ndim or (axis < 0 and ndim + axis < 0):
raise ValueError(f"`axis` must be fewer than the number of dimensions ({ndim})")
_validation_funcs = {
"median": validate_median,
"mean": validate_mean,
"min": validate_min,
"max": validate_max,
"sum": validate_sum,
"prod": validate_prod,
}
def validate_func(fname, args, kwargs) -> None:
if fname not in _validation_funcs:
return validate_stat_func(args, kwargs, fname=fname)
validation_func = _validation_funcs[fname]
return validation_func(args, kwargs)

View file

@@ -0,0 +1,262 @@
"""
Support pre-0.12 series pickle compatibility.
"""
from __future__ import annotations
import contextlib
import copy
import io
import pickle as pkl
from typing import TYPE_CHECKING
import numpy as np
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import BaseOffset
from pandas import Index
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.internals import BlockManager
if TYPE_CHECKING:
from collections.abc import Generator
def load_reduce(self):
stack = self.stack
args = stack.pop()
func = stack[-1]
try:
stack[-1] = func(*args)
return
except TypeError as err:
# If we have a deprecated function,
# try to replace and try again.
msg = "_reconstruct: First argument must be a sub-type of ndarray"
if msg in str(err):
try:
cls = args[0]
stack[-1] = object.__new__(cls)
return
except TypeError:
pass
elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset):
# TypeError: object.__new__(Day) is not safe, use Day.__new__()
cls = args[0]
stack[-1] = cls.__new__(*args)
return
elif args and issubclass(args[0], PeriodArray):
cls = args[0]
stack[-1] = NDArrayBacked.__new__(*args)
return
raise
# If classes are moved, provide compat here.
_class_locations_map = {
("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
# 15477
("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"),
# Re-routing unpickle block logic to go through _unpickle_block instead
# for pandas <= 1.3.5
("pandas.core.internals.blocks", "new_block"): (
"pandas._libs.internals",
"_unpickle_block",
),
("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"),
("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"),
# 10890
("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"),
("pandas.sparse.series", "SparseTimeSeries"): (
"pandas.core.sparse.series",
"SparseSeries",
),
# 12588, extensions moving
("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"),
("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"),
# 18543 moving period
("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"),
("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"),
# 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype
("pandas.tslib", "__nat_unpickle"): (
"pandas._libs.tslibs.nattype",
"__nat_unpickle",
),
("pandas._libs.tslib", "__nat_unpickle"): (
"pandas._libs.tslibs.nattype",
"__nat_unpickle",
),
# 15998 top-level dirs moving
("pandas.sparse.array", "SparseArray"): (
"pandas.core.arrays.sparse",
"SparseArray",
),
("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
("pandas.indexes.numeric", "Int64Index"): (
"pandas.core.indexes.base",
"Index", # updated in 50775
),
("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"),
("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"),
("pandas.tseries.index", "_new_DatetimeIndex"): (
"pandas.core.indexes.datetimes",
"_new_DatetimeIndex",
),
("pandas.tseries.index", "DatetimeIndex"): (
"pandas.core.indexes.datetimes",
"DatetimeIndex",
),
("pandas.tseries.period", "PeriodIndex"): (
"pandas.core.indexes.period",
"PeriodIndex",
),
# 19269, arrays moving
("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"),
# 19939, add timedeltaindex, float64index compat from 15998 move
("pandas.tseries.tdi", "TimedeltaIndex"): (
"pandas.core.indexes.timedeltas",
"TimedeltaIndex",
),
("pandas.indexes.numeric", "Float64Index"): (
"pandas.core.indexes.base",
"Index", # updated in 50775
),
# 50775, remove Int64Index, UInt64Index & Float64Index from codebase
("pandas.core.indexes.numeric", "Int64Index"): (
"pandas.core.indexes.base",
"Index",
),
("pandas.core.indexes.numeric", "UInt64Index"): (
"pandas.core.indexes.base",
"Index",
),
("pandas.core.indexes.numeric", "Float64Index"): (
"pandas.core.indexes.base",
"Index",
),
("pandas.core.arrays.sparse.dtype", "SparseDtype"): (
"pandas.core.dtypes.dtypes",
"SparseDtype",
),
}
# Our Unpickler sub-class overrides methods and some dispatcher
# functions for compat; it uses a non-public class of the pickle module.
class Unpickler(pkl._Unpickler):
def find_class(self, module, name):
# override superclass
key = (module, name)
module, name = _class_locations_map.get(key, key)
return super().find_class(module, name)
Unpickler.dispatch = copy.copy(Unpickler.dispatch)
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
def load_newobj(self) -> None:
args = self.stack.pop()
cls = self.stack[-1]
# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
elif issubclass(cls, DatetimeArray) and not args:
arr = np.array([], dtype="M8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif issubclass(cls, TimedeltaArray) and not args:
arr = np.array([], dtype="m8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif cls is BlockManager and not args:
obj = cls.__new__(cls, (), [], False)
else:
obj = cls.__new__(cls, *args)
self.stack[-1] = obj
Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj
def load_newobj_ex(self) -> None:
kwargs = self.stack.pop()
args = self.stack.pop()
cls = self.stack.pop()
# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
else:
obj = cls.__new__(cls, *args, **kwargs)
self.append(obj)
try:
Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
except (AttributeError, KeyError):
pass
def load(fh, encoding: str | None = None, is_verbose: bool = False):
"""
Load a pickle, with a provided encoding.
Parameters
----------
fh : a filelike object
encoding : an optional encoding
is_verbose : show exception output
"""
try:
fh.seek(0)
if encoding is not None:
up = Unpickler(fh, encoding=encoding)
else:
up = Unpickler(fh)
# "Unpickler" has no attribute "is_verbose" [attr-defined]
up.is_verbose = is_verbose # type: ignore[attr-defined]
return up.load()
except (ValueError, TypeError):
raise
def loads(
bytes_object: bytes,
*,
fix_imports: bool = True,
encoding: str = "ASCII",
errors: str = "strict",
):
"""
Analogous to pickle._loads.
"""
fd = io.BytesIO(bytes_object)
return Unpickler(
fd, fix_imports=fix_imports, encoding=encoding, errors=errors
).load()
@contextlib.contextmanager
def patch_pickle() -> Generator[None, None, None]:
"""
Temporarily patch pickle to use our unpickler.
"""
orig_loads = pkl.loads
try:
setattr(pkl, "loads", loads)
yield
finally:
setattr(pkl, "loads", orig_loads)

View file

@@ -0,0 +1,31 @@
""" support pyarrow compatibility across versions """
from __future__ import annotations
from pandas.util.version import Version
try:
import pyarrow as pa
_palv = Version(Version(pa.__version__).base_version)
pa_version_under7p0 = _palv < Version("7.0.0")
pa_version_under8p0 = _palv < Version("8.0.0")
pa_version_under9p0 = _palv < Version("9.0.0")
pa_version_under10p0 = _palv < Version("10.0.0")
pa_version_under11p0 = _palv < Version("11.0.0")
pa_version_under12p0 = _palv < Version("12.0.0")
pa_version_under13p0 = _palv < Version("13.0.0")
pa_version_under14p0 = _palv < Version("14.0.0")
pa_version_under14p1 = _palv < Version("14.0.1")
pa_version_under15p0 = _palv < Version("15.0.0")
except ImportError:
pa_version_under7p0 = True
pa_version_under8p0 = True
pa_version_under9p0 = True
pa_version_under10p0 = True
pa_version_under11p0 = True
pa_version_under12p0 = True
pa_version_under13p0 = True
pa_version_under14p0 = True
pa_version_under14p1 = True
pa_version_under15p0 = True

File diff suppressed because it is too large

View file

@@ -0,0 +1,200 @@
from __future__ import annotations
import functools
from typing import (
TYPE_CHECKING,
Any,
Callable,
)
if TYPE_CHECKING:
from pandas._typing import Scalar
import numpy as np
from pandas.compat._optional import import_optional_dependency
@functools.cache
def make_looper(func, result_dtype, is_grouped_kernel, nopython, nogil, parallel):
if TYPE_CHECKING:
import numba
else:
numba = import_optional_dependency("numba")
if is_grouped_kernel:
@numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
def column_looper(
values: np.ndarray,
labels: np.ndarray,
ngroups: int,
min_periods: int,
*args,
):
result = np.empty((values.shape[0], ngroups), dtype=result_dtype)
na_positions = {}
for i in numba.prange(values.shape[0]):
output, na_pos = func(
values[i], result_dtype, labels, ngroups, min_periods, *args
)
result[i] = output
if len(na_pos) > 0:
na_positions[i] = np.array(na_pos)
return result, na_positions
else:
@numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
def column_looper(
values: np.ndarray,
start: np.ndarray,
end: np.ndarray,
min_periods: int,
*args,
):
result = np.empty((values.shape[0], len(start)), dtype=result_dtype)
na_positions = {}
for i in numba.prange(values.shape[0]):
output, na_pos = func(
values[i], result_dtype, start, end, min_periods, *args
)
result[i] = output
if len(na_pos) > 0:
na_positions[i] = np.array(na_pos)
return result, na_positions
return column_looper
default_dtype_mapping: dict[np.dtype, Any] = {
np.dtype("int8"): np.int64,
np.dtype("int16"): np.int64,
np.dtype("int32"): np.int64,
np.dtype("int64"): np.int64,
np.dtype("uint8"): np.uint64,
np.dtype("uint16"): np.uint64,
np.dtype("uint32"): np.uint64,
np.dtype("uint64"): np.uint64,
np.dtype("float32"): np.float64,
np.dtype("float64"): np.float64,
np.dtype("complex64"): np.complex128,
np.dtype("complex128"): np.complex128,
}
# TODO: Preserve complex dtypes
float_dtype_mapping: dict[np.dtype, Any] = {
np.dtype("int8"): np.float64,
np.dtype("int16"): np.float64,
np.dtype("int32"): np.float64,
np.dtype("int64"): np.float64,
np.dtype("uint8"): np.float64,
np.dtype("uint16"): np.float64,
np.dtype("uint32"): np.float64,
np.dtype("uint64"): np.float64,
np.dtype("float32"): np.float64,
np.dtype("float64"): np.float64,
np.dtype("complex64"): np.float64,
np.dtype("complex128"): np.float64,
}
identity_dtype_mapping: dict[np.dtype, Any] = {
np.dtype("int8"): np.int8,
np.dtype("int16"): np.int16,
np.dtype("int32"): np.int32,
np.dtype("int64"): np.int64,
np.dtype("uint8"): np.uint8,
np.dtype("uint16"): np.uint16,
np.dtype("uint32"): np.uint32,
np.dtype("uint64"): np.uint64,
np.dtype("float32"): np.float32,
np.dtype("float64"): np.float64,
np.dtype("complex64"): np.complex64,
np.dtype("complex128"): np.complex128,
}
def generate_shared_aggregator(
func: Callable[..., Scalar],
dtype_mapping: dict[np.dtype, np.dtype],
is_grouped_kernel: bool,
nopython: bool,
nogil: bool,
parallel: bool,
):
"""
Generate a Numba function that loops over the columns 2D object and applies
a 1D numba kernel over each column.
Parameters
----------
func : function
aggregation function to be applied to each column
dtype_mapping: dict or None
If not None, maps a dtype to a result dtype.
Otherwise, will fall back to default mapping.
is_grouped_kernel: bool
Whether func operates using the group labels (True)
or using starts/ends arrays
If true, you also need to pass the number of groups to this function
nopython : bool
nopython to be passed into numba.jit
nogil : bool
nogil to be passed into numba.jit
parallel : bool
parallel to be passed into numba.jit
Returns
-------
Numba function
"""
# A wrapper around the looper function,
# to dispatch based on dtype since numba is unable to do that in nopython mode
# It also post-processes the values by inserting nans where number of observations
# is less than min_periods
# Cannot do this in numba nopython mode
# (you'll run into type-unification error when you cast int -> float)
def looper_wrapper(
values,
start=None,
end=None,
labels=None,
ngroups=None,
min_periods: int = 0,
**kwargs,
):
result_dtype = dtype_mapping[values.dtype]
column_looper = make_looper(
func, result_dtype, is_grouped_kernel, nopython, nogil, parallel
)
# Need to unpack kwargs since numba only supports *args
if is_grouped_kernel:
result, na_positions = column_looper(
values, labels, ngroups, min_periods, *kwargs.values()
)
else:
result, na_positions = column_looper(
values, start, end, min_periods, *kwargs.values()
)
if result.dtype.kind == "i":
# Look if na_positions is not empty
# If so, convert the whole block
# This is OK since int dtype cannot hold nan,
# so if min_periods not satisfied for 1 col, it is not satisfied for
# all columns at that index
for na_pos in na_positions.values():
if len(na_pos) > 0:
result = result.astype("float64")
break
# TODO: Optimize this
for i, na_pos in na_positions.items():
if len(na_pos) > 0:
result[i, na_pos] = np.nan
return result
return looper_wrapper
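# Wiring sketch (argument values assumed for illustration; sliding_mean lives
# in the kernels package):
#   looper = generate_shared_aggregator(
#       sliding_mean, float_dtype_mapping,
#       is_grouped_kernel=False, nopython=True, nogil=True, parallel=False,
#   )
#   out = looper(values_2d, start=starts, end=ends, min_periods=1)
# where values_2d is a 2D float ndarray and starts/ends are window bounds.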

View file

@@ -0,0 +1,27 @@
from pandas.core._numba.kernels.mean_ import (
grouped_mean,
sliding_mean,
)
from pandas.core._numba.kernels.min_max_ import (
grouped_min_max,
sliding_min_max,
)
from pandas.core._numba.kernels.sum_ import (
grouped_sum,
sliding_sum,
)
from pandas.core._numba.kernels.var_ import (
grouped_var,
sliding_var,
)
__all__ = [
"sliding_mean",
"grouped_mean",
"sliding_sum",
"grouped_sum",
"sliding_var",
"grouped_var",
"sliding_min_max",
"grouped_min_max",
]

View file

@@ -0,0 +1,196 @@
"""
Numba 1D mean kernels that can be shared by
* Dataframe / Series
* groupby
* rolling / expanding
Mirrors pandas/_libs/window/aggregation.pyx
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import numba
import numpy as np
from pandas.core._numba.kernels.shared import is_monotonic_increasing
from pandas.core._numba.kernels.sum_ import grouped_kahan_sum
if TYPE_CHECKING:
from pandas._typing import npt
@numba.jit(nopython=True, nogil=True, parallel=False)
def add_mean(
val: float,
nobs: int,
sum_x: float,
neg_ct: int,
compensation: float,
num_consecutive_same_value: int,
prev_value: float,
) -> tuple[int, float, int, float, int, float]:
if not np.isnan(val):
nobs += 1
y = val - compensation
t = sum_x + y
compensation = t - sum_x - y
sum_x = t
if val < 0:
neg_ct += 1
if val == prev_value:
num_consecutive_same_value += 1
else:
num_consecutive_same_value = 1
prev_value = val
return nobs, sum_x, neg_ct, compensation, num_consecutive_same_value, prev_value
@numba.jit(nopython=True, nogil=True, parallel=False)
def remove_mean(
val: float, nobs: int, sum_x: float, neg_ct: int, compensation: float
) -> tuple[int, float, int, float]:
if not np.isnan(val):
nobs -= 1
y = -val - compensation
t = sum_x + y
compensation = t - sum_x - y
sum_x = t
if val < 0:
neg_ct -= 1
return nobs, sum_x, neg_ct, compensation
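# Note on the running state: add_mean/remove_mean keep a Kahan-compensated
# sum, where `compensation` carries the low-order bits lost in `sum_x + y`,
# so long sliding windows do not accumulate floating-point error.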
@numba.jit(nopython=True, nogil=True, parallel=False)
def sliding_mean(
values: np.ndarray,
result_dtype: np.dtype,
start: np.ndarray,
end: np.ndarray,
min_periods: int,
) -> tuple[np.ndarray, list[int]]:
N = len(start)
nobs = 0
sum_x = 0.0
neg_ct = 0
compensation_add = 0.0
compensation_remove = 0.0
is_monotonic_increasing_bounds = is_monotonic_increasing(
start
) and is_monotonic_increasing(end)
output = np.empty(N, dtype=result_dtype)
for i in range(N):
s = start[i]
e = end[i]
if i == 0 or not is_monotonic_increasing_bounds:
prev_value = values[s]
num_consecutive_same_value = 0
for j in range(s, e):
val = values[j]
(
nobs,
sum_x,
neg_ct,
compensation_add,
num_consecutive_same_value,
prev_value,
) = add_mean(
val,
nobs,
sum_x,
neg_ct,
compensation_add,
num_consecutive_same_value,
prev_value, # pyright: ignore[reportGeneralTypeIssues]
)
else:
for j in range(start[i - 1], s):
val = values[j]
nobs, sum_x, neg_ct, compensation_remove = remove_mean(
val, nobs, sum_x, neg_ct, compensation_remove
)
for j in range(end[i - 1], e):
val = values[j]
(
nobs,
sum_x,
neg_ct,
compensation_add,
num_consecutive_same_value,
prev_value,
) = add_mean(
val,
nobs,
sum_x,
neg_ct,
compensation_add,
num_consecutive_same_value,
prev_value, # pyright: ignore[reportGeneralTypeIssues]
)
if nobs >= min_periods and nobs > 0:
result = sum_x / nobs
if num_consecutive_same_value >= nobs:
result = prev_value
elif neg_ct == 0 and result < 0:
result = 0
elif neg_ct == nobs and result > 0:
result = 0
else:
result = np.nan
output[i] = result
if not is_monotonic_increasing_bounds:
nobs = 0
sum_x = 0.0
neg_ct = 0
compensation_remove = 0.0
# na_position is empty list since float64 can already hold nans
# Do list comprehension, since numba cannot figure out that na_pos is
# empty list of ints on its own
na_pos = [0 for i in range(0)]
return output, na_pos
@numba.jit(nopython=True, nogil=True, parallel=False)
def grouped_mean(
values: np.ndarray,
result_dtype: np.dtype,
labels: npt.NDArray[np.intp],
ngroups: int,
min_periods: int,
) -> tuple[np.ndarray, list[int]]:
output, nobs_arr, comp_arr, consecutive_counts, prev_vals = grouped_kahan_sum(
values, result_dtype, labels, ngroups
)
# Post-processing, replace sums that don't satisfy min_periods
for lab in range(ngroups):
nobs = nobs_arr[lab]
num_consecutive_same_value = consecutive_counts[lab]
prev_value = prev_vals[lab]
sum_x = output[lab]
if nobs >= min_periods:
if num_consecutive_same_value >= nobs:
result = prev_value * nobs
else:
result = sum_x
else:
result = np.nan
result /= nobs
output[lab] = result
# na_position is empty list since float64 can already hold nans
# Do list comprehension, since numba cannot figure out that na_pos is
# empty list of ints on its own
na_pos = [0 for i in range(0)]
return output, na_pos
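A minimal sketch calling the kernel directly (private, unstable API; requires numba). The start/end arrays give half-open window bounds per output slot:

import numpy as np
from pandas.core._numba.kernels import sliding_mean

values = np.array([1.0, 2.0, np.nan, 4.0, 5.0, 6.0])
# Trailing window of size 3: window i covers values[start[i]:end[i]]
start = np.maximum(np.arange(6) - 2, 0).astype(np.int64)
end = np.arange(1, 7).astype(np.int64)
out, na_pos = sliding_mean(values, np.dtype("float64"), start, end, 1)
print(out)     # [1.  1.5 1.5 3.  4.5 5. ] -- the NaN is skipped, not propagated
print(na_pos)  # [] -- float64 output can hold NaN directly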

View file

@@ -0,0 +1,125 @@
"""
Numba 1D min/max kernels that can be shared by
* Dataframe / Series
* groupby
* rolling / expanding
Mirrors pandas/_libs/window/aggregation.pyx
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import numba
import numpy as np
if TYPE_CHECKING:
from pandas._typing import npt
@numba.jit(nopython=True, nogil=True, parallel=False)
def sliding_min_max(
values: np.ndarray,
result_dtype: np.dtype,
start: np.ndarray,
end: np.ndarray,
min_periods: int,
is_max: bool,
) -> tuple[np.ndarray, list[int]]:
N = len(start)
nobs = 0
output = np.empty(N, dtype=result_dtype)
na_pos = []
# Use deque once numba supports it
# https://github.com/numba/numba/issues/7417
Q: list = []
W: list = []
for i in range(N):
curr_win_size = end[i] - start[i]
if i == 0:
st = start[i]
else:
st = end[i - 1]
for k in range(st, end[i]):
ai = values[k]
if not np.isnan(ai):
nobs += 1
elif is_max:
ai = -np.inf
else:
ai = np.inf
# Discard previous entries if we find a new min or max
# (the `x != x` comparison below is a NaN check)
if is_max:
while Q and ((ai >= values[Q[-1]]) or values[Q[-1]] != values[Q[-1]]):
Q.pop()
else:
while Q and ((ai <= values[Q[-1]]) or values[Q[-1]] != values[Q[-1]]):
Q.pop()
Q.append(k)
W.append(k)
# Discard entries outside and left of current window
while Q and Q[0] <= start[i] - 1:
Q.pop(0)
while W and W[0] <= start[i] - 1:
if not np.isnan(values[W[0]]):
nobs -= 1
W.pop(0)
# Save output based on index in input value array
if Q and curr_win_size > 0 and nobs >= min_periods:
output[i] = values[Q[0]]
else:
if values.dtype.kind != "i":
output[i] = np.nan
else:
na_pos.append(i)
return output, na_pos
@numba.jit(nopython=True, nogil=True, parallel=False)
def grouped_min_max(
values: np.ndarray,
result_dtype: np.dtype,
labels: npt.NDArray[np.intp],
ngroups: int,
min_periods: int,
is_max: bool,
) -> tuple[np.ndarray, list[int]]:
N = len(labels)
nobs = np.zeros(ngroups, dtype=np.int64)
na_pos = []
output = np.empty(ngroups, dtype=result_dtype)
for i in range(N):
lab = labels[i]
val = values[i]
if lab < 0:
continue
if values.dtype.kind == "i" or not np.isnan(val):
nobs[lab] += 1
else:
# NaN value cannot be a min/max value
continue
if nobs[lab] == 1:
# First element in group, set output equal to this
output[lab] = val
continue
if is_max:
if val > output[lab]:
output[lab] = val
else:
if val < output[lab]:
output[lab] = val
# Set labels that don't satisfy min_periods as np.nan
for lab, count in enumerate(nobs):
if count < min_periods:
na_pos.append(lab)
return output, na_pos
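The Q/W lists above implement the classic monotonic-queue sliding min/max (Q holds candidate extrema, W tracks the window for the observation count). A standalone sketch of the same idea using an actual deque (pure Python, for illustration only):

from collections import deque
import numpy as np

def sliding_max(values, window):
    # Indices of candidate maxima, kept in decreasing value order
    q = deque()
    out = np.empty(len(values))
    for i, v in enumerate(values):
        while q and values[q[-1]] <= v:
            q.pop()                  # v supersedes smaller candidates
        q.append(i)
        if q[0] <= i - window:
            q.popleft()              # candidate fell out of the window
        out[i] = values[q[0]] if i >= window - 1 else np.nan
    return out

print(sliding_max(np.array([3.0, 1.0, 4.0, 1.0, 5.0, 9.0]), 3))
# [nan nan  4.  4.  5.  9.]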

View file

@@ -0,0 +1,29 @@
from __future__ import annotations
from typing import TYPE_CHECKING
import numba
if TYPE_CHECKING:
import numpy as np
@numba.jit(
# error: Any? not callable
numba.boolean(numba.int64[:]), # type: ignore[misc]
nopython=True,
nogil=True,
parallel=False,
)
def is_monotonic_increasing(bounds: np.ndarray) -> bool:
"""Check if int64 values are monotonically increasing."""
n = len(bounds)
if n < 2:
return True
prev = bounds[0]
for i in range(1, n):
cur = bounds[i]
if cur < prev:
return False
prev = cur
return True
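A quick check of the helper (private API; the explicit numba.boolean(numba.int64[:]) signature means it compiles eagerly at import time and only accepts int64 arrays):

import numpy as np
from pandas.core._numba.kernels.shared import is_monotonic_increasing

print(is_monotonic_increasing(np.array([0, 1, 1, 4], dtype=np.int64)))  # True
print(is_monotonic_increasing(np.array([0, 2, 1], dtype=np.int64)))     # False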

View file

@@ -0,0 +1,244 @@
"""
Numba 1D sum kernels that can be shared by
* Dataframe / Series
* groupby
* rolling / expanding
Mirrors pandas/_libs/window/aggregation.pyx
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
)
import numba
from numba.extending import register_jitable
import numpy as np
if TYPE_CHECKING:
from pandas._typing import npt
from pandas.core._numba.kernels.shared import is_monotonic_increasing
@numba.jit(nopython=True, nogil=True, parallel=False)
def add_sum(
val: Any,
nobs: int,
sum_x: Any,
compensation: Any,
num_consecutive_same_value: int,
prev_value: Any,
) -> tuple[int, Any, Any, int, Any]:
if not np.isnan(val):
nobs += 1
y = val - compensation
t = sum_x + y
compensation = t - sum_x - y
sum_x = t
if val == prev_value:
num_consecutive_same_value += 1
else:
num_consecutive_same_value = 1
prev_value = val
return nobs, sum_x, compensation, num_consecutive_same_value, prev_value
@numba.jit(nopython=True, nogil=True, parallel=False)
def remove_sum(
val: Any, nobs: int, sum_x: Any, compensation: Any
) -> tuple[int, Any, Any]:
if not np.isnan(val):
nobs -= 1
y = -val - compensation
t = sum_x + y
compensation = t - sum_x - y
sum_x = t
return nobs, sum_x, compensation
@numba.jit(nopython=True, nogil=True, parallel=False)
def sliding_sum(
values: np.ndarray,
result_dtype: np.dtype,
start: np.ndarray,
end: np.ndarray,
min_periods: int,
) -> tuple[np.ndarray, list[int]]:
dtype = values.dtype
na_val: object = np.nan
if dtype.kind == "i":
na_val = 0
N = len(start)
nobs = 0
sum_x = 0
compensation_add = 0
compensation_remove = 0
na_pos = []
is_monotonic_increasing_bounds = is_monotonic_increasing(
start
) and is_monotonic_increasing(end)
output = np.empty(N, dtype=result_dtype)
for i in range(N):
s = start[i]
e = end[i]
if i == 0 or not is_monotonic_increasing_bounds:
prev_value = values[s]
num_consecutive_same_value = 0
for j in range(s, e):
val = values[j]
(
nobs,
sum_x,
compensation_add,
num_consecutive_same_value,
prev_value,
) = add_sum(
val,
nobs,
sum_x,
compensation_add,
num_consecutive_same_value,
prev_value,
)
else:
for j in range(start[i - 1], s):
val = values[j]
nobs, sum_x, compensation_remove = remove_sum(
val, nobs, sum_x, compensation_remove
)
for j in range(end[i - 1], e):
val = values[j]
(
nobs,
sum_x,
compensation_add,
num_consecutive_same_value,
prev_value,
) = add_sum(
val,
nobs,
sum_x,
compensation_add,
num_consecutive_same_value,
prev_value,
)
if nobs == 0 == min_periods:
result: object = 0
elif nobs >= min_periods:
if num_consecutive_same_value >= nobs:
result = prev_value * nobs
else:
result = sum_x
else:
result = na_val
if dtype.kind == "i":
na_pos.append(i)
output[i] = result
if not is_monotonic_increasing_bounds:
nobs = 0
sum_x = 0
compensation_remove = 0
return output, na_pos
# Mypy/pyright don't like the fact that the decorator is untyped
@register_jitable # type: ignore[misc]
def grouped_kahan_sum(
values: np.ndarray,
result_dtype: np.dtype,
labels: npt.NDArray[np.intp],
ngroups: int,
) -> tuple[
np.ndarray, npt.NDArray[np.int64], np.ndarray, npt.NDArray[np.int64], np.ndarray
]:
N = len(labels)
nobs_arr = np.zeros(ngroups, dtype=np.int64)
comp_arr = np.zeros(ngroups, dtype=values.dtype)
consecutive_counts = np.zeros(ngroups, dtype=np.int64)
prev_vals = np.zeros(ngroups, dtype=values.dtype)
output = np.zeros(ngroups, dtype=result_dtype)
for i in range(N):
lab = labels[i]
val = values[i]
if lab < 0:
continue
sum_x = output[lab]
nobs = nobs_arr[lab]
compensation_add = comp_arr[lab]
num_consecutive_same_value = consecutive_counts[lab]
prev_value = prev_vals[lab]
(
nobs,
sum_x,
compensation_add,
num_consecutive_same_value,
prev_value,
) = add_sum(
val,
nobs,
sum_x,
compensation_add,
num_consecutive_same_value,
prev_value,
)
output[lab] = sum_x
consecutive_counts[lab] = num_consecutive_same_value
prev_vals[lab] = prev_value
comp_arr[lab] = compensation_add
nobs_arr[lab] = nobs
return output, nobs_arr, comp_arr, consecutive_counts, prev_vals
@numba.jit(nopython=True, nogil=True, parallel=False)
def grouped_sum(
values: np.ndarray,
result_dtype: np.dtype,
labels: npt.NDArray[np.intp],
ngroups: int,
min_periods: int,
) -> tuple[np.ndarray, list[int]]:
na_pos = []
output, nobs_arr, comp_arr, consecutive_counts, prev_vals = grouped_kahan_sum(
values, result_dtype, labels, ngroups
)
# Post-processing, replace sums that don't satisfy min_periods
for lab in range(ngroups):
nobs = nobs_arr[lab]
num_consecutive_same_value = consecutive_counts[lab]
prev_value = prev_vals[lab]
sum_x = output[lab]
if nobs >= min_periods:
if num_consecutive_same_value >= nobs:
result = prev_value * nobs
else:
result = sum_x
else:
result = sum_x # Don't change val, will be replaced by nan later
na_pos.append(lab)
output[lab] = result
return output, na_pos

View file

@@ -0,0 +1,245 @@
"""
Numba 1D var kernels that can be shared by
* Dataframe / Series
* groupby
* rolling / expanding
Mirrors pandas/_libs/window/aggregation.pyx
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import numba
import numpy as np
if TYPE_CHECKING:
from pandas._typing import npt
from pandas.core._numba.kernels.shared import is_monotonic_increasing
@numba.jit(nopython=True, nogil=True, parallel=False)
def add_var(
val: float,
nobs: int,
mean_x: float,
ssqdm_x: float,
compensation: float,
num_consecutive_same_value: int,
prev_value: float,
) -> tuple[int, float, float, float, int, float]:
if not np.isnan(val):
if val == prev_value:
num_consecutive_same_value += 1
else:
num_consecutive_same_value = 1
prev_value = val
nobs += 1
prev_mean = mean_x - compensation
y = val - compensation
t = y - mean_x
compensation = t + mean_x - y
delta = t
if nobs:
mean_x += delta / nobs
else:
mean_x = 0
ssqdm_x += (val - prev_mean) * (val - mean_x)
return nobs, mean_x, ssqdm_x, compensation, num_consecutive_same_value, prev_value
@numba.jit(nopython=True, nogil=True, parallel=False)
def remove_var(
val: float, nobs: int, mean_x: float, ssqdm_x: float, compensation: float
) -> tuple[int, float, float, float]:
if not np.isnan(val):
nobs -= 1
if nobs:
prev_mean = mean_x - compensation
y = val - compensation
t = y - mean_x
compensation = t + mean_x - y
delta = t
mean_x -= delta / nobs
ssqdm_x -= (val - prev_mean) * (val - mean_x)
else:
mean_x = 0
ssqdm_x = 0
return nobs, mean_x, ssqdm_x, compensation
@numba.jit(nopython=True, nogil=True, parallel=False)
def sliding_var(
values: np.ndarray,
result_dtype: np.dtype,
start: np.ndarray,
end: np.ndarray,
min_periods: int,
ddof: int = 1,
) -> tuple[np.ndarray, list[int]]:
N = len(start)
nobs = 0
mean_x = 0.0
ssqdm_x = 0.0
compensation_add = 0.0
compensation_remove = 0.0
min_periods = max(min_periods, 1)
is_monotonic_increasing_bounds = is_monotonic_increasing(
start
) and is_monotonic_increasing(end)
output = np.empty(N, dtype=result_dtype)
for i in range(N):
s = start[i]
e = end[i]
if i == 0 or not is_monotonic_increasing_bounds:
prev_value = values[s]
num_consecutive_same_value = 0
for j in range(s, e):
val = values[j]
(
nobs,
mean_x,
ssqdm_x,
compensation_add,
num_consecutive_same_value,
prev_value,
) = add_var(
val,
nobs,
mean_x,
ssqdm_x,
compensation_add,
num_consecutive_same_value,
prev_value, # pyright: ignore[reportGeneralTypeIssues]
)
else:
for j in range(start[i - 1], s):
val = values[j]
nobs, mean_x, ssqdm_x, compensation_remove = remove_var(
val, nobs, mean_x, ssqdm_x, compensation_remove
)
for j in range(end[i - 1], e):
val = values[j]
(
nobs,
mean_x,
ssqdm_x,
compensation_add,
num_consecutive_same_value,
prev_value,
) = add_var(
val,
nobs,
mean_x,
ssqdm_x,
compensation_add,
num_consecutive_same_value,
prev_value, # pyright: ignore[reportGeneralTypeIssues]
)
if nobs >= min_periods and nobs > ddof:
if nobs == 1 or num_consecutive_same_value >= nobs:
result = 0.0
else:
result = ssqdm_x / (nobs - ddof)
else:
result = np.nan
output[i] = result
if not is_monotonic_increasing_bounds:
nobs = 0
mean_x = 0.0
ssqdm_x = 0.0
compensation_remove = 0.0
# na_position is empty list since float64 can already hold nans
# Do list comprehension, since numba cannot figure out that na_pos is
# empty list of ints on its own
na_pos = [0 for i in range(0)]
return output, na_pos
@numba.jit(nopython=True, nogil=True, parallel=False)
def grouped_var(
values: np.ndarray,
result_dtype: np.dtype,
labels: npt.NDArray[np.intp],
ngroups: int,
min_periods: int,
ddof: int = 1,
) -> tuple[np.ndarray, list[int]]:
N = len(labels)
nobs_arr = np.zeros(ngroups, dtype=np.int64)
comp_arr = np.zeros(ngroups, dtype=values.dtype)
consecutive_counts = np.zeros(ngroups, dtype=np.int64)
prev_vals = np.zeros(ngroups, dtype=values.dtype)
output = np.zeros(ngroups, dtype=result_dtype)
means = np.zeros(ngroups, dtype=result_dtype)
for i in range(N):
lab = labels[i]
val = values[i]
if lab < 0:
continue
mean_x = means[lab]
ssqdm_x = output[lab]
nobs = nobs_arr[lab]
compensation_add = comp_arr[lab]
num_consecutive_same_value = consecutive_counts[lab]
prev_value = prev_vals[lab]
(
nobs,
mean_x,
ssqdm_x,
compensation_add,
num_consecutive_same_value,
prev_value,
) = add_var(
val,
nobs,
mean_x,
ssqdm_x,
compensation_add,
num_consecutive_same_value,
prev_value,
)
output[lab] = ssqdm_x
means[lab] = mean_x
consecutive_counts[lab] = num_consecutive_same_value
prev_vals[lab] = prev_value
comp_arr[lab] = compensation_add
nobs_arr[lab] = nobs
# Post-processing, replace vars that don't satisfy min_periods
for lab in range(ngroups):
nobs = nobs_arr[lab]
num_consecutive_same_value = consecutive_counts[lab]
ssqdm_x = output[lab]
if nobs >= min_periods and nobs > ddof:
if nobs == 1 or num_consecutive_same_value >= nobs:
result = 0.0
else:
result = ssqdm_x / (nobs - ddof)
else:
result = np.nan
output[lab] = result
# na_position is empty list since float64 can already hold nans
# Do list comprehension, since numba cannot figure out that na_pos is
# empty list of ints on its own
na_pos = [0 for i in range(0)]
return output, na_pos
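add_var/remove_var maintain a Welford-style running mean and sum of squared deviations, which avoids the catastrophic cancellation of the naive E[x^2] - E[x]^2 formula. A standalone sketch without the compensation term (pure Python, for illustration):

import numpy as np

def welford_var(xs, ddof=1):
    nobs = 0
    mean = 0.0
    ssqdm = 0.0  # sum of squared deviations from the running mean
    for x in xs:
        nobs += 1
        delta = x - mean
        mean += delta / nobs
        ssqdm += delta * (x - mean)
    return ssqdm / (nobs - ddof)

xs = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]
print(welford_var(xs))     # 4.571428...
print(np.var(xs, ddof=1))  # same result in one pass over the data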

View file

@@ -0,0 +1,340 @@
"""
accessor.py contains base classes for implementing accessor properties
that can be mixed into or pinned onto other pandas classes.
"""
from __future__ import annotations
from typing import (
Callable,
final,
)
import warnings
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
class DirNamesMixin:
_accessors: set[str] = set()
_hidden_attrs: frozenset[str] = frozenset()
@final
def _dir_deletions(self) -> set[str]:
"""
Delete unwanted __dir__ for this object.
"""
return self._accessors | self._hidden_attrs
def _dir_additions(self) -> set[str]:
"""
Add additional __dir__ for this object.
"""
return {accessor for accessor in self._accessors if hasattr(self, accessor)}
def __dir__(self) -> list[str]:
"""
Provide method name lookup and completion.
Notes
-----
Only provide 'public' methods.
"""
rv = set(super().__dir__())
rv = (rv - self._dir_deletions()) | self._dir_additions()
return sorted(rv)
class PandasDelegate:
"""
Abstract base class for delegating methods/properties.
"""
def _delegate_property_get(self, name: str, *args, **kwargs):
raise TypeError(f"You cannot access the property {name}")
def _delegate_property_set(self, name: str, value, *args, **kwargs):
raise TypeError(f"The property {name} cannot be set")
def _delegate_method(self, name: str, *args, **kwargs):
raise TypeError(f"You cannot call method {name}")
@classmethod
def _add_delegate_accessors(
cls,
delegate,
accessors: list[str],
typ: str,
overwrite: bool = False,
accessor_mapping: Callable[[str], str] = lambda x: x,
raise_on_missing: bool = True,
) -> None:
"""
Add accessors to cls from the delegate class.
Parameters
----------
cls
Class to add the methods/properties to.
delegate
Class to get methods/properties and doc-strings.
accessors : list of str
List of accessors to add.
typ : {'property', 'method'}
overwrite : bool, default False
Overwrite the method/property in the target class if it exists.
accessor_mapping: Callable, default lambda x: x
Callable to map the delegate's function to the cls' function.
raise_on_missing: bool, default True
Raise if an accessor does not exist on delegate.
False skips the missing accessor.
"""
def _create_delegator_property(name: str):
def _getter(self):
return self._delegate_property_get(name)
def _setter(self, new_values):
return self._delegate_property_set(name, new_values)
_getter.__name__ = name
_setter.__name__ = name
return property(
fget=_getter,
fset=_setter,
doc=getattr(delegate, accessor_mapping(name)).__doc__,
)
def _create_delegator_method(name: str):
def f(self, *args, **kwargs):
return self._delegate_method(name, *args, **kwargs)
f.__name__ = name
f.__doc__ = getattr(delegate, accessor_mapping(name)).__doc__
return f
for name in accessors:
if (
not raise_on_missing
and getattr(delegate, accessor_mapping(name), None) is None
):
continue
if typ == "property":
f = _create_delegator_property(name)
else:
f = _create_delegator_method(name)
# don't overwrite existing methods/properties
if overwrite or not hasattr(cls, name):
setattr(cls, name, f)
def delegate_names(
delegate,
accessors: list[str],
typ: str,
overwrite: bool = False,
accessor_mapping: Callable[[str], str] = lambda x: x,
raise_on_missing: bool = True,
):
"""
Add delegated names to a class using a class decorator. This provides
an alternative usage to directly calling `_add_delegate_accessors`
below a class definition.
Parameters
----------
delegate : object
The class to get methods/properties & doc-strings.
accessors : Sequence[str]
List of accessor to add.
typ : {'property', 'method'}
overwrite : bool, default False
Overwrite the method/property in the target class if it exists.
accessor_mapping: Callable, default lambda x: x
Callable to map the delegate's function to the cls' function.
raise_on_missing: bool, default True
Raise if an accessor does not exist on delegate.
False skips the missing accessor.
Returns
-------
callable
A class decorator.
Examples
--------
@delegate_names(Categorical, ["categories", "ordered"], "property")
class CategoricalAccessor(PandasDelegate):
[...]
"""
def add_delegate_accessors(cls):
cls._add_delegate_accessors(
delegate,
accessors,
typ,
overwrite=overwrite,
accessor_mapping=accessor_mapping,
raise_on_missing=raise_on_missing,
)
return cls
return add_delegate_accessors
# Ported with modifications from xarray
# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py
# 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors
# 2. We use a UserWarning instead of a custom Warning
class CachedAccessor:
"""
Custom property-like object.
A descriptor for caching accessors.
Parameters
----------
name : str
Namespace that will be accessed under, e.g. ``df.foo``.
accessor : cls
Class with the extension methods.
Notes
-----
The accessor class's ``__init__`` method assumes that one of
``Series``, ``DataFrame`` or ``Index`` is provided as the
single argument ``data``.
"""
def __init__(self, name: str, accessor) -> None:
self._name = name
self._accessor = accessor
def __get__(self, obj, cls):
if obj is None:
# we're accessing the attribute of the class, i.e., Dataset.geo
return self._accessor
accessor_obj = self._accessor(obj)
# Replace the property with the accessor object. Inspired by:
# https://www.pydanny.com/cached-property.html
# We need to use object.__setattr__ because we overwrite __setattr__ on
# NDFrame
object.__setattr__(obj, self._name, accessor_obj)
return accessor_obj
@doc(klass="", others="")
def _register_accessor(name: str, cls):
"""
Register a custom accessor on {klass} objects.
Parameters
----------
name : str
Name under which the accessor should be registered. A warning is issued
if this name conflicts with a preexisting attribute.
Returns
-------
callable
A class decorator.
See Also
--------
register_dataframe_accessor : Register a custom accessor on DataFrame objects.
register_series_accessor : Register a custom accessor on Series objects.
register_index_accessor : Register a custom accessor on Index objects.
Notes
-----
When accessed, your accessor will be initialized with the pandas object
the user is interacting with. So the signature must be
.. code-block:: python
def __init__(self, pandas_object): # noqa: E999
...
For consistency with pandas methods, you should raise an ``AttributeError``
if the data passed to your accessor has an incorrect dtype.
>>> pd.Series(['a', 'b']).dt
Traceback (most recent call last):
...
AttributeError: Can only use .dt accessor with datetimelike values
Examples
--------
In your library code::
import pandas as pd
@pd.api.extensions.register_dataframe_accessor("geo")
class GeoAccessor:
def __init__(self, pandas_obj):
self._obj = pandas_obj
@property
def center(self):
# return the geographic center point of this DataFrame
lat = self._obj.latitude
lon = self._obj.longitude
return (float(lon.mean()), float(lat.mean()))
def plot(self):
# plot this array's data on a map, e.g., using Cartopy
pass
Back in an interactive IPython session:
.. code-block:: ipython
In [1]: ds = pd.DataFrame({{"longitude": np.linspace(0, 10),
...: "latitude": np.linspace(0, 20)}})
In [2]: ds.geo.center
Out[2]: (5.0, 10.0)
In [3]: ds.geo.plot() # plots data on a map
"""
def decorator(accessor):
if hasattr(cls, name):
warnings.warn(
f"registration of accessor {repr(accessor)} under name "
f"{repr(name)} for type {repr(cls)} is overriding a preexisting "
f"attribute with the same name.",
UserWarning,
stacklevel=find_stack_level(),
)
setattr(cls, name, CachedAccessor(name, accessor))
cls._accessors.add(name)
return accessor
return decorator
@doc(_register_accessor, klass="DataFrame")
def register_dataframe_accessor(name: str):
from pandas import DataFrame
return _register_accessor(name, DataFrame)
@doc(_register_accessor, klass="Series")
def register_series_accessor(name: str):
from pandas import Series
return _register_accessor(name, Series)
@doc(_register_accessor, klass="Index")
def register_index_accessor(name: str):
from pandas import Index
return _register_accessor(name, Index)
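A runnable sketch of the registration machinery above (the ``units`` namespace and ``UnitsAccessor`` are hypothetical names for illustration):

import pandas as pd

@pd.api.extensions.register_series_accessor("units")
class UnitsAccessor:
    def __init__(self, obj: pd.Series) -> None:
        self._obj = obj

    def to_km(self) -> pd.Series:
        # assume the Series holds metres
        return self._obj / 1000.0

s = pd.Series([1500.0, 250.0])
print(s.units.to_km())

CachedAccessor replaces itself with the constructed accessor on first access, so ``s.units`` is built only once per object.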

File diff suppressed because it is too large

View file

@@ -0,0 +1,140 @@
from pandas._libs import (
NaT,
Period,
Timedelta,
Timestamp,
)
from pandas._libs.missing import NA
from pandas.core.dtypes.dtypes import (
ArrowDtype,
CategoricalDtype,
DatetimeTZDtype,
IntervalDtype,
PeriodDtype,
)
from pandas.core.dtypes.missing import (
isna,
isnull,
notna,
notnull,
)
from pandas.core.algorithms import (
factorize,
unique,
value_counts,
)
from pandas.core.arrays import Categorical
from pandas.core.arrays.boolean import BooleanDtype
from pandas.core.arrays.floating import (
Float32Dtype,
Float64Dtype,
)
from pandas.core.arrays.integer import (
Int8Dtype,
Int16Dtype,
Int32Dtype,
Int64Dtype,
UInt8Dtype,
UInt16Dtype,
UInt32Dtype,
UInt64Dtype,
)
from pandas.core.arrays.string_ import StringDtype
from pandas.core.construction import array
from pandas.core.flags import Flags
from pandas.core.groupby import (
Grouper,
NamedAgg,
)
from pandas.core.indexes.api import (
CategoricalIndex,
DatetimeIndex,
Index,
IntervalIndex,
MultiIndex,
PeriodIndex,
RangeIndex,
TimedeltaIndex,
)
from pandas.core.indexes.datetimes import (
bdate_range,
date_range,
)
from pandas.core.indexes.interval import (
Interval,
interval_range,
)
from pandas.core.indexes.period import period_range
from pandas.core.indexes.timedeltas import timedelta_range
from pandas.core.indexing import IndexSlice
from pandas.core.series import Series
from pandas.core.tools.datetimes import to_datetime
from pandas.core.tools.numeric import to_numeric
from pandas.core.tools.timedeltas import to_timedelta
from pandas.io.formats.format import set_eng_float_format
from pandas.tseries.offsets import DateOffset
# DataFrame needs to be imported after NamedAgg to avoid a circular import
from pandas.core.frame import DataFrame # isort:skip
__all__ = [
"array",
"ArrowDtype",
"bdate_range",
"BooleanDtype",
"Categorical",
"CategoricalDtype",
"CategoricalIndex",
"DataFrame",
"DateOffset",
"date_range",
"DatetimeIndex",
"DatetimeTZDtype",
"factorize",
"Flags",
"Float32Dtype",
"Float64Dtype",
"Grouper",
"Index",
"IndexSlice",
"Int16Dtype",
"Int32Dtype",
"Int64Dtype",
"Int8Dtype",
"Interval",
"IntervalDtype",
"IntervalIndex",
"interval_range",
"isna",
"isnull",
"MultiIndex",
"NA",
"NamedAgg",
"NaT",
"notna",
"notnull",
"Period",
"PeriodDtype",
"PeriodIndex",
"period_range",
"RangeIndex",
"Series",
"set_eng_float_format",
"StringDtype",
"Timedelta",
"TimedeltaIndex",
"timedelta_range",
"Timestamp",
"to_datetime",
"to_numeric",
"to_timedelta",
"UInt16Dtype",
"UInt32Dtype",
"UInt64Dtype",
"UInt8Dtype",
"unique",
"value_counts",
]

File diff suppressed because it is too large

View file

@@ -0,0 +1,9 @@
"""
core.array_algos is for algorithms that operate on ndarray and ExtensionArray.
These should:
- Assume that any Index, Series, or DataFrame objects have already been unwrapped.
- Assume that any list arguments have already been cast to ndarray/EA.
- Not depend on Index, Series, or DataFrame, nor import any of these.
- May dispatch to ExtensionArray methods, but should not import from core.arrays.
"""

View file

@@ -0,0 +1,67 @@
"""
datetimelike_accumulations.py is for accumulations of datetimelike extension arrays
"""
from __future__ import annotations
from typing import Callable
import numpy as np
from pandas._libs import iNaT
from pandas.core.dtypes.missing import isna
def _cum_func(
func: Callable,
values: np.ndarray,
*,
skipna: bool = True,
):
"""
Accumulations for 1D datetimelike arrays.
Parameters
----------
func : np.cumsum, np.maximum.accumulate, np.minimum.accumulate
values : np.ndarray
Numpy array with the values (can be of any dtype that supports the
operation). Values is modified in place.
skipna : bool, default True
Whether to skip NA.
"""
try:
fill_value = {
np.maximum.accumulate: np.iinfo(np.int64).min,
np.cumsum: 0,
np.minimum.accumulate: np.iinfo(np.int64).max,
}[func]
except KeyError:
raise ValueError(f"No accumulation for {func} implemented on BaseMaskedArray")
mask = isna(values)
y = values.view("i8")
y[mask] = fill_value
if not skipna:
mask = np.maximum.accumulate(mask)
result = func(y)
result[mask] = iNaT
if values.dtype.kind in "mM":
return result.view(values.dtype.base)
return result
def cumsum(values: np.ndarray, *, skipna: bool = True) -> np.ndarray:
return _cum_func(np.cumsum, values, skipna=skipna)
def cummin(values: np.ndarray, *, skipna: bool = True):
return _cum_func(np.minimum.accumulate, values, skipna=skipna)
def cummax(values: np.ndarray, *, skipna: bool = True):
return _cum_func(np.maximum.accumulate, values, skipna=skipna)
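A minimal sketch of the NaT handling (private API; note that _cum_func mutates its input through the i8 view, hence the copies):

import numpy as np
from pandas.core.array_algos.datetimelike_accumulations import cummax

vals = np.array(["2024-01-02", "NaT", "2024-01-01"], dtype="M8[ns]")
# skipna=True: NaT is replaced by the identity value and skipped
print(cummax(vals.copy(), skipna=True))   # ['2024-01-02' 'NaT' '2024-01-02']
# skipna=False: the accumulated mask poisons every later entry
print(cummax(vals.copy(), skipna=False))  # ['2024-01-02' 'NaT' 'NaT']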

View file

@@ -0,0 +1,90 @@
"""
masked_accumulations.py is for accumulation algorithms using a mask-based approach
for missing values.
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Callable,
)
import numpy as np
if TYPE_CHECKING:
from pandas._typing import npt
def _cum_func(
func: Callable,
values: np.ndarray,
mask: npt.NDArray[np.bool_],
*,
skipna: bool = True,
):
"""
Accumulations for 1D masked array.
We will modify values in place to replace NAs with the appropriate fill value.
Parameters
----------
func : np.cumsum, np.cumprod, np.maximum.accumulate, np.minimum.accumulate
values : np.ndarray
Numpy array with the values (can be of any dtype that supports the
operation).
mask : np.ndarray
Boolean numpy array (True values indicate missing values).
skipna : bool, default True
Whether to skip NA.
"""
dtype_info: np.iinfo | np.finfo
if values.dtype.kind == "f":
dtype_info = np.finfo(values.dtype.type)
elif values.dtype.kind in "iu":
dtype_info = np.iinfo(values.dtype.type)
elif values.dtype.kind == "b":
# Max value of bool is 1, but since we are setting into a boolean
# array, 255 is fine as well. Min value has to be 0 when setting
# into the boolean array.
dtype_info = np.iinfo(np.uint8)
else:
raise NotImplementedError(
f"No masked accumulation defined for dtype {values.dtype.type}"
)
try:
fill_value = {
np.cumprod: 1,
np.maximum.accumulate: dtype_info.min,
np.cumsum: 0,
np.minimum.accumulate: dtype_info.max,
}[func]
except KeyError:
raise NotImplementedError(
f"No accumulation for {func} implemented on BaseMaskedArray"
)
values[mask] = fill_value
if not skipna:
mask = np.maximum.accumulate(mask)
values = func(values)
return values, mask
def cumsum(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True):
return _cum_func(np.cumsum, values, mask, skipna=skipna)
def cumprod(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True):
return _cum_func(np.cumprod, values, mask, skipna=skipna)
def cummin(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True):
return _cum_func(np.minimum.accumulate, values, mask, skipna=skipna)
def cummax(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True):
return _cum_func(np.maximum.accumulate, values, mask, skipna=skipna)
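A minimal sketch (private API): each masked slot is filled with the operation's identity value (0 for cumsum) so it cannot affect later entries, while the returned mask still marks it missing:

import numpy as np
from pandas.core.array_algos.masked_accumulations import cumsum

vals = np.array([1, 2, 3, 4], dtype="int64")
mask = np.array([False, True, False, False])  # True marks missing
out, out_mask = cumsum(vals.copy(), mask.copy(), skipna=True)
print(out)       # [1 1 4 8] -- the masked 2 contributed nothing
print(out_mask)  # [False  True False False]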

View file

@@ -0,0 +1,197 @@
"""
masked_reductions.py is for reduction algorithms using a mask-based approach
for missing values.
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Callable,
)
import warnings
import numpy as np
from pandas._libs import missing as libmissing
from pandas.core.nanops import check_below_min_count
if TYPE_CHECKING:
from pandas._typing import (
AxisInt,
npt,
)
def _reductions(
func: Callable,
values: np.ndarray,
mask: npt.NDArray[np.bool_],
*,
skipna: bool = True,
min_count: int = 0,
axis: AxisInt | None = None,
**kwargs,
):
"""
Sum, mean or product for 1D masked array.
Parameters
----------
func : np.sum or np.prod
values : np.ndarray
Numpy array with the values (can be of any dtype that supports the
operation).
mask : np.ndarray[bool]
Boolean numpy array (True values indicate missing values).
skipna : bool, default True
Whether to skip NA.
min_count : int, default 0
The required number of valid values to perform the operation. If fewer than
``min_count`` non-NA values are present the result will be NA.
axis : int, optional, default None
"""
if not skipna:
if mask.any() or check_below_min_count(values.shape, None, min_count):
return libmissing.NA
else:
return func(values, axis=axis, **kwargs)
else:
if check_below_min_count(values.shape, mask, min_count) and (
axis is None or values.ndim == 1
):
return libmissing.NA
return func(values, where=~mask, axis=axis, **kwargs)
def sum(
values: np.ndarray,
mask: npt.NDArray[np.bool_],
*,
skipna: bool = True,
min_count: int = 0,
axis: AxisInt | None = None,
):
return _reductions(
np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis
)
def prod(
values: np.ndarray,
mask: npt.NDArray[np.bool_],
*,
skipna: bool = True,
min_count: int = 0,
axis: AxisInt | None = None,
):
return _reductions(
np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis
)
def _minmax(
func: Callable,
values: np.ndarray,
mask: npt.NDArray[np.bool_],
*,
skipna: bool = True,
axis: AxisInt | None = None,
):
"""
Reduction for 1D masked array.
Parameters
----------
func : np.min or np.max
values : np.ndarray
Numpy array with the values (can be of any dtype that supports the
operation).
mask : np.ndarray[bool]
Boolean numpy array (True values indicate missing values).
skipna : bool, default True
Whether to skip NA.
axis : int, optional, default None
"""
if not skipna:
if mask.any() or not values.size:
# min/max with empty array raise in numpy, pandas returns NA
return libmissing.NA
else:
return func(values, axis=axis)
else:
subset = values[~mask]
if subset.size:
return func(subset, axis=axis)
else:
# min/max with empty array raise in numpy, pandas returns NA
return libmissing.NA
def min(
values: np.ndarray,
mask: npt.NDArray[np.bool_],
*,
skipna: bool = True,
axis: AxisInt | None = None,
):
return _minmax(np.min, values=values, mask=mask, skipna=skipna, axis=axis)
def max(
values: np.ndarray,
mask: npt.NDArray[np.bool_],
*,
skipna: bool = True,
axis: AxisInt | None = None,
):
return _minmax(np.max, values=values, mask=mask, skipna=skipna, axis=axis)
def mean(
values: np.ndarray,
mask: npt.NDArray[np.bool_],
*,
skipna: bool = True,
axis: AxisInt | None = None,
):
if not values.size or mask.all():
return libmissing.NA
return _reductions(np.mean, values=values, mask=mask, skipna=skipna, axis=axis)
def var(
values: np.ndarray,
mask: npt.NDArray[np.bool_],
*,
skipna: bool = True,
axis: AxisInt | None = None,
ddof: int = 1,
):
if not values.size or mask.all():
return libmissing.NA
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
return _reductions(
np.var, values=values, mask=mask, skipna=skipna, axis=axis, ddof=ddof
)
def std(
values: np.ndarray,
mask: npt.NDArray[np.bool_],
*,
skipna: bool = True,
axis: AxisInt | None = None,
ddof: int = 1,
):
if not values.size or mask.all():
return libmissing.NA
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
return _reductions(
np.std, values=values, mask=mask, skipna=skipna, axis=axis, ddof=ddof
)
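A minimal sketch of the three NA regimes (private API; libmissing.NA is the same singleton as pd.NA):

import numpy as np
from pandas.core.array_algos import masked_reductions

vals = np.array([1.0, 2.0, 4.0])
mask = np.array([False, True, False])  # True marks missing
print(masked_reductions.sum(vals, mask))                # 5.0 -- NA skipped
print(masked_reductions.sum(vals, mask, min_count=3))   # <NA>: only 2 valid values
print(masked_reductions.sum(vals, mask, skipna=False))  # <NA>: a missing value is present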

View file

@@ -0,0 +1,149 @@
"""
EA-compatible analogue to np.putmask
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
)
import numpy as np
from pandas._libs import lib
from pandas.core.dtypes.cast import infer_dtype_from
from pandas.core.dtypes.common import is_list_like
from pandas.core.arrays import ExtensionArray
if TYPE_CHECKING:
from pandas._typing import (
ArrayLike,
npt,
)
from pandas import MultiIndex
def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None:
"""
ExtensionArray-compatible implementation of np.putmask. The main
difference is we do not handle repeating or truncating like numpy.
Parameters
----------
values: np.ndarray or ExtensionArray
mask : np.ndarray[bool]
We assume extract_bool_array has already been called.
value : Any
"""
if (
not isinstance(values, np.ndarray)
or (values.dtype == object and not lib.is_scalar(value))
# GH#43424: np.putmask raises TypeError if we cannot cast between types with
# rule = "safe", a stricter guarantee we may not have here
or (
isinstance(value, np.ndarray) and not np.can_cast(value.dtype, values.dtype)
)
):
# GH#19266 using np.putmask gives unexpected results with listlike value
# along with object dtype
if is_list_like(value) and len(value) == len(values):
values[mask] = value[mask]
else:
values[mask] = value
else:
# GH#37833 np.putmask is more performant than __setitem__
np.putmask(values, mask, value)
def putmask_without_repeat(
values: np.ndarray, mask: npt.NDArray[np.bool_], new: Any
) -> None:
"""
np.putmask will truncate or repeat if `new` is a listlike with
len(new) != len(values). We require an exact match.
Parameters
----------
values : np.ndarray
mask : np.ndarray[bool]
new : Any
"""
if getattr(new, "ndim", 0) >= 1:
new = new.astype(values.dtype, copy=False)
# TODO: this prob needs some better checking for 2D cases
nlocs = mask.sum()
if nlocs > 0 and is_list_like(new) and getattr(new, "ndim", 1) == 1:
shape = np.shape(new)
# use np.shape for compatibility in case setitem_datetimelike_compat
# changed the arraylike to a list, e.g. test_where_dt64_2d
if nlocs == shape[-1]:
# GH#30567
# If length of ``new`` is less than the length of ``values``,
# `np.putmask` would first repeat the ``new`` array and then
# assign the masked values hence produces incorrect result.
# `np.place` on the other hand uses the ``new`` values as they are,
# placing them in the masked locations of ``values``
np.place(values, mask, new)
# i.e. values[mask] = new
elif mask.shape[-1] == shape[-1] or shape[-1] == 1:
np.putmask(values, mask, new)
else:
raise ValueError("cannot assign mismatch length to masked array")
else:
np.putmask(values, mask, new)
def validate_putmask(
values: ArrayLike | MultiIndex, mask: np.ndarray
) -> tuple[npt.NDArray[np.bool_], bool]:
"""
Validate mask and check if this putmask operation is a no-op.
"""
mask = extract_bool_array(mask)
if mask.shape != values.shape:
raise ValueError("putmask: mask and data must be the same size")
noop = not mask.any()
return mask, noop
def extract_bool_array(mask: ArrayLike) -> npt.NDArray[np.bool_]:
"""
If we have a SparseArray or BooleanArray, convert it to ndarray[bool].
"""
if isinstance(mask, ExtensionArray):
# We could have BooleanArray, Sparse[bool], ...
# Except for BooleanArray, this is equivalent to just
# np.asarray(mask, dtype=bool)
mask = mask.to_numpy(dtype=bool, na_value=False)
mask = np.asarray(mask, dtype=bool)
return mask
def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
"""
Parameters
----------
values : np.ndarray
num_set : int
For putmask, this is mask.sum()
other : Any
"""
if values.dtype == object:
dtype, _ = infer_dtype_from(other)
if lib.is_np_dtype(dtype, "mM"):
# https://github.com/numpy/numpy/issues/12550
# timedelta64 will incorrectly cast to int
if not is_list_like(other):
other = [other] * num_set
else:
other = list(other)
return other
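A minimal sketch of the exact-length contract (private API): np.putmask indexes a short ``new`` cyclically by absolute position, so putmask_without_repeat routes that case through np.place instead:

import numpy as np
from pandas.core.array_algos.putmask import putmask_without_repeat

values = np.array([1, 2, 3, 4], dtype="int64")
mask = np.array([True, False, True, False])
new = np.array([10, 30], dtype="int64")  # exactly mask.sum() replacements
putmask_without_repeat(values, mask, new)
print(values)  # [10  2 30  4]; np.putmask(values, mask, new) would give [10  2 10  4]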

View file

@@ -0,0 +1,226 @@
from __future__ import annotations
from typing import TYPE_CHECKING
import numpy as np
from pandas.core.dtypes.missing import (
isna,
na_value_for_dtype,
)
if TYPE_CHECKING:
from pandas._typing import (
ArrayLike,
Scalar,
npt,
)
def quantile_compat(
values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
) -> ArrayLike:
"""
Compute the quantiles of the given values for each quantile in `qs`.
Parameters
----------
values : np.ndarray or ExtensionArray
qs : np.ndarray[float64]
interpolation : str
Returns
-------
np.ndarray or ExtensionArray
"""
if isinstance(values, np.ndarray):
fill_value = na_value_for_dtype(values.dtype, compat=False)
mask = isna(values)
return quantile_with_mask(values, mask, fill_value, qs, interpolation)
else:
return values._quantile(qs, interpolation)
def quantile_with_mask(
values: np.ndarray,
mask: npt.NDArray[np.bool_],
fill_value,
qs: npt.NDArray[np.float64],
interpolation: str,
) -> np.ndarray:
"""
Compute the quantiles of the given values for each quantile in `qs`.
Parameters
----------
values : np.ndarray
For ExtensionArray, this is _values_for_factorize()[0]
mask : np.ndarray[bool]
mask = isna(values)
For ExtensionArray, this is computed before calling _value_for_factorize
fill_value : Scalar
The value used to fill NA entries with
For ExtensionArray, this is _values_for_factorize()[1]
qs : np.ndarray[float64]
interpolation : str
Type of interpolation
Returns
-------
np.ndarray
Notes
-----
Assumes values is already 2D. For ExtensionArray this means np.atleast_2d
has been called on _values_for_factorize()[0]
Quantile is computed along axis=1.
"""
assert values.shape == mask.shape
if values.ndim == 1:
# unsqueeze, operate, re-squeeze
values = np.atleast_2d(values)
mask = np.atleast_2d(mask)
res_values = quantile_with_mask(values, mask, fill_value, qs, interpolation)
return res_values[0]
assert values.ndim == 2
is_empty = values.shape[1] == 0
if is_empty:
# create the array of na_values
# 2d len(values) * len(qs)
flat = np.array([fill_value] * len(qs))
result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
else:
result = _nanpercentile(
values,
qs * 100.0,
na_value=fill_value,
mask=mask,
interpolation=interpolation,
)
result = np.array(result, copy=False)
result = result.T
return result
def _nanpercentile_1d(
values: np.ndarray,
mask: npt.NDArray[np.bool_],
qs: npt.NDArray[np.float64],
na_value: Scalar,
interpolation: str,
) -> Scalar | np.ndarray:
"""
Wrapper for np.percentile that skips missing values, specialized to
1-dimensional case.
Parameters
----------
values : array over which to find quantiles
mask : ndarray[bool]
locations in values that should be considered missing
qs : np.ndarray[float64] of quantile indices to find
na_value : scalar
value to return for empty or all-null values
interpolation : str
Returns
-------
quantiles : scalar or array
"""
# mask is Union[ExtensionArray, ndarray]
values = values[~mask]
if len(values) == 0:
# Can't pass dtype=values.dtype here bc we might have na_value=np.nan
# with values.dtype=int64 see test_quantile_empty
# equiv: 'np.array([na_value] * len(qs))' but much faster
return np.full(len(qs), na_value)
return np.percentile(
values,
qs,
# error: No overload variant of "percentile" matches argument
# types "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]"
# , "Dict[str, str]" [call-overload]
method=interpolation, # type: ignore[call-overload]
)
def _nanpercentile(
values: np.ndarray,
qs: npt.NDArray[np.float64],
*,
na_value,
mask: npt.NDArray[np.bool_],
interpolation: str,
):
"""
Wrapper for np.percentile that skips missing values.
Parameters
----------
values : np.ndarray[ndim=2] over which to find quantiles
qs : np.ndarray[float64] of quantile indices to find
na_value : scalar
value to return for empty or all-null values
mask : np.ndarray[bool]
locations in values that should be considered missing
interpolation : str
Returns
-------
quantiles : scalar or array
"""
if values.dtype.kind in "mM":
# need to cast to integer to avoid rounding errors in numpy
result = _nanpercentile(
values.view("i8"),
qs=qs,
na_value=na_value.view("i8"),
mask=mask,
interpolation=interpolation,
)
# Note: we have to do `astype` and not view because in general we
# have float result at this point, not i8
return result.astype(values.dtype)
if mask.any():
# Caller is responsible for ensuring mask shape match
assert mask.shape == values.shape
result = [
_nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
for (val, m) in zip(list(values), list(mask))
]
if values.dtype.kind == "f":
# preserve itemsize
result = np.array(result, dtype=values.dtype, copy=False).T
else:
result = np.array(result, copy=False).T
if (
result.dtype != values.dtype
and not mask.all()
and (result == result.astype(values.dtype, copy=False)).all()
):
# mask.all() will never get cast back to int
# e.g. values is integer dtype and result is floating dtype,
# only cast back to integer dtype if result values are all-integer.
result = result.astype(values.dtype, copy=False)
return result
else:
return np.percentile(
values,
qs,
axis=1,
# error: No overload variant of "percentile" matches argument types
# "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]",
# "int", "Dict[str, str]" [call-overload]
method=interpolation, # type: ignore[call-overload]
)
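A minimal sketch (private API): values must be 2D with quantiles taken along axis=1, and any True in the mask routes the computation through the slower per-row NaN-skipping path:

import numpy as np
from pandas.core.array_algos.quantile import quantile_with_mask

values = np.array([[1.0, np.nan, 3.0, 4.0]])  # one row of 4 values
mask = np.isnan(values)
qs = np.array([0.5])
result = quantile_with_mask(values, mask, np.nan, qs, "linear")
print(result)  # [[3.]] -- the median of 1, 3, 4 with the NaN dropped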

View file

@@ -0,0 +1,152 @@
"""
Methods used by Block.replace and related methods.
"""
from __future__ import annotations
import operator
import re
from re import Pattern
from typing import (
TYPE_CHECKING,
Any,
)
import numpy as np
from pandas.core.dtypes.common import (
is_bool,
is_re,
is_re_compilable,
)
from pandas.core.dtypes.missing import isna
if TYPE_CHECKING:
from pandas._typing import (
ArrayLike,
Scalar,
npt,
)
def should_use_regex(regex: bool, to_replace: Any) -> bool:
"""
Decide whether to treat `to_replace` as a regular expression.
"""
if is_re(to_replace):
regex = True
regex = regex and is_re_compilable(to_replace)
# Don't use regex if the pattern is empty.
regex = regex and re.compile(to_replace).pattern != ""
return regex
def compare_or_regex_search(
a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: npt.NDArray[np.bool_]
) -> ArrayLike:
"""
Compare two array-like inputs of the same shape or two scalar values
Calls operator.eq or re.search, depending on regex argument. If regex is
True, perform an element-wise regex matching.
Parameters
----------
a : array-like
b : scalar or regex pattern
regex : bool
mask : np.ndarray[bool]
Returns
-------
mask : array-like of bool
"""
if isna(b):
return ~mask
def _check_comparison_types(
result: ArrayLike | bool, a: ArrayLike, b: Scalar | Pattern
):
"""
Raises an error if the two arrays (a,b) cannot be compared.
Otherwise, returns the comparison result as expected.
"""
if is_bool(result) and isinstance(a, np.ndarray):
type_names = [type(a).__name__, type(b).__name__]
type_names[0] = f"ndarray(dtype={a.dtype})"
raise TypeError(
f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
)
if not regex or not should_use_regex(regex, b):
# TODO: should use missing.mask_missing?
op = lambda x: operator.eq(x, b)
else:
op = np.vectorize(
lambda x: bool(re.search(b, x))
if isinstance(x, str) and isinstance(b, (str, Pattern))
else False
)
# GH#32621 use mask to avoid comparing to NAs
if isinstance(a, np.ndarray):
a = a[mask]
result = op(a)
if isinstance(result, np.ndarray) and mask is not None:
# The shape of the mask can differ from that of the result
# since we may compare only a subset of a's or b's elements
tmp = np.zeros(mask.shape, dtype=np.bool_)
np.place(tmp, mask, result)
result = tmp
_check_comparison_types(result, a, b)
return result
def replace_regex(
values: ArrayLike, rx: re.Pattern, value, mask: npt.NDArray[np.bool_] | None
) -> None:
"""
Parameters
----------
values : ArrayLike
Object dtype.
rx : re.Pattern
value : Any
mask : np.ndarray[bool], optional
Notes
-----
Alters values in-place.
"""
# deal with replacing values with objects (strings) that match but
# whose replacement is not a string (numeric, nan, object)
if isna(value) or not isinstance(value, str):
def re_replacer(s):
if is_re(rx) and isinstance(s, str):
return value if rx.search(s) is not None else s
else:
return s
else:
# value is guaranteed to be a string here; s can be either a string
# or null, and if it's null it gets returned unchanged
def re_replacer(s):
if is_re(rx) and isinstance(s, str):
return rx.sub(value, s)
else:
return s
f = np.vectorize(re_replacer, otypes=[np.object_])
if mask is None:
values[:] = f(values)
else:
values[mask] = f(values[mask])
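A minimal sketch (private API): with a string replacement value, every string element is run through rx.sub and non-strings pass through untouched; the array is altered in place:

import re
import numpy as np
from pandas.core.array_algos.replace import replace_regex

values = np.array(["foo-1", "bar-2", None], dtype=object)
replace_regex(values, re.compile(r"-\d+"), "", mask=None)
print(values)  # ['foo' 'bar' None]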

View file

@@ -0,0 +1,595 @@
from __future__ import annotations
import functools
from typing import (
TYPE_CHECKING,
cast,
overload,
)
import numpy as np
from pandas._libs import (
algos as libalgos,
lib,
)
from pandas.core.dtypes.cast import maybe_promote
from pandas.core.dtypes.common import (
ensure_platform_int,
is_1d_only_ea_dtype,
)
from pandas.core.dtypes.missing import na_value_for_dtype
from pandas.core.construction import ensure_wrapped_if_datetimelike
if TYPE_CHECKING:
from pandas._typing import (
ArrayLike,
AxisInt,
npt,
)
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.arrays.base import ExtensionArray
@overload
def take_nd(
arr: np.ndarray,
indexer,
axis: AxisInt = ...,
fill_value=...,
allow_fill: bool = ...,
) -> np.ndarray:
...
@overload
def take_nd(
arr: ExtensionArray,
indexer,
axis: AxisInt = ...,
fill_value=...,
allow_fill: bool = ...,
) -> ArrayLike:
...
def take_nd(
arr: ArrayLike,
indexer,
axis: AxisInt = 0,
fill_value=lib.no_default,
allow_fill: bool = True,
) -> ArrayLike:
"""
Specialized Cython take which sets NaN values in one pass
This dispatches to ``take`` defined on ExtensionArrays. It does not
currently dispatch to ``SparseArray.take`` for sparse ``arr``.
Note: this function assumes that the indexer is a valid(ated) indexer with
no out of bound indices.
Parameters
----------
arr : np.ndarray or ExtensionArray
Input array.
indexer : ndarray
1-D array of indices to take, subarrays corresponding to -1 value
indices are filled with fill_value
axis : int, default 0
Axis to take from
fill_value : any, default np.nan
Fill value to replace -1 values with
allow_fill : bool, default True
If False, indexer is assumed to contain no -1 values so no filling
will be done. This short-circuits computation of a mask. Result is
undefined if allow_fill == False and -1 is present in indexer.
Returns
-------
subarray : np.ndarray or ExtensionArray
May be the same type as the input, or cast to an ndarray.
"""
if fill_value is lib.no_default:
fill_value = na_value_for_dtype(arr.dtype, compat=False)
elif lib.is_np_dtype(arr.dtype, "mM"):
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
if arr.dtype != dtype:
# EA.take is strict about returning a new object of the same type
# so for that case cast upfront
arr = arr.astype(dtype)
if not isinstance(arr, np.ndarray):
# i.e. ExtensionArray,
# includes for EA to catch DatetimeArray, TimedeltaArray
if not is_1d_only_ea_dtype(arr.dtype):
# i.e. DatetimeArray, TimedeltaArray
arr = cast("NDArrayBackedExtensionArray", arr)
return arr.take(
indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
)
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
arr = np.asarray(arr)
return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill)
def _take_nd_ndarray(
arr: np.ndarray,
indexer: npt.NDArray[np.intp] | None,
axis: AxisInt,
fill_value,
allow_fill: bool,
) -> np.ndarray:
if indexer is None:
indexer = np.arange(arr.shape[axis], dtype=np.intp)
dtype, fill_value = arr.dtype, arr.dtype.type()
else:
indexer = ensure_platform_int(indexer)
dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
arr, indexer, fill_value, allow_fill
)
flip_order = False
if arr.ndim == 2 and arr.flags.f_contiguous:
flip_order = True
if flip_order:
arr = arr.T
axis = arr.ndim - axis - 1
# at this point, it's guaranteed that dtype can hold both the arr values
# and the fill_value
out_shape_ = list(arr.shape)
out_shape_[axis] = len(indexer)
out_shape = tuple(out_shape_)
if arr.flags.f_contiguous and axis == arr.ndim - 1:
# minor tweak that can make an order-of-magnitude difference
# for dataframes initialized directly from 2-d ndarrays
# (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its
# f-contiguous transpose)
out = np.empty(out_shape, dtype=dtype, order="F")
else:
out = np.empty(out_shape, dtype=dtype)
func = _get_take_nd_function(
arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info
)
func(arr, indexer, out, fill_value)
if flip_order:
out = out.T
return out
def take_1d(
arr: ArrayLike,
indexer: npt.NDArray[np.intp],
fill_value=None,
allow_fill: bool = True,
mask: npt.NDArray[np.bool_] | None = None,
) -> ArrayLike:
"""
Specialized version for 1D arrays. Differences compared to `take_nd`:
- Assumes input array has already been converted to numpy array / EA
- Assumes indexer is already guaranteed to be intp dtype ndarray
- Only works for 1D arrays
To ensure the lowest possible overhead.
Note: similarly to `take_nd`, this function assumes that the indexer is
a valid(ated) indexer with no out of bound indices.
Parameters
----------
arr : np.ndarray or ExtensionArray
Input array.
indexer : ndarray
1-D array of indices to take (validated indices, intp dtype).
fill_value : any, default np.nan
Fill value to replace -1 values with
allow_fill : bool, default True
If False, indexer is assumed to contain no -1 values so no filling
will be done. This short-circuits computation of a mask. Result is
undefined if allow_fill == False and -1 is present in indexer.
mask : np.ndarray, optional, default None
If `allow_fill` is True, and the mask (where indexer == -1) is already
known, it can be passed to avoid recomputation.
"""
if not isinstance(arr, np.ndarray):
# ExtensionArray -> dispatch to their method
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
if not allow_fill:
return arr.take(indexer)
dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
arr, indexer, fill_value, True, mask
)
# at this point, it's guaranteed that dtype can hold both the arr values
# and the fill_value
out = np.empty(indexer.shape, dtype=dtype)
func = _get_take_nd_function(
arr.ndim, arr.dtype, out.dtype, axis=0, mask_info=mask_info
)
func(arr, indexer, out, fill_value)
return out
def take_2d_multi(
arr: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
fill_value=np.nan,
) -> np.ndarray:
"""
Specialized Cython take which sets NaN values in one pass.
"""
# This is only called from one place in DataFrame._reindex_multi,
# so we know indexer is well-behaved.
assert indexer is not None
assert indexer[0] is not None
assert indexer[1] is not None
row_idx, col_idx = indexer
row_idx = ensure_platform_int(row_idx)
col_idx = ensure_platform_int(col_idx)
indexer = row_idx, col_idx
mask_info = None
# check for promotion based on types only (do this first because
# it's faster than computing a mask)
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
if dtype != arr.dtype:
# check if promotion is actually required based on indexer
row_mask = row_idx == -1
col_mask = col_idx == -1
row_needs = row_mask.any()
col_needs = col_mask.any()
mask_info = (row_mask, col_mask), (row_needs, col_needs)
if not (row_needs or col_needs):
# if not, then depromote, set fill_value to dummy
# (it won't be used but we don't want the cython code
# to crash when trying to cast it to dtype)
dtype, fill_value = arr.dtype, arr.dtype.type()
# at this point, it's guaranteed that dtype can hold both the arr values
# and the fill_value
out_shape = len(row_idx), len(col_idx)
out = np.empty(out_shape, dtype=dtype)
func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None)
if func is None and arr.dtype != out.dtype:
func = _take_2d_multi_dict.get((out.dtype.name, out.dtype.name), None)
if func is not None:
func = _convert_wrapper(func, out.dtype)
if func is not None:
func(arr, indexer, out=out, fill_value=fill_value)
else:
# test_reindex_multi
_take_2d_multi_object(
arr, indexer, out, fill_value=fill_value, mask_info=mask_info
)
return out
@functools.lru_cache
def _get_take_nd_function_cached(
ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: AxisInt
):
"""
Part of _get_take_nd_function below that doesn't need `mask_info` and thus
can be cached (mask_info potentially contains a numpy ndarray which is not
hashable and thus cannot be used as argument for cached function).
"""
tup = (arr_dtype.name, out_dtype.name)
if ndim == 1:
func = _take_1d_dict.get(tup, None)
elif ndim == 2:
if axis == 0:
func = _take_2d_axis0_dict.get(tup, None)
else:
func = _take_2d_axis1_dict.get(tup, None)
if func is not None:
return func
# We get here with string, uint, float16, and complex dtypes that could
# potentially be handled in algos_take_helper.
# Also a couple with (M8[ns], object) and (m8[ns], object)
tup = (out_dtype.name, out_dtype.name)
if ndim == 1:
func = _take_1d_dict.get(tup, None)
elif ndim == 2:
if axis == 0:
func = _take_2d_axis0_dict.get(tup, None)
else:
func = _take_2d_axis1_dict.get(tup, None)
if func is not None:
func = _convert_wrapper(func, out_dtype)
return func
return None
def _get_take_nd_function(
ndim: int,
arr_dtype: np.dtype,
out_dtype: np.dtype,
axis: AxisInt = 0,
mask_info=None,
):
"""
Get the appropriate "take" implementation for the given dimension, axis
and dtypes.
"""
func = None
if ndim <= 2:
# for this part we don't need `mask_info` -> use the cached algo lookup
func = _get_take_nd_function_cached(ndim, arr_dtype, out_dtype, axis)
if func is None:
def func(arr, indexer, out, fill_value=np.nan) -> None:
indexer = ensure_platform_int(indexer)
_take_nd_object(
arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info
)
return func
def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None):
def wrapper(
arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan
) -> None:
if arr_dtype is not None:
arr = arr.view(arr_dtype)
if out_dtype is not None:
out = out.view(out_dtype)
if fill_wrap is not None:
# FIXME: if we get here with dt64/td64 we need to be sure we have
# matching resos
if fill_value.dtype.kind == "m":
fill_value = fill_value.astype("m8[ns]")
else:
fill_value = fill_value.astype("M8[ns]")
fill_value = fill_wrap(fill_value)
f(arr, indexer, out, fill_value=fill_value)
return wrapper
def _convert_wrapper(f, conv_dtype):
def wrapper(
arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan
) -> None:
if conv_dtype == object:
# GH#39755 avoid casting dt64/td64 to integers
arr = ensure_wrapped_if_datetimelike(arr)
arr = arr.astype(conv_dtype)
f(arr, indexer, out, fill_value=fill_value)
return wrapper
_take_1d_dict = {
("int8", "int8"): libalgos.take_1d_int8_int8,
("int8", "int32"): libalgos.take_1d_int8_int32,
("int8", "int64"): libalgos.take_1d_int8_int64,
("int8", "float64"): libalgos.take_1d_int8_float64,
("int16", "int16"): libalgos.take_1d_int16_int16,
("int16", "int32"): libalgos.take_1d_int16_int32,
("int16", "int64"): libalgos.take_1d_int16_int64,
("int16", "float64"): libalgos.take_1d_int16_float64,
("int32", "int32"): libalgos.take_1d_int32_int32,
("int32", "int64"): libalgos.take_1d_int32_int64,
("int32", "float64"): libalgos.take_1d_int32_float64,
("int64", "int64"): libalgos.take_1d_int64_int64,
("int64", "float64"): libalgos.take_1d_int64_float64,
("float32", "float32"): libalgos.take_1d_float32_float32,
("float32", "float64"): libalgos.take_1d_float32_float64,
("float64", "float64"): libalgos.take_1d_float64_float64,
("object", "object"): libalgos.take_1d_object_object,
("bool", "bool"): _view_wrapper(libalgos.take_1d_bool_bool, np.uint8, np.uint8),
("bool", "object"): _view_wrapper(libalgos.take_1d_bool_object, np.uint8, None),
("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
libalgos.take_1d_int64_int64, np.int64, np.int64, np.int64
),
("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
libalgos.take_1d_int64_int64, np.int64, np.int64, np.int64
),
}
_take_2d_axis0_dict = {
("int8", "int8"): libalgos.take_2d_axis0_int8_int8,
("int8", "int32"): libalgos.take_2d_axis0_int8_int32,
("int8", "int64"): libalgos.take_2d_axis0_int8_int64,
("int8", "float64"): libalgos.take_2d_axis0_int8_float64,
("int16", "int16"): libalgos.take_2d_axis0_int16_int16,
("int16", "int32"): libalgos.take_2d_axis0_int16_int32,
("int16", "int64"): libalgos.take_2d_axis0_int16_int64,
("int16", "float64"): libalgos.take_2d_axis0_int16_float64,
("int32", "int32"): libalgos.take_2d_axis0_int32_int32,
("int32", "int64"): libalgos.take_2d_axis0_int32_int64,
("int32", "float64"): libalgos.take_2d_axis0_int32_float64,
("int64", "int64"): libalgos.take_2d_axis0_int64_int64,
("int64", "float64"): libalgos.take_2d_axis0_int64_float64,
("float32", "float32"): libalgos.take_2d_axis0_float32_float32,
("float32", "float64"): libalgos.take_2d_axis0_float32_float64,
("float64", "float64"): libalgos.take_2d_axis0_float64_float64,
("object", "object"): libalgos.take_2d_axis0_object_object,
("bool", "bool"): _view_wrapper(
libalgos.take_2d_axis0_bool_bool, np.uint8, np.uint8
),
("bool", "object"): _view_wrapper(
libalgos.take_2d_axis0_bool_object, np.uint8, None
),
("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
libalgos.take_2d_axis0_int64_int64, np.int64, np.int64, fill_wrap=np.int64
),
("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
libalgos.take_2d_axis0_int64_int64, np.int64, np.int64, fill_wrap=np.int64
),
}
_take_2d_axis1_dict = {
("int8", "int8"): libalgos.take_2d_axis1_int8_int8,
("int8", "int32"): libalgos.take_2d_axis1_int8_int32,
("int8", "int64"): libalgos.take_2d_axis1_int8_int64,
("int8", "float64"): libalgos.take_2d_axis1_int8_float64,
("int16", "int16"): libalgos.take_2d_axis1_int16_int16,
("int16", "int32"): libalgos.take_2d_axis1_int16_int32,
("int16", "int64"): libalgos.take_2d_axis1_int16_int64,
("int16", "float64"): libalgos.take_2d_axis1_int16_float64,
("int32", "int32"): libalgos.take_2d_axis1_int32_int32,
("int32", "int64"): libalgos.take_2d_axis1_int32_int64,
("int32", "float64"): libalgos.take_2d_axis1_int32_float64,
("int64", "int64"): libalgos.take_2d_axis1_int64_int64,
("int64", "float64"): libalgos.take_2d_axis1_int64_float64,
("float32", "float32"): libalgos.take_2d_axis1_float32_float32,
("float32", "float64"): libalgos.take_2d_axis1_float32_float64,
("float64", "float64"): libalgos.take_2d_axis1_float64_float64,
("object", "object"): libalgos.take_2d_axis1_object_object,
("bool", "bool"): _view_wrapper(
libalgos.take_2d_axis1_bool_bool, np.uint8, np.uint8
),
("bool", "object"): _view_wrapper(
libalgos.take_2d_axis1_bool_object, np.uint8, None
),
("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
libalgos.take_2d_axis1_int64_int64, np.int64, np.int64, fill_wrap=np.int64
),
("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
libalgos.take_2d_axis1_int64_int64, np.int64, np.int64, fill_wrap=np.int64
),
}
_take_2d_multi_dict = {
("int8", "int8"): libalgos.take_2d_multi_int8_int8,
("int8", "int32"): libalgos.take_2d_multi_int8_int32,
("int8", "int64"): libalgos.take_2d_multi_int8_int64,
("int8", "float64"): libalgos.take_2d_multi_int8_float64,
("int16", "int16"): libalgos.take_2d_multi_int16_int16,
("int16", "int32"): libalgos.take_2d_multi_int16_int32,
("int16", "int64"): libalgos.take_2d_multi_int16_int64,
("int16", "float64"): libalgos.take_2d_multi_int16_float64,
("int32", "int32"): libalgos.take_2d_multi_int32_int32,
("int32", "int64"): libalgos.take_2d_multi_int32_int64,
("int32", "float64"): libalgos.take_2d_multi_int32_float64,
("int64", "int64"): libalgos.take_2d_multi_int64_int64,
("int64", "float64"): libalgos.take_2d_multi_int64_float64,
("float32", "float32"): libalgos.take_2d_multi_float32_float32,
("float32", "float64"): libalgos.take_2d_multi_float32_float64,
("float64", "float64"): libalgos.take_2d_multi_float64_float64,
("object", "object"): libalgos.take_2d_multi_object_object,
("bool", "bool"): _view_wrapper(
libalgos.take_2d_multi_bool_bool, np.uint8, np.uint8
),
("bool", "object"): _view_wrapper(
libalgos.take_2d_multi_bool_object, np.uint8, None
),
("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
libalgos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64
),
("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
libalgos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64
),
}
def _take_nd_object(
arr: np.ndarray,
indexer: npt.NDArray[np.intp],
out: np.ndarray,
axis: AxisInt,
fill_value,
mask_info,
) -> None:
if mask_info is not None:
mask, needs_masking = mask_info
else:
mask = indexer == -1
needs_masking = mask.any()
if arr.dtype != out.dtype:
arr = arr.astype(out.dtype)
if arr.shape[axis] > 0:
arr.take(indexer, axis=axis, out=out)
if needs_masking:
outindexer = [slice(None)] * arr.ndim
outindexer[axis] = mask
out[tuple(outindexer)] = fill_value
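# Illustrative sketch (editor's addition): the ``outindexer`` trick above
# builds a per-axis selector so only the masked positions along ``axis`` are
# filled, e.g. masking one row of a 2D array:
#
#     >>> out = np.arange(6.0).reshape(3, 2)
#     >>> sel = [slice(None)] * out.ndim
#     >>> sel[0] = np.array([False, True, False])
#     >>> out[tuple(sel)] = np.nan
#     >>> out[1]
#     array([nan, nan])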
def _take_2d_multi_object(
arr: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value,
mask_info,
) -> None:
# this is not ideal, performance-wise, but it's better than raising
# an exception (best to optimize in Cython to avoid getting here)
row_idx, col_idx = indexer # both np.intp
if mask_info is not None:
(row_mask, col_mask), (row_needs, col_needs) = mask_info
else:
row_mask = row_idx == -1
col_mask = col_idx == -1
row_needs = row_mask.any()
col_needs = col_mask.any()
if fill_value is not None:
if row_needs:
out[row_mask, :] = fill_value
if col_needs:
out[:, col_mask] = fill_value
for i, u_ in enumerate(row_idx):
if u_ != -1:
for j, v in enumerate(col_idx):
if v != -1:
out[i, j] = arr[u_, v]
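# Illustrative sketch (editor's addition): with no -1 entries, the double loop
# above collapses to a single fancy-indexing call:
#
#     >>> arr = np.arange(9).reshape(3, 3)
#     >>> arr[np.ix_([0, 2], [1, 2])]
#     array([[1, 2],
#            [7, 8]])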
def _take_preprocess_indexer_and_fill_value(
arr: np.ndarray,
indexer: npt.NDArray[np.intp],
fill_value,
allow_fill: bool,
mask: npt.NDArray[np.bool_] | None = None,
):
mask_info: tuple[np.ndarray | None, bool] | None = None
if not allow_fill:
dtype, fill_value = arr.dtype, arr.dtype.type()
mask_info = None, False
else:
# check for promotion based on types only (do this first because
# it's faster than computing a mask)
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
if dtype != arr.dtype:
# check if promotion is actually required based on indexer
if mask is not None:
needs_masking = True
else:
mask = indexer == -1
needs_masking = bool(mask.any())
mask_info = mask, needs_masking
if not needs_masking:
# if not, then depromote, set fill_value to dummy
# (it won't be used but we don't want the cython code
# to crash when trying to cast it to dtype)
dtype, fill_value = arr.dtype, arr.dtype.type()
return dtype, fill_value, mask_info
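# Illustrative sketch (editor's addition): promotion is only kept when the
# indexer really contains -1.  ``maybe_promote`` is a pandas internal;
# ``np.promote_types`` below is merely an approximation of its effect:
#
#     >>> indexer = np.array([0, 2, -1], dtype=np.intp)
#     >>> bool((indexer == -1).any())
#     True
#     >>> np.promote_types(np.dtype("int64"), np.dtype("float64"))
#     dtype('float64')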

View file

@@ -0,0 +1,50 @@
"""
transforms.py is for shape-preserving functions.
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import numpy as np
if TYPE_CHECKING:
from pandas._typing import (
AxisInt,
Scalar,
)
def shift(
values: np.ndarray, periods: int, axis: AxisInt, fill_value: Scalar
) -> np.ndarray:
new_values = values
if periods == 0 or values.size == 0:
return new_values.copy()
# make sure array sent to np.roll is c_contiguous
f_ordered = values.flags.f_contiguous
if f_ordered:
new_values = new_values.T
axis = new_values.ndim - axis - 1
if new_values.size:
new_values = np.roll(
new_values,
np.intp(periods),
axis=axis,
)
axis_indexer = [slice(None)] * values.ndim
if periods > 0:
axis_indexer[axis] = slice(None, periods)
else:
axis_indexer[axis] = slice(periods, None)
new_values[tuple(axis_indexer)] = fill_value
# restore original order
if f_ordered:
new_values = new_values.T
return new_values
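# Illustrative sketch (editor's addition): the roll-then-fill pattern above on
# a small 1D array; a positive ``periods`` moves values toward higher indices
# and fills the vacated leading slots:
#
#     >>> shift(np.array([1.0, 2.0, 3.0, 4.0]), periods=1, axis=0, fill_value=np.nan)
#     array([nan,  1.,  2.,  3.])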

View file

@@ -0,0 +1,527 @@
"""
Methods that can be shared by many array-like classes or subclasses:
Series
Index
ExtensionArray
"""
from __future__ import annotations
import operator
from typing import Any
import numpy as np
from pandas._libs import lib
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
from pandas.core.dtypes.generic import ABCNDFrame
from pandas.core import roperator
from pandas.core.construction import extract_array
from pandas.core.ops.common import unpack_zerodim_and_defer
REDUCTION_ALIASES = {
"maximum": "max",
"minimum": "min",
"add": "sum",
"multiply": "prod",
}
class OpsMixin:
# -------------------------------------------------------------
# Comparisons
def _cmp_method(self, other, op):
return NotImplemented
@unpack_zerodim_and_defer("__eq__")
def __eq__(self, other):
return self._cmp_method(other, operator.eq)
@unpack_zerodim_and_defer("__ne__")
def __ne__(self, other):
return self._cmp_method(other, operator.ne)
@unpack_zerodim_and_defer("__lt__")
def __lt__(self, other):
return self._cmp_method(other, operator.lt)
@unpack_zerodim_and_defer("__le__")
def __le__(self, other):
return self._cmp_method(other, operator.le)
@unpack_zerodim_and_defer("__gt__")
def __gt__(self, other):
return self._cmp_method(other, operator.gt)
@unpack_zerodim_and_defer("__ge__")
def __ge__(self, other):
return self._cmp_method(other, operator.ge)
# -------------------------------------------------------------
# Logical Methods
def _logical_method(self, other, op):
return NotImplemented
@unpack_zerodim_and_defer("__and__")
def __and__(self, other):
return self._logical_method(other, operator.and_)
@unpack_zerodim_and_defer("__rand__")
def __rand__(self, other):
return self._logical_method(other, roperator.rand_)
@unpack_zerodim_and_defer("__or__")
def __or__(self, other):
return self._logical_method(other, operator.or_)
@unpack_zerodim_and_defer("__ror__")
def __ror__(self, other):
return self._logical_method(other, roperator.ror_)
@unpack_zerodim_and_defer("__xor__")
def __xor__(self, other):
return self._logical_method(other, operator.xor)
@unpack_zerodim_and_defer("__rxor__")
def __rxor__(self, other):
return self._logical_method(other, roperator.rxor)
# -------------------------------------------------------------
# Arithmetic Methods
def _arith_method(self, other, op):
return NotImplemented
@unpack_zerodim_and_defer("__add__")
def __add__(self, other):
"""
Get Addition of DataFrame and other, column-wise.
Equivalent to ``DataFrame.add(other)``.
Parameters
----------
other : scalar, sequence, Series, dict or DataFrame
Object to be added to the DataFrame.
Returns
-------
DataFrame
The result of adding ``other`` to DataFrame.
See Also
--------
DataFrame.add : Add a DataFrame and another object, with option for index-
or column-oriented addition.
Examples
--------
>>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
... index=['elk', 'moose'])
>>> df
height weight
elk 1.5 500
moose 2.6 800
Adding a scalar affects all rows and columns.
>>> df[['height', 'weight']] + 1.5
height weight
elk 3.0 501.5
moose 4.1 801.5
Each element of a list is added to a column of the DataFrame, in order.
>>> df[['height', 'weight']] + [0.5, 1.5]
height weight
elk 2.0 501.5
moose 3.1 801.5
Keys of a dictionary are aligned to the DataFrame, based on column names;
each value in the dictionary is added to the corresponding column.
>>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
height weight
elk 2.0 501.5
moose 3.1 801.5
When `other` is a :class:`Series`, the index of `other` is aligned with the
columns of the DataFrame.
>>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
>>> df[['height', 'weight']] + s1
height weight
elk 3.0 500.5
moose 4.1 800.5
Even when the index of `other` is the same as the index of the DataFrame,
the :class:`Series` will not be reoriented. If index-wise alignment is desired,
:meth:`DataFrame.add` should be used with `axis='index'`.
>>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
>>> df[['height', 'weight']] + s2
elk height moose weight
elk NaN NaN NaN NaN
moose NaN NaN NaN NaN
>>> df[['height', 'weight']].add(s2, axis='index')
height weight
elk 2.0 500.5
moose 4.1 801.5
When `other` is a :class:`DataFrame`, both columns names and the
index are aligned.
>>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
... index=['elk', 'moose', 'deer'])
>>> df[['height', 'weight']] + other
height weight
deer NaN NaN
elk 1.7 NaN
moose 3.0 NaN
"""
return self._arith_method(other, operator.add)
@unpack_zerodim_and_defer("__radd__")
def __radd__(self, other):
return self._arith_method(other, roperator.radd)
@unpack_zerodim_and_defer("__sub__")
def __sub__(self, other):
return self._arith_method(other, operator.sub)
@unpack_zerodim_and_defer("__rsub__")
def __rsub__(self, other):
return self._arith_method(other, roperator.rsub)
@unpack_zerodim_and_defer("__mul__")
def __mul__(self, other):
return self._arith_method(other, operator.mul)
@unpack_zerodim_and_defer("__rmul__")
def __rmul__(self, other):
return self._arith_method(other, roperator.rmul)
@unpack_zerodim_and_defer("__truediv__")
def __truediv__(self, other):
return self._arith_method(other, operator.truediv)
@unpack_zerodim_and_defer("__rtruediv__")
def __rtruediv__(self, other):
return self._arith_method(other, roperator.rtruediv)
@unpack_zerodim_and_defer("__floordiv__")
def __floordiv__(self, other):
return self._arith_method(other, operator.floordiv)
@unpack_zerodim_and_defer("__rfloordiv")
def __rfloordiv__(self, other):
return self._arith_method(other, roperator.rfloordiv)
@unpack_zerodim_and_defer("__mod__")
def __mod__(self, other):
return self._arith_method(other, operator.mod)
@unpack_zerodim_and_defer("__rmod__")
def __rmod__(self, other):
return self._arith_method(other, roperator.rmod)
@unpack_zerodim_and_defer("__divmod__")
def __divmod__(self, other):
return self._arith_method(other, divmod)
@unpack_zerodim_and_defer("__rdivmod__")
def __rdivmod__(self, other):
return self._arith_method(other, roperator.rdivmod)
@unpack_zerodim_and_defer("__pow__")
def __pow__(self, other):
return self._arith_method(other, operator.pow)
@unpack_zerodim_and_defer("__rpow__")
def __rpow__(self, other):
return self._arith_method(other, roperator.rpow)
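# Illustrative sketch (editor's addition): a minimal, hypothetical OpsMixin
# subclass, showing how a single ``_arith_method`` implementation powers every
# arithmetic dunder defined above:
#
#     >>> class Box(OpsMixin):
#     ...     def __init__(self, value) -> None:
#     ...         self.value = value
#     ...     def _arith_method(self, other, op):
#     ...         return Box(op(self.value, other))
#     >>> (Box(2) + 3).value
#     5
#     >>> (Box(2) ** 3).value
#     8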
# -----------------------------------------------------------------------------
# Helpers to implement __array_ufunc__
def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
"""
Compatibility with numpy ufuncs.
See also
--------
    https://numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
"""
from pandas.core.frame import (
DataFrame,
Series,
)
from pandas.core.generic import NDFrame
from pandas.core.internals import BlockManager
cls = type(self)
kwargs = _standardize_out_kwarg(**kwargs)
# for binary ops, use our custom dunder methods
result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
if result is not NotImplemented:
return result
# Determine if we should defer.
no_defer = (
np.ndarray.__array_ufunc__,
cls.__array_ufunc__,
)
for item in inputs:
higher_priority = (
hasattr(item, "__array_priority__")
and item.__array_priority__ > self.__array_priority__
)
has_array_ufunc = (
hasattr(item, "__array_ufunc__")
and type(item).__array_ufunc__ not in no_defer
and not isinstance(item, self._HANDLED_TYPES)
)
if higher_priority or has_array_ufunc:
return NotImplemented
# align all the inputs.
types = tuple(type(x) for x in inputs)
alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]
if len(alignable) > 1:
# This triggers alignment.
# At the moment, there aren't any ufuncs with more than two inputs
# so this ends up just being x1.index | x2.index, but we write
# it to handle *args.
set_types = set(types)
if len(set_types) > 1 and {DataFrame, Series}.issubset(set_types):
# We currently don't handle ufunc(DataFrame, Series)
# well. Previously this raised an internal ValueError. We might
# support it someday, so raise a NotImplementedError.
raise NotImplementedError(
f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
)
axes = self.axes
for obj in alignable[1:]:
# this relies on the fact that we aren't handling mixed
# series / frame ufuncs.
for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
axes[i] = ax1.union(ax2)
reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
inputs = tuple(
x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
for x, t in zip(inputs, types)
)
else:
reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
if self.ndim == 1:
names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
name = names[0] if len(set(names)) == 1 else None
reconstruct_kwargs = {"name": name}
else:
reconstruct_kwargs = {}
def reconstruct(result):
if ufunc.nout > 1:
# np.modf, np.frexp, np.divmod
return tuple(_reconstruct(x) for x in result)
return _reconstruct(result)
def _reconstruct(result):
if lib.is_scalar(result):
return result
if result.ndim != self.ndim:
if method == "outer":
raise NotImplementedError
return result
if isinstance(result, BlockManager):
# we went through BlockManager.apply e.g. np.sqrt
result = self._constructor_from_mgr(result, axes=result.axes)
else:
# we converted an array, lost our axes
result = self._constructor(
result, **reconstruct_axes, **reconstruct_kwargs, copy=False
)
# TODO: When we support multiple values in __finalize__, this
# should pass alignable to `__finalize__` instead of self.
# Then `np.add(a, b)` would consider attrs from both a and b
# when a and b are NDFrames.
if len(alignable) == 1:
result = result.__finalize__(self)
return result
if "out" in kwargs:
# e.g. test_multiindex_get_loc
result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
return reconstruct(result)
if method == "reduce":
        # e.g. tests.series.test_ufunc.test_reduce
result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
if result is not NotImplemented:
return result
# We still get here with kwargs `axis` for e.g. np.maximum.accumulate
# and `dtype` and `keepdims` for np.ptp
if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
# Just give up on preserving types in the complex case.
        # In theory we could preserve types in each of these cases:
# * nout>1 is doable if BlockManager.apply took nout and
# returned a Tuple[BlockManager].
# * len(inputs) > 1 is doable when we know that we have
# aligned blocks / dtypes.
# e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
inputs = tuple(np.asarray(x) for x in inputs)
# Note: we can't use default_array_ufunc here bc reindexing means
# that `self` may not be among `inputs`
result = getattr(ufunc, method)(*inputs, **kwargs)
elif self.ndim == 1:
# ufunc(series, ...)
inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
result = getattr(ufunc, method)(*inputs, **kwargs)
else:
# ufunc(dataframe)
if method == "__call__" and not kwargs:
# for np.<ufunc>(..) calls
# kwargs cannot necessarily be handled block-by-block, so only
# take this path if there are no kwargs
mgr = inputs[0]._mgr
result = mgr.apply(getattr(ufunc, method))
else:
# otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
# Those can have an axis keyword and thus can't be called block-by-block
result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
# e.g. np.negative (only one reached), with "where" and "out" in kwargs
result = reconstruct(result)
return result
def _standardize_out_kwarg(**kwargs) -> dict:
"""
If kwargs contain "out1" and "out2", replace that with a tuple "out"
np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or
`out1=out1, out2=out2)`
"""
if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs:
out1 = kwargs.pop("out1")
out2 = kwargs.pop("out2")
out = (out1, out2)
kwargs["out"] = out
return kwargs
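# Illustrative sketch (editor's addition): the normalization above in action:
#
#     >>> a, b = np.empty(2), np.empty(2)
#     >>> sorted(_standardize_out_kwarg(out1=a, out2=b))
#     ['out']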
def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
"""
If we have an `out` keyword, then call the ufunc without `out` and then
set the result into the given `out`.
"""
# Note: we assume _standardize_out_kwarg has already been called.
out = kwargs.pop("out")
where = kwargs.pop("where", None)
result = getattr(ufunc, method)(*inputs, **kwargs)
if result is NotImplemented:
return NotImplemented
if isinstance(result, tuple):
# i.e. np.divmod, np.modf, np.frexp
if not isinstance(out, tuple) or len(out) != len(result):
raise NotImplementedError
for arr, res in zip(out, result):
_assign_where(arr, res, where)
return out
if isinstance(out, tuple):
if len(out) == 1:
out = out[0]
else:
raise NotImplementedError
_assign_where(out, result, where)
return out
def _assign_where(out, result, where) -> None:
"""
Set a ufunc result into 'out', masking with a 'where' argument if necessary.
"""
if where is None:
# no 'where' arg passed to ufunc
out[:] = result
else:
np.putmask(out, where, result)
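# Illustrative sketch (editor's addition): the masked branch above relies on
# np.putmask, so only the ``where`` positions of ``out`` receive the result:
#
#     >>> out = np.zeros(3)
#     >>> _assign_where(out, np.array([1.0, 2.0, 3.0]), np.array([True, False, True]))
#     >>> out
#     array([1., 0., 3.])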
def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
"""
Fallback to the behavior we would get if we did not define __array_ufunc__.
Notes
-----
We are assuming that `self` is among `inputs`.
"""
if not any(x is self for x in inputs):
raise NotImplementedError
new_inputs = [x if x is not self else np.asarray(x) for x in inputs]
return getattr(ufunc, method)(*new_inputs, **kwargs)
def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
"""
Dispatch ufunc reductions to self's reduction methods.
"""
assert method == "reduce"
if len(inputs) != 1 or inputs[0] is not self:
return NotImplemented
if ufunc.__name__ not in REDUCTION_ALIASES:
return NotImplemented
method_name = REDUCTION_ALIASES[ufunc.__name__]
# NB: we are assuming that min/max represent minimum/maximum methods,
# which would not be accurate for e.g. Timestamp.min
if not hasattr(self, method_name):
return NotImplemented
if self.ndim > 1:
if isinstance(self, ABCNDFrame):
# TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
kwargs["numeric_only"] = False
if "axis" not in kwargs:
# For DataFrame reductions we don't want the default axis=0
# Note: np.min is not a ufunc, but uses array_function_dispatch,
# so calls DataFrame.min (without ever getting here) with the np.min
# default of axis=None, which DataFrame.min catches and changes to axis=0.
            # np.minimum.reduce(df) gets here because axis is not in kwargs,
            # so we set axis=0 to match the behavior of np.minimum.reduce(df.values).
kwargs["axis"] = 0
# By default, numpy's reductions do not skip NaNs, so we have to
# pass skipna=False
return getattr(self, method_name)(skipna=False, **kwargs)
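# Illustrative sketch (editor's addition): with the aliases above,
# ``np.add.reduce`` on a Series is routed to ``Series.sum(skipna=False)``, so
# NaN propagates, matching raw numpy rather than pandas' default skipna=True:
#
#     >>> import pandas as pd
#     >>> np.add.reduce(pd.Series([1.0, float("nan")]))
#     nan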

View file

@@ -0,0 +1,43 @@
from pandas.core.arrays.arrow import ArrowExtensionArray
from pandas.core.arrays.base import (
ExtensionArray,
ExtensionOpsMixin,
ExtensionScalarOpsMixin,
)
from pandas.core.arrays.boolean import BooleanArray
from pandas.core.arrays.categorical import Categorical
from pandas.core.arrays.datetimes import DatetimeArray
from pandas.core.arrays.floating import FloatingArray
from pandas.core.arrays.integer import IntegerArray
from pandas.core.arrays.interval import IntervalArray
from pandas.core.arrays.masked import BaseMaskedArray
from pandas.core.arrays.numpy_ import NumpyExtensionArray
from pandas.core.arrays.period import (
PeriodArray,
period_array,
)
from pandas.core.arrays.sparse import SparseArray
from pandas.core.arrays.string_ import StringArray
from pandas.core.arrays.string_arrow import ArrowStringArray
from pandas.core.arrays.timedeltas import TimedeltaArray
__all__ = [
"ArrowExtensionArray",
"ExtensionArray",
"ExtensionOpsMixin",
"ExtensionScalarOpsMixin",
"ArrowStringArray",
"BaseMaskedArray",
"BooleanArray",
"Categorical",
"DatetimeArray",
"FloatingArray",
"IntegerArray",
"IntervalArray",
"NumpyExtensionArray",
"PeriodArray",
"period_array",
"SparseArray",
"StringArray",
"TimedeltaArray",
]
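# Illustrative sketch (editor's addition): these classes are usually reached
# through ``pd.array``, which infers a matching extension type (output names
# assume default options):
#
#     >>> import pandas as pd
#     >>> type(pd.array([1, 2, None])).__name__
#     'IntegerArray'
#     >>> type(pd.array(["a", "b"])).__name__
#     'StringArray'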

Some files were not shown because too many files have changed in this diff.