Best Python code snippet using pandera_python
common.py
Source:common.py  
...38    is_scalar,39    is_sequence,40)41_POSSIBLY_CAST_DTYPES = {42    np.dtype(t).name43    for t in [44        "O",45        "int8",46        "uint8",47        "int16",48        "uint16",49        "int32",50        "uint32",51        "int64",52        "uint64",53    ]54}55DT64NS_DTYPE = conversion.DT64NS_DTYPE56TD64NS_DTYPE = conversion.TD64NS_DTYPE57INT64_DTYPE = np.dtype(np.int64)58# oh the troubles to reduce import time59_is_scipy_sparse = None60ensure_float64 = algos.ensure_float6461ensure_float32 = algos.ensure_float3262def ensure_float(arr):63    """64    Ensure that an array object has a float dtype if possible.65    Parameters66    ----------67    arr : array-like68        The array whose data type we want to enforce as float.69    Returns70    -------71    float_arr : The original array cast to the float dtype if72                possible. Otherwise, the original array is returned.73    """74    if issubclass(arr.dtype.type, (np.integer, np.bool_)):75        arr = arr.astype(float)76    return arr77ensure_uint64 = algos.ensure_uint6478ensure_int64 = algos.ensure_int6479ensure_int32 = algos.ensure_int3280ensure_int16 = algos.ensure_int1681ensure_int8 = algos.ensure_int882ensure_platform_int = algos.ensure_platform_int83ensure_object = algos.ensure_object84def ensure_str(value: Union[bytes, Any]) -> str:85    """86    Ensure that bytes and non-strings get converted into ``str`` objects.87    """88    if isinstance(value, bytes):89        value = value.decode("utf-8")90    elif not isinstance(value, str):91        value = str(value)92    return value93def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.array:94    """95    Ensure that an dtype array of some integer dtype96    has an int64 dtype if possible.97    If it's not possible, potentially because of overflow,98    convert the array to float64 instead.99    Parameters100    ----------101    arr : array-like102          The array whose data type we want to enforce.103    copy: bool104   
       Whether to copy the original array or reuse105          it in place, if possible.106    Returns107    -------108    out_arr : The input array cast as int64 if109              possible without overflow.110              Otherwise the input array cast to float64.111    Notes112    -----113    If the array is explicitly of type uint64 the type114    will remain unchanged.115    """116    # TODO: GH27506 potential bug with ExtensionArrays117    try:118        return arr.astype("int64", copy=copy, casting="safe")  # type: ignore119    except TypeError:120        pass121    try:122        return arr.astype("uint64", copy=copy, casting="safe")  # type: ignore123    except TypeError:124        if is_extension_array_dtype(arr.dtype):125            return arr.to_numpy(dtype="float64", na_value=np.nan)126        return arr.astype("float64", copy=copy)127def ensure_python_int(value: Union[int, np.integer]) -> int:128    """129    Ensure that a value is a python int.130    Parameters131    ----------132    value: int or numpy.integer133    Returns134    -------135    int136    Raises137    ------138    TypeError: if the value isn't an int or can't be converted to one.139    """140    if not is_scalar(value):141        raise TypeError(142            f"Value needs to be a scalar value, was type {type(value).__name__}"143        )144    try:145        new_value = int(value)146        assert new_value == value147    except (TypeError, ValueError, AssertionError) as err:148        raise TypeError(f"Wrong type {type(value)} for value {value}") from err149    return new_value150def classes(*klasses) -> Callable:151    """ evaluate if the tipo is a subclass of the klasses """152    return lambda tipo: issubclass(tipo, klasses)153def classes_and_not_datetimelike(*klasses) -> Callable:154    """155    evaluate if the tipo is a subclass of the klasses156    and not a datetimelike157    """158    return lambda tipo: (159        issubclass(tipo, klasses)160        and not 
issubclass(tipo, (np.datetime64, np.timedelta64))161    )162def is_object_dtype(arr_or_dtype) -> bool:163    """164    Check whether an array-like or dtype is of the object dtype.165    Parameters166    ----------167    arr_or_dtype : array-like168        The array-like or dtype to check.169    Returns170    -------171    boolean172        Whether or not the array-like or dtype is of the object dtype.173    Examples174    --------175    >>> is_object_dtype(object)176    True177    >>> is_object_dtype(int)178    False179    >>> is_object_dtype(np.array([], dtype=object))180    True181    >>> is_object_dtype(np.array([], dtype=int))182    False183    >>> is_object_dtype([1, 2, 3])184    False185    """186    return _is_dtype_type(arr_or_dtype, classes(np.object_))187def is_sparse(arr) -> bool:188    """189    Check whether an array-like is a 1-D pandas sparse array.190    Check that the one-dimensional array-like is a pandas sparse array.191    Returns True if it is a pandas sparse array, not another type of192    sparse array.193    Parameters194    ----------195    arr : array-like196        Array-like to check.197    Returns198    -------199    bool200        Whether or not the array-like is a pandas sparse array.201    Examples202    --------203    Returns `True` if the parameter is a 1-D pandas sparse array.204    >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0]))205    True206    >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0])))207    True208    Returns `False` if the parameter is not sparse.209    >>> is_sparse(np.array([0, 0, 1, 0]))210    False211    >>> is_sparse(pd.Series([0, 1, 0, 0]))212    False213    Returns `False` if the parameter is not a pandas sparse array.214    >>> from scipy.sparse import bsr_matrix215    >>> is_sparse(bsr_matrix([0, 1, 0, 0]))216    False217    Returns `False` if the parameter has more than one dimension.218    """219    from pandas.core.arrays.sparse import SparseDtype220    dtype = getattr(arr, "dtype", arr)221   
 return isinstance(dtype, SparseDtype)222def is_scipy_sparse(arr) -> bool:223    """224    Check whether an array-like is a scipy.sparse.spmatrix instance.225    Parameters226    ----------227    arr : array-like228        The array-like to check.229    Returns230    -------231    boolean232        Whether or not the array-like is a scipy.sparse.spmatrix instance.233    Notes234    -----235    If scipy is not installed, this function will always return False.236    Examples237    --------238    >>> from scipy.sparse import bsr_matrix239    >>> is_scipy_sparse(bsr_matrix([1, 2, 3]))240    True241    >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3]))242    False243    """244    global _is_scipy_sparse245    if _is_scipy_sparse is None:246        try:247            from scipy.sparse import issparse as _is_scipy_sparse248        except ImportError:249            _is_scipy_sparse = lambda _: False250    assert _is_scipy_sparse is not None251    return _is_scipy_sparse(arr)252def is_categorical(arr) -> bool:253    """254    Check whether an array-like is a Categorical instance.255    Parameters256    ----------257    arr : array-like258        The array-like to check.259    Returns260    -------261    boolean262        Whether or not the array-like is of a Categorical instance.263    Examples264    --------265    >>> is_categorical([1, 2, 3])266    False267    Categoricals, Series Categoricals, and CategoricalIndex will return True.268    >>> cat = pd.Categorical([1, 2, 3])269    >>> is_categorical(cat)270    True271    >>> is_categorical(pd.Series(cat))272    True273    >>> is_categorical(pd.CategoricalIndex([1, 2, 3]))274    True275    """276    warnings.warn(277        "is_categorical is deprecated and will be removed in a future version.  
"278        "Use is_categorical_dtype instead",279        FutureWarning,280        stacklevel=2,281    )282    return isinstance(arr, ABCCategorical) or is_categorical_dtype(arr)283def is_datetime64_dtype(arr_or_dtype) -> bool:284    """285    Check whether an array-like or dtype is of the datetime64 dtype.286    Parameters287    ----------288    arr_or_dtype : array-like289        The array-like or dtype to check.290    Returns291    -------292    boolean293        Whether or not the array-like or dtype is of the datetime64 dtype.294    Examples295    --------296    >>> is_datetime64_dtype(object)297    False298    >>> is_datetime64_dtype(np.datetime64)299    True300    >>> is_datetime64_dtype(np.array([], dtype=int))301    False302    >>> is_datetime64_dtype(np.array([], dtype=np.datetime64))303    True304    >>> is_datetime64_dtype([1, 2, 3])305    False306    """307    if isinstance(arr_or_dtype, np.dtype):308        # GH#33400 fastpath for dtype object309        return arr_or_dtype.kind == "M"310    return _is_dtype_type(arr_or_dtype, classes(np.datetime64))311def is_datetime64tz_dtype(arr_or_dtype) -> bool:312    """313    Check whether an array-like or dtype is of a DatetimeTZDtype dtype.314    Parameters315    ----------316    arr_or_dtype : array-like317        The array-like or dtype to check.318    Returns319    -------320    boolean321        Whether or not the array-like or dtype is of a DatetimeTZDtype dtype.322    Examples323    --------324    >>> is_datetime64tz_dtype(object)325    False326    >>> is_datetime64tz_dtype([1, 2, 3])327    False328    >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3]))  # tz-naive329    False330    >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))331    True332    >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern")333    >>> s = pd.Series([], dtype=dtype)334    >>> is_datetime64tz_dtype(dtype)335    True336    >>> is_datetime64tz_dtype(s)337    True338    """339    if isinstance(arr_or_dtype, 
ExtensionDtype):340        # GH#33400 fastpath for dtype object341        return arr_or_dtype.kind == "M"342    if arr_or_dtype is None:343        return False344    return DatetimeTZDtype.is_dtype(arr_or_dtype)345def is_timedelta64_dtype(arr_or_dtype) -> bool:346    """347    Check whether an array-like or dtype is of the timedelta64 dtype.348    Parameters349    ----------350    arr_or_dtype : array-like351        The array-like or dtype to check.352    Returns353    -------354    boolean355        Whether or not the array-like or dtype is of the timedelta64 dtype.356    Examples357    --------358    >>> is_timedelta64_dtype(object)359    False360    >>> is_timedelta64_dtype(np.timedelta64)361    True362    >>> is_timedelta64_dtype([1, 2, 3])363    False364    >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]"))365    True366    >>> is_timedelta64_dtype('0 days')367    False368    """369    if isinstance(arr_or_dtype, np.dtype):370        # GH#33400 fastpath for dtype object371        return arr_or_dtype.kind == "m"372    return _is_dtype_type(arr_or_dtype, classes(np.timedelta64))373def is_period_dtype(arr_or_dtype) -> bool:374    """375    Check whether an array-like or dtype is of the Period dtype.376    Parameters377    ----------378    arr_or_dtype : array-like379        The array-like or dtype to check.380    Returns381    -------382    boolean383        Whether or not the array-like or dtype is of the Period dtype.384    Examples385    --------386    >>> is_period_dtype(object)387    False388    >>> is_period_dtype(PeriodDtype(freq="D"))389    True390    >>> is_period_dtype([1, 2, 3])391    False392    >>> is_period_dtype(pd.Period("2017-01-01"))393    False394    >>> is_period_dtype(pd.PeriodIndex([], freq="A"))395    True396    """397    if isinstance(arr_or_dtype, ExtensionDtype):398        # GH#33400 fastpath for dtype object399        return arr_or_dtype.type is Period400    if arr_or_dtype is None:401        return False402    return 
PeriodDtype.is_dtype(arr_or_dtype)403def is_interval_dtype(arr_or_dtype) -> bool:404    """405    Check whether an array-like or dtype is of the Interval dtype.406    Parameters407    ----------408    arr_or_dtype : array-like409        The array-like or dtype to check.410    Returns411    -------412    boolean413        Whether or not the array-like or dtype is of the Interval dtype.414    Examples415    --------416    >>> is_interval_dtype(object)417    False418    >>> is_interval_dtype(IntervalDtype())419    True420    >>> is_interval_dtype([1, 2, 3])421    False422    >>>423    >>> interval = pd.Interval(1, 2, closed="right")424    >>> is_interval_dtype(interval)425    False426    >>> is_interval_dtype(pd.IntervalIndex([interval]))427    True428    """429    if isinstance(arr_or_dtype, ExtensionDtype):430        # GH#33400 fastpath for dtype object431        return arr_or_dtype.type is Interval432    if arr_or_dtype is None:433        return False434    return IntervalDtype.is_dtype(arr_or_dtype)435def is_categorical_dtype(arr_or_dtype) -> bool:436    """437    Check whether an array-like or dtype is of the Categorical dtype.438    Parameters439    ----------440    arr_or_dtype : array-like441        The array-like or dtype to check.442    Returns443    -------444    boolean445        Whether or not the array-like or dtype is of the Categorical dtype.446    Examples447    --------448    >>> is_categorical_dtype(object)449    False450    >>> is_categorical_dtype(CategoricalDtype())451    True452    >>> is_categorical_dtype([1, 2, 3])453    False454    >>> is_categorical_dtype(pd.Categorical([1, 2, 3]))455    True456    >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))457    True458    """459    if isinstance(arr_or_dtype, ExtensionDtype):460        # GH#33400 fastpath for dtype object461        return arr_or_dtype.name == "category"462    if arr_or_dtype is None:463        return False464    return CategoricalDtype.is_dtype(arr_or_dtype)465def 
is_string_dtype(arr_or_dtype) -> bool:466    """467    Check whether the provided array or dtype is of the string dtype.468    Parameters469    ----------470    arr_or_dtype : array-like471        The array or dtype to check.472    Returns473    -------474    boolean475        Whether or not the array or dtype is of the string dtype.476    Examples477    --------478    >>> is_string_dtype(str)479    True480    >>> is_string_dtype(object)481    True482    >>> is_string_dtype(int)483    False484    >>>485    >>> is_string_dtype(np.array(['a', 'b']))486    True487    >>> is_string_dtype(pd.Series([1, 2]))488    False489    """490    # TODO: gh-15585: consider making the checks stricter.491    def condition(dtype) -> bool:492        return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype)493    def is_excluded_dtype(dtype) -> bool:494        """495        These have kind = "O" but aren't string dtypes so need to be explicitly excluded496        """497        is_excluded_checks = (is_period_dtype, is_interval_dtype, is_categorical_dtype)498        return any(is_excluded(dtype) for is_excluded in is_excluded_checks)499    return _is_dtype(arr_or_dtype, condition)500def is_dtype_equal(source, target) -> bool:501    """502    Check if two dtypes are equal.503    Parameters504    ----------505    source : The first dtype to compare506    target : The second dtype to compare507    Returns508    -------509    boolean510        Whether or not the two dtypes are equal.511    Examples512    --------513    >>> is_dtype_equal(int, float)514    False515    >>> is_dtype_equal("int", int)516    True517    >>> is_dtype_equal(object, "category")518    False519    >>> is_dtype_equal(CategoricalDtype(), "category")520    True521    >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64")522    False523    """524    try:525        source = _get_dtype(source)526        target = _get_dtype(target)527        return source == target528    except (TypeError, AttributeError):529 
       # invalid comparison530        # object == category will hit this531        return False532def is_any_int_dtype(arr_or_dtype) -> bool:533    """534    Check whether the provided array or dtype is of an integer dtype.535    In this function, timedelta64 instances are also considered "any-integer"536    type objects and will return True.537    This function is internal and should not be exposed in the public API.538    .. versionchanged:: 0.24.0539       The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered540       as integer by this function.541    Parameters542    ----------543    arr_or_dtype : array-like544        The array or dtype to check.545    Returns546    -------547    boolean548        Whether or not the array or dtype is of an integer dtype.549    Examples550    --------551    >>> is_any_int_dtype(str)552    False553    >>> is_any_int_dtype(int)554    True555    >>> is_any_int_dtype(float)556    False557    >>> is_any_int_dtype(np.uint64)558    True559    >>> is_any_int_dtype(np.datetime64)560    False561    >>> is_any_int_dtype(np.timedelta64)562    True563    >>> is_any_int_dtype(np.array(['a', 'b']))564    False565    >>> is_any_int_dtype(pd.Series([1, 2]))566    True567    >>> is_any_int_dtype(np.array([], dtype=np.timedelta64))568    True569    >>> is_any_int_dtype(pd.Index([1, 2.]))  # float570    False571    """572    return _is_dtype_type(arr_or_dtype, classes(np.integer, np.timedelta64))573def is_integer_dtype(arr_or_dtype) -> bool:574    """575    Check whether the provided array or dtype is of an integer dtype.576    Unlike in `in_any_int_dtype`, timedelta64 instances will return False.577    .. versionchanged:: 0.24.0578       The nullable Integer dtypes (e.g. 
pandas.Int64Dtype) are also considered579       as integer by this function.580    Parameters581    ----------582    arr_or_dtype : array-like583        The array or dtype to check.584    Returns585    -------586    boolean587        Whether or not the array or dtype is of an integer dtype and588        not an instance of timedelta64.589    Examples590    --------591    >>> is_integer_dtype(str)592    False593    >>> is_integer_dtype(int)594    True595    >>> is_integer_dtype(float)596    False597    >>> is_integer_dtype(np.uint64)598    True599    >>> is_integer_dtype('int8')600    True601    >>> is_integer_dtype('Int8')602    True603    >>> is_integer_dtype(pd.Int8Dtype)604    True605    >>> is_integer_dtype(np.datetime64)606    False607    >>> is_integer_dtype(np.timedelta64)608    False609    >>> is_integer_dtype(np.array(['a', 'b']))610    False611    >>> is_integer_dtype(pd.Series([1, 2]))612    True613    >>> is_integer_dtype(np.array([], dtype=np.timedelta64))614    False615    >>> is_integer_dtype(pd.Index([1, 2.]))  # float616    False617    """618    return _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.integer))619def is_signed_integer_dtype(arr_or_dtype) -> bool:620    """621    Check whether the provided array or dtype is of a signed integer dtype.622    Unlike in `in_any_int_dtype`, timedelta64 instances will return False.623    .. versionchanged:: 0.24.0624       The nullable Integer dtypes (e.g. 
pandas.Int64Dtype) are also considered625       as integer by this function.626    Parameters627    ----------628    arr_or_dtype : array-like629        The array or dtype to check.630    Returns631    -------632    boolean633        Whether or not the array or dtype is of a signed integer dtype634        and not an instance of timedelta64.635    Examples636    --------637    >>> is_signed_integer_dtype(str)638    False639    >>> is_signed_integer_dtype(int)640    True641    >>> is_signed_integer_dtype(float)642    False643    >>> is_signed_integer_dtype(np.uint64)  # unsigned644    False645    >>> is_signed_integer_dtype('int8')646    True647    >>> is_signed_integer_dtype('Int8')648    True649    >>> is_signed_integer_dtype(pd.Int8Dtype)650    True651    >>> is_signed_integer_dtype(np.datetime64)652    False653    >>> is_signed_integer_dtype(np.timedelta64)654    False655    >>> is_signed_integer_dtype(np.array(['a', 'b']))656    False657    >>> is_signed_integer_dtype(pd.Series([1, 2]))658    True659    >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64))660    False661    >>> is_signed_integer_dtype(pd.Index([1, 2.]))  # float662    False663    >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32))  # unsigned664    False665    """666    return _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.signedinteger))667def is_unsigned_integer_dtype(arr_or_dtype) -> bool:668    """669    Check whether the provided array or dtype is of an unsigned integer dtype.670    .. versionchanged:: 0.24.0671       The nullable Integer dtypes (e.g. 
pandas.UInt64Dtype) are also672       considered as integer by this function.673    Parameters674    ----------675    arr_or_dtype : array-like676        The array or dtype to check.677    Returns678    -------679    boolean680        Whether or not the array or dtype is of an unsigned integer dtype.681    Examples682    --------683    >>> is_unsigned_integer_dtype(str)684    False685    >>> is_unsigned_integer_dtype(int)  # signed686    False687    >>> is_unsigned_integer_dtype(float)688    False689    >>> is_unsigned_integer_dtype(np.uint64)690    True691    >>> is_unsigned_integer_dtype('uint8')692    True693    >>> is_unsigned_integer_dtype('UInt8')694    True695    >>> is_unsigned_integer_dtype(pd.UInt8Dtype)696    True697    >>> is_unsigned_integer_dtype(np.array(['a', 'b']))698    False699    >>> is_unsigned_integer_dtype(pd.Series([1, 2]))  # signed700    False701    >>> is_unsigned_integer_dtype(pd.Index([1, 2.]))  # float702    False703    >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32))704    True705    """706    return _is_dtype_type(707        arr_or_dtype, classes_and_not_datetimelike(np.unsignedinteger)708    )709def is_int64_dtype(arr_or_dtype) -> bool:710    """711    Check whether the provided array or dtype is of the int64 dtype.712    Parameters713    ----------714    arr_or_dtype : array-like715        The array or dtype to check.716    Returns717    -------718    boolean719        Whether or not the array or dtype is of the int64 dtype.720    Notes721    -----722    Depending on system architecture, the return value of `is_int64_dtype(723    int)` will be True if the OS uses 64-bit integers and False if the OS724    uses 32-bit integers.725    Examples726    --------727    >>> is_int64_dtype(str)728    False729    >>> is_int64_dtype(np.int32)730    False731    >>> is_int64_dtype(np.int64)732    True733    >>> is_int64_dtype('int8')734    False735    >>> is_int64_dtype('Int8')736    False737    >>> 
is_int64_dtype(pd.Int64Dtype)738    True739    >>> is_int64_dtype(float)740    False741    >>> is_int64_dtype(np.uint64)  # unsigned742    False743    >>> is_int64_dtype(np.array(['a', 'b']))744    False745    >>> is_int64_dtype(np.array([1, 2], dtype=np.int64))746    True747    >>> is_int64_dtype(pd.Index([1, 2.]))  # float748    False749    >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32))  # unsigned750    False751    """752    return _is_dtype_type(arr_or_dtype, classes(np.int64))753def is_datetime64_any_dtype(arr_or_dtype) -> bool:754    """755    Check whether the provided array or dtype is of the datetime64 dtype.756    Parameters757    ----------758    arr_or_dtype : array-like759        The array or dtype to check.760    Returns761    -------762    bool763        Whether or not the array or dtype is of the datetime64 dtype.764    Examples765    --------766    >>> is_datetime64_any_dtype(str)767    False768    >>> is_datetime64_any_dtype(int)769    False770    >>> is_datetime64_any_dtype(np.datetime64)  # can be tz-naive771    True772    >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern"))773    True774    >>> is_datetime64_any_dtype(np.array(['a', 'b']))775    False776    >>> is_datetime64_any_dtype(np.array([1, 2]))777    False778    >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]"))779    True780    >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))781    True782    """783    if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)):784        # GH#33400 fastpath for dtype object785        return arr_or_dtype.kind == "M"786    if arr_or_dtype is None:787        return False788    return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype)789def is_datetime64_ns_dtype(arr_or_dtype) -> bool:790    """791    Check whether the provided array or dtype is of the datetime64[ns] dtype.792    Parameters793    ----------794    arr_or_dtype : array-like795        The array or dtype to 
check.796    Returns797    -------798    bool799        Whether or not the array or dtype is of the datetime64[ns] dtype.800    Examples801    --------802    >>> is_datetime64_ns_dtype(str)803    False804    >>> is_datetime64_ns_dtype(int)805    False806    >>> is_datetime64_ns_dtype(np.datetime64)  # no unit807    False808    >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern"))809    True810    >>> is_datetime64_ns_dtype(np.array(['a', 'b']))811    False812    >>> is_datetime64_ns_dtype(np.array([1, 2]))813    False814    >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64"))  # no unit815    False816    >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]"))  # wrong unit817    False818    >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))819    True820    """821    if arr_or_dtype is None:822        return False823    try:824        tipo = _get_dtype(arr_or_dtype)825    except TypeError:826        if is_datetime64tz_dtype(arr_or_dtype):827            tipo = _get_dtype(arr_or_dtype.dtype)828        else:829            return False830    return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE831def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:832    """833    Check whether the provided array or dtype is of the timedelta64[ns] dtype.834    This is a very specific dtype, so generic ones like `np.timedelta64`835    will return False if passed into this function.836    Parameters837    ----------838    arr_or_dtype : array-like839        The array or dtype to check.840    Returns841    -------842    boolean843        Whether or not the array or dtype is of the timedelta64[ns] dtype.844    Examples845    --------846    >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]'))847    True848    >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]'))  # Wrong frequency849    False850    >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]'))851    True852    >>> is_timedelta64_ns_dtype(np.array([1, 2], 
dtype=np.timedelta64))853    False854    """855    return _is_dtype(arr_or_dtype, lambda dtype: dtype == TD64NS_DTYPE)856def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool:857    """858    Check whether the provided array or dtype is of859    a timedelta64 or datetime64 dtype.860    Parameters861    ----------862    arr_or_dtype : array-like863        The array or dtype to check.864    Returns865    -------866    boolean867        Whether or not the array or dtype is of a timedelta64,868        or datetime64 dtype.869    Examples870    --------871    >>> is_datetime_or_timedelta_dtype(str)872    False873    >>> is_datetime_or_timedelta_dtype(int)874    False875    >>> is_datetime_or_timedelta_dtype(np.datetime64)876    True877    >>> is_datetime_or_timedelta_dtype(np.timedelta64)878    True879    >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b']))880    False881    >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2]))882    False883    >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64))884    True885    >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64))886    True887    """888    return _is_dtype_type(arr_or_dtype, classes(np.datetime64, np.timedelta64))889# This exists to silence numpy deprecation warnings, see GH#29553890def is_numeric_v_string_like(a, b):891    """892    Check if we are comparing a string-like object to a numeric ndarray.893    NumPy doesn't like to compare such objects, especially numeric arrays894    and scalar string-likes.895    Parameters896    ----------897    a : array-like, scalar898        The first object to check.899    b : array-like, scalar900        The second object to check.901    Returns902    -------903    boolean904        Whether we return a comparing a string-like object to a numeric array.905    Examples906    --------907    >>> is_numeric_v_string_like(1, 1)908    False909    >>> is_numeric_v_string_like("foo", "foo")910    False911    >>> is_numeric_v_string_like(1, "foo")  
# non-array numeric912    False913    >>> is_numeric_v_string_like(np.array([1]), "foo")914    True915    >>> is_numeric_v_string_like("foo", np.array([1]))  # symmetric check916    True917    >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))918    True919    >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))920    True921    >>> is_numeric_v_string_like(np.array([1]), np.array([2]))922    False923    >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"]))924    False925    """926    is_a_array = isinstance(a, np.ndarray)927    is_b_array = isinstance(b, np.ndarray)928    is_a_numeric_array = is_a_array and is_numeric_dtype(a)929    is_b_numeric_array = is_b_array and is_numeric_dtype(b)930    is_a_string_array = is_a_array and is_string_like_dtype(a)931    is_b_string_array = is_b_array and is_string_like_dtype(b)932    is_a_scalar_string_like = not is_a_array and isinstance(a, str)933    is_b_scalar_string_like = not is_b_array and isinstance(b, str)934    return (935        (is_a_numeric_array and is_b_scalar_string_like)936        or (is_b_numeric_array and is_a_scalar_string_like)937        or (is_a_numeric_array and is_b_string_array)938        or (is_b_numeric_array and is_a_string_array)939    )940# This exists to silence numpy deprecation warnings, see GH#29553941def is_datetimelike_v_numeric(a, b):942    """943    Check if we are comparing a datetime-like object to a numeric object.944    By "numeric," we mean an object that is either of an int or float dtype.945    Parameters946    ----------947    a : array-like, scalar948        The first object to check.949    b : array-like, scalar950        The second object to check.951    Returns952    -------953    boolean954        Whether we return a comparing a datetime-like to a numeric object.955    Examples956    --------957    >>> from datetime import datetime958    >>> dt = np.datetime64(datetime(2017, 1, 1))959    >>>960    >>> is_datetimelike_v_numeric(1, 1)961  
  False962    >>> is_datetimelike_v_numeric(dt, dt)963    False964    >>> is_datetimelike_v_numeric(1, dt)965    True966    >>> is_datetimelike_v_numeric(dt, 1)  # symmetric check967    True968    >>> is_datetimelike_v_numeric(np.array([dt]), 1)969    True970    >>> is_datetimelike_v_numeric(np.array([1]), dt)971    True972    >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1]))973    True974    >>> is_datetimelike_v_numeric(np.array([1]), np.array([2]))975    False976    >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt]))977    False978    """979    if not hasattr(a, "dtype"):980        a = np.asarray(a)981    if not hasattr(b, "dtype"):982        b = np.asarray(b)983    def is_numeric(x):984        """985        Check if an object has a numeric dtype (i.e. integer or float).986        """987        return is_integer_dtype(x) or is_float_dtype(x)988    return (needs_i8_conversion(a) and is_numeric(b)) or (989        needs_i8_conversion(b) and is_numeric(a)990    )991def needs_i8_conversion(arr_or_dtype) -> bool:992    """993    Check whether the array or dtype should be converted to int64.994    An array-like or dtype "needs" such a conversion if the array-like995    or dtype is of a datetime-like dtype996    Parameters997    ----------998    arr_or_dtype : array-like999        The array or dtype to check.1000    Returns1001    -------1002    boolean1003        Whether or not the array or dtype should be converted to int64.1004    Examples1005    --------1006    >>> needs_i8_conversion(str)1007    False1008    >>> needs_i8_conversion(np.int64)1009    False1010    >>> needs_i8_conversion(np.datetime64)1011    True1012    >>> needs_i8_conversion(np.array(['a', 'b']))1013    False1014    >>> needs_i8_conversion(pd.Series([1, 2]))1015    False1016    >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]"))1017    True1018    >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))1019    True1020    """1021    if arr_or_dtype is 
None:1022        return False1023    return (1024        is_datetime_or_timedelta_dtype(arr_or_dtype)1025        or is_datetime64tz_dtype(arr_or_dtype)1026        or is_period_dtype(arr_or_dtype)1027    )1028def is_numeric_dtype(arr_or_dtype) -> bool:1029    """1030    Check whether the provided array or dtype is of a numeric dtype.1031    Parameters1032    ----------1033    arr_or_dtype : array-like1034        The array or dtype to check.1035    Returns1036    -------1037    boolean1038        Whether or not the array or dtype is of a numeric dtype.1039    Examples1040    --------1041    >>> is_numeric_dtype(str)1042    False1043    >>> is_numeric_dtype(int)1044    True1045    >>> is_numeric_dtype(float)1046    True1047    >>> is_numeric_dtype(np.uint64)1048    True1049    >>> is_numeric_dtype(np.datetime64)1050    False1051    >>> is_numeric_dtype(np.timedelta64)1052    False1053    >>> is_numeric_dtype(np.array(['a', 'b']))1054    False1055    >>> is_numeric_dtype(pd.Series([1, 2]))1056    True1057    >>> is_numeric_dtype(pd.Index([1, 2.]))1058    True1059    >>> is_numeric_dtype(np.array([], dtype=np.timedelta64))1060    False1061    """1062    return _is_dtype_type(1063        arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_)1064    )1065def is_string_like_dtype(arr_or_dtype) -> bool:1066    """1067    Check whether the provided array or dtype is of a string-like dtype.1068    Unlike `is_string_dtype`, the object dtype is excluded because it1069    is a mixed dtype.1070    Parameters1071    ----------1072    arr_or_dtype : array-like1073        The array or dtype to check.1074    Returns1075    -------1076    boolean1077        Whether or not the array or dtype is of the string dtype.1078    Examples1079    --------1080    >>> is_string_like_dtype(str)1081    True1082    >>> is_string_like_dtype(object)1083    False1084    >>> is_string_like_dtype(np.array(['a', 'b']))1085    True1086    >>> is_string_like_dtype(pd.Series([1, 2]))1087    
False1088    """1089    return _is_dtype(arr_or_dtype, lambda dtype: dtype.kind in ("S", "U"))1090def is_float_dtype(arr_or_dtype) -> bool:1091    """1092    Check whether the provided array or dtype is of a float dtype.1093    This function is internal and should not be exposed in the public API.1094    Parameters1095    ----------1096    arr_or_dtype : array-like1097        The array or dtype to check.1098    Returns1099    -------1100    boolean1101        Whether or not the array or dtype is of a float dtype.1102    Examples1103    --------1104    >>> is_float_dtype(str)1105    False1106    >>> is_float_dtype(int)1107    False1108    >>> is_float_dtype(float)1109    True1110    >>> is_float_dtype(np.array(['a', 'b']))1111    False1112    >>> is_float_dtype(pd.Series([1, 2]))1113    False1114    >>> is_float_dtype(pd.Index([1, 2.]))1115    True1116    """1117    return _is_dtype_type(arr_or_dtype, classes(np.floating))1118def is_bool_dtype(arr_or_dtype) -> bool:1119    """1120    Check whether the provided array or dtype is of a boolean dtype.1121    Parameters1122    ----------1123    arr_or_dtype : array-like1124        The array or dtype to check.1125    Returns1126    -------1127    boolean1128        Whether or not the array or dtype is of a boolean dtype.1129    Notes1130    -----1131    An ExtensionArray is considered boolean when the ``_is_boolean``1132    attribute is set to True.1133    Examples1134    --------1135    >>> is_bool_dtype(str)1136    False1137    >>> is_bool_dtype(int)1138    False1139    >>> is_bool_dtype(bool)1140    True1141    >>> is_bool_dtype(np.bool_)1142    True1143    >>> is_bool_dtype(np.array(['a', 'b']))1144    False1145    >>> is_bool_dtype(pd.Series([1, 2]))1146    False1147    >>> is_bool_dtype(np.array([True, False]))1148    True1149    >>> is_bool_dtype(pd.Categorical([True, False]))1150    True1151    >>> is_bool_dtype(pd.arrays.SparseArray([True, False]))1152    True1153    """1154    if arr_or_dtype is None:1155        
return False1156    try:1157        dtype = _get_dtype(arr_or_dtype)1158    except TypeError:1159        return False1160    if isinstance(arr_or_dtype, CategoricalDtype):1161        arr_or_dtype = arr_or_dtype.categories1162        # now we use the special definition for Index1163    if isinstance(arr_or_dtype, ABCIndexClass):1164        # TODO(jreback)1165        # we don't have a boolean Index class1166        # so its object, we need to infer to1167        # guess this1168        return arr_or_dtype.is_object and arr_or_dtype.inferred_type == "boolean"1169    elif is_extension_array_dtype(arr_or_dtype):1170        dtype = getattr(arr_or_dtype, "dtype", arr_or_dtype)1171        return dtype._is_boolean1172    return issubclass(dtype.type, np.bool_)1173def is_extension_type(arr) -> bool:1174    """1175    Check whether an array-like is of a pandas extension class instance.1176    .. deprecated:: 1.0.01177        Use ``is_extension_array_dtype`` instead.1178    Extension classes include categoricals, pandas sparse objects (i.e.1179    classes represented within the pandas library and not ones external1180    to it like scipy sparse matrices), and datetime-like arrays.1181    Parameters1182    ----------1183    arr : array-like1184        The array-like to check.1185    Returns1186    -------1187    boolean1188        Whether or not the array-like is of a pandas extension class instance.1189    Examples1190    --------1191    >>> is_extension_type([1, 2, 3])1192    False1193    >>> is_extension_type(np.array([1, 2, 3]))1194    False1195    >>>1196    >>> cat = pd.Categorical([1, 2, 3])1197    >>>1198    >>> is_extension_type(cat)1199    True1200    >>> is_extension_type(pd.Series(cat))1201    True1202    >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3]))1203    True1204    >>> from scipy.sparse import bsr_matrix1205    >>> is_extension_type(bsr_matrix([1, 2, 3]))1206    False1207    >>> is_extension_type(pd.DatetimeIndex([1, 2, 3]))1208    False1209    >>> 
is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))1210    True1211    >>>1212    >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern")1213    >>> s = pd.Series([], dtype=dtype)1214    >>> is_extension_type(s)1215    True1216    """1217    warnings.warn(1218        "'is_extension_type' is deprecated and will be removed in a future "1219        "version.  Use 'is_extension_array_dtype' instead.",1220        FutureWarning,1221        stacklevel=2,1222    )1223    if is_categorical_dtype(arr):1224        return True1225    elif is_sparse(arr):1226        return True1227    elif is_datetime64tz_dtype(arr):1228        return True1229    return False1230def is_extension_array_dtype(arr_or_dtype) -> bool:1231    """1232    Check if an object is a pandas extension array type.1233    See the :ref:`Use Guide <extending.extension-types>` for more.1234    Parameters1235    ----------1236    arr_or_dtype : object1237        For array-like input, the ``.dtype`` attribute will1238        be extracted.1239    Returns1240    -------1241    bool1242        Whether the `arr_or_dtype` is an extension array type.1243    Notes1244    -----1245    This checks whether an object implements the pandas extension1246    array interface. 
In pandas, this includes:1247    * Categorical1248    * Sparse1249    * Interval1250    * Period1251    * DatetimeArray1252    * TimedeltaArray1253    Third-party libraries may implement arrays or types satisfying1254    this interface as well.1255    Examples1256    --------1257    >>> from pandas.api.types import is_extension_array_dtype1258    >>> arr = pd.Categorical(['a', 'b'])1259    >>> is_extension_array_dtype(arr)1260    True1261    >>> is_extension_array_dtype(arr.dtype)1262    True1263    >>> arr = np.array(['a', 'b'])1264    >>> is_extension_array_dtype(arr.dtype)1265    False1266    """1267    dtype = getattr(arr_or_dtype, "dtype", arr_or_dtype)1268    return isinstance(dtype, ExtensionDtype) or registry.find(dtype) is not None1269def is_complex_dtype(arr_or_dtype) -> bool:1270    """1271    Check whether the provided array or dtype is of a complex dtype.1272    Parameters1273    ----------1274    arr_or_dtype : array-like1275        The array or dtype to check.1276    Returns1277    -------1278    boolean1279        Whether or not the array or dtype is of a complex dtype.1280    Examples1281    --------1282    >>> is_complex_dtype(str)1283    False1284    >>> is_complex_dtype(int)1285    False1286    >>> is_complex_dtype(np.complex_)1287    True1288    >>> is_complex_dtype(np.array(['a', 'b']))1289    False1290    >>> is_complex_dtype(pd.Series([1, 2]))1291    False1292    >>> is_complex_dtype(np.array([1 + 1j, 5]))1293    True1294    """1295    return _is_dtype_type(arr_or_dtype, classes(np.complexfloating))1296def _is_dtype(arr_or_dtype, condition) -> bool:1297    """1298    Return a boolean if the condition is satisfied for the arr_or_dtype.1299    Parameters1300    ----------1301    arr_or_dtype : array-like, str, np.dtype, or ExtensionArrayType1302        The array-like or dtype object whose dtype we want to extract.1303    condition : callable[Union[np.dtype, ExtensionDtype]]1304    Returns1305    -------1306    bool1307    """1308    if 
arr_or_dtype is None:1309        return False1310    try:1311        dtype = _get_dtype(arr_or_dtype)1312    except (TypeError, ValueError, UnicodeEncodeError):1313        return False1314    return condition(dtype)1315def _get_dtype(arr_or_dtype) -> DtypeObj:1316    """1317    Get the dtype instance associated with an array1318    or dtype object.1319    Parameters1320    ----------1321    arr_or_dtype : array-like1322        The array-like or dtype object whose dtype we want to extract.1323    Returns1324    -------1325    obj_dtype : The extract dtype instance from the1326                passed in array or dtype object.1327    Raises1328    ------1329    TypeError : The passed in object is None.1330    """1331    if arr_or_dtype is None:1332        raise TypeError("Cannot deduce dtype from null object")1333    # fastpath1334    elif isinstance(arr_or_dtype, np.dtype):1335        return arr_or_dtype1336    elif isinstance(arr_or_dtype, type):1337        return np.dtype(arr_or_dtype)1338    # if we have an array-like1339    elif hasattr(arr_or_dtype, "dtype"):1340        arr_or_dtype = arr_or_dtype.dtype1341    return pandas_dtype(arr_or_dtype)1342def _is_dtype_type(arr_or_dtype, condition) -> bool:1343    """1344    Return a boolean if the condition is satisfied for the arr_or_dtype.1345    Parameters1346    ----------1347    arr_or_dtype : array-like1348        The array-like or dtype object whose dtype we want to extract.1349    condition : callable[Union[np.dtype, ExtensionDtypeType]]1350    Returns1351    -------1352    bool : if the condition is satisfied for the arr_or_dtype1353    """1354    if arr_or_dtype is None:1355        return condition(type(None))1356    # fastpath1357    if isinstance(arr_or_dtype, np.dtype):1358        return condition(arr_or_dtype.type)1359    elif isinstance(arr_or_dtype, type):1360        if issubclass(arr_or_dtype, ExtensionDtype):1361            arr_or_dtype = arr_or_dtype.type1362        return 
condition(np.dtype(arr_or_dtype).type)1363    # if we have an array-like1364    if hasattr(arr_or_dtype, "dtype"):1365        arr_or_dtype = arr_or_dtype.dtype1366    # we are not possibly a dtype1367    elif is_list_like(arr_or_dtype):1368        return condition(type(None))1369    try:1370        tipo = pandas_dtype(arr_or_dtype).type1371    except (TypeError, ValueError, UnicodeEncodeError):1372        if is_scalar(arr_or_dtype):1373            return condition(type(None))1374        return False1375    return condition(tipo)1376def infer_dtype_from_object(dtype):1377    """1378    Get a numpy dtype.type-style object for a dtype object.1379    This methods also includes handling of the datetime64[ns] and1380    datetime64[ns, TZ] objects.1381    If no dtype can be found, we return ``object``.1382    Parameters1383    ----------1384    dtype : dtype, type1385        The dtype object whose numpy dtype.type-style1386        object we want to extract.1387    Returns1388    -------1389    dtype_object : The extracted numpy dtype.type-style object.1390    """1391    if isinstance(dtype, type) and issubclass(dtype, np.generic):1392        # Type object from a dtype1393        return dtype1394    elif isinstance(dtype, (np.dtype, ExtensionDtype)):1395        # dtype object1396        try:1397            _validate_date_like_dtype(dtype)1398        except TypeError:1399            # Should still pass if we don't have a date-like1400            pass1401        return dtype.type1402    try:1403        dtype = pandas_dtype(dtype)1404    except TypeError:1405        pass1406    if is_extension_array_dtype(dtype):1407        return dtype.type1408    elif isinstance(dtype, str):1409        # TODO(jreback)1410        # should deprecate these1411        if dtype in ["datetimetz", "datetime64tz"]:1412            return DatetimeTZDtype.type1413        elif dtype in ["period"]:1414            raise NotImplementedError1415        if dtype == "datetime" or dtype == "timedelta":1416    
        dtype += "64"1417        try:1418            return infer_dtype_from_object(getattr(np, dtype))1419        except (AttributeError, TypeError):1420            # Handles cases like _get_dtype(int) i.e.,1421            # Python objects that are valid dtypes1422            # (unlike user-defined types, in general)1423            #1424            # TypeError handles the float16 type code of 'e'1425            # further handle internal types1426            pass1427    return infer_dtype_from_object(np.dtype(dtype))1428def _validate_date_like_dtype(dtype) -> None:1429    """1430    Check whether the dtype is a date-like dtype. Raises an error if invalid.1431    Parameters1432    ----------1433    dtype : dtype, type1434        The dtype to check.1435    Raises1436    ------1437    TypeError : The dtype could not be casted to a date-like dtype.1438    ValueError : The dtype is an illegal date-like dtype (e.g. the1439                 the frequency provided is too specific)1440    """1441    try:1442        typ = np.datetime_data(dtype)[0]1443    except ValueError as e:1444        raise TypeError(e) from e1445    if typ != "generic" and typ != "ns":1446        raise ValueError(1447            f"{repr(dtype.name)} is too specific of a frequency, "1448            f"try passing {repr(dtype.type.__name__)}"1449        )1450def pandas_dtype(dtype) -> DtypeObj:1451    """1452    Convert input into a pandas only dtype object or a numpy dtype object.1453    Parameters1454    ----------1455    dtype : object to be converted1456    Returns1457    -------1458    np.dtype or a pandas dtype1459    Raises1460    ------1461    TypeError if not a dtype1462    """1463    # short-circuit1464    if isinstance(dtype, np.ndarray):1465        return dtype.dtype1466    elif isinstance(dtype, (np.dtype, ExtensionDtype)):1467        return dtype1468    # registered extension types1469    result = registry.find(dtype)1470    if result is not None:1471        return result1472    # try a numpy 
dtype1473    # raise a consistent TypeError if failed1474    try:1475        npdtype = np.dtype(dtype)1476    except SyntaxError as err:1477        # np.dtype uses `eval` which can raise SyntaxError1478        raise TypeError(f"data type '{dtype}' not understood") from err1479    # Any invalid dtype (such as pd.Timestamp) should raise an error.1480    # np.dtype(invalid_type).kind = 0 for such objects. However, this will1481    # also catch some valid dtypes such as object, np.object_ and 'object'1482    # which we safeguard against by catching them earlier and returning1483    # np.dtype(valid_dtype) before this condition is evaluated.1484    if is_hashable(dtype) and dtype in [object, np.object_, "object", "O"]:1485        # check hashability to avoid errors/DeprecationWarning when we get1486        # here and `dtype` is an array1487        return npdtype1488    elif npdtype.kind == "O":1489        raise TypeError(f"dtype '{dtype}' not understood")...test_dtypes.py
Source:test_dtypes.py  
...45        msg = "|".join(46            ["data type not understood", "Cannot interpret '.*' as a data type"]47        )48        with pytest.raises(TypeError, match=msg):49            np.dtype(dtype)50        assert not dtype == np.str_51        assert not np.str_ == dtype52    def test_pickle(self, dtype):53        # make sure our cache is NOT pickled54        # clear the cache55        type(dtype).reset_cache()56        assert not len(dtype._cache)57        # force back to the cache58        result = tm.round_trip_pickle(dtype)59        if not isinstance(dtype, PeriodDtype):60            # Because PeriodDtype has a cython class as a base class,61            #  it has different pickle semantics, and its cache is re-populated62            #  on un-pickling.63            assert not len(dtype._cache)64        assert result == dtype65class TestCategoricalDtype(Base):66    @pytest.fixture67    def dtype(self):68        """69        Class level fixture of dtype for TestCategoricalDtype70        """71        return CategoricalDtype()72    def test_hash_vs_equality(self, dtype):73        dtype2 = CategoricalDtype()74        assert dtype == dtype275        assert dtype2 == dtype76        assert hash(dtype) == hash(dtype2)77    def test_equality(self, dtype):78        assert is_dtype_equal(dtype, "category")79        assert is_dtype_equal(dtype, CategoricalDtype())80        assert not is_dtype_equal(dtype, "foo")81    def test_construction_from_string(self, dtype):82        result = CategoricalDtype.construct_from_string("category")83        assert is_dtype_equal(dtype, result)84        msg = "Cannot construct a 'CategoricalDtype' from 'foo'"85        with pytest.raises(TypeError, match=msg):86            CategoricalDtype.construct_from_string("foo")87    def test_constructor_invalid(self):88        msg = "Parameter 'categories' must be list-like"89        with pytest.raises(TypeError, match=msg):90            CategoricalDtype("category")91    dtype1 = 
CategoricalDtype(["a", "b"], ordered=True)92    dtype2 = CategoricalDtype(["x", "y"], ordered=False)93    c = Categorical([0, 1], dtype=dtype1, fastpath=True)94    @pytest.mark.parametrize(95        "values, categories, ordered, dtype, expected",96        [97            [None, None, None, None, CategoricalDtype()],98            [None, ["a", "b"], True, None, dtype1],99            [c, None, None, dtype2, dtype2],100            [c, ["x", "y"], False, None, dtype2],101        ],102    )103    def test_from_values_or_dtype(self, values, categories, ordered, dtype, expected):104        result = CategoricalDtype._from_values_or_dtype(105            values, categories, ordered, dtype106        )107        assert result == expected108    @pytest.mark.parametrize(109        "values, categories, ordered, dtype",110        [111            [None, ["a", "b"], True, dtype2],112            [None, ["a", "b"], None, dtype2],113            [None, None, True, dtype2],114        ],115    )116    def test_from_values_or_dtype_raises(self, values, categories, ordered, dtype):117        msg = "Cannot specify `categories` or `ordered` together with `dtype`."118        with pytest.raises(ValueError, match=msg):119            CategoricalDtype._from_values_or_dtype(values, categories, ordered, dtype)120    def test_from_values_or_dtype_invalid_dtype(self):121        msg = "Cannot not construct CategoricalDtype from <class 'object'>"122        with pytest.raises(ValueError, match=msg):123            CategoricalDtype._from_values_or_dtype(None, None, None, object)124    def test_is_dtype(self, dtype):125        assert CategoricalDtype.is_dtype(dtype)126        assert CategoricalDtype.is_dtype("category")127        assert CategoricalDtype.is_dtype(CategoricalDtype())128        assert not CategoricalDtype.is_dtype("foo")129        assert not CategoricalDtype.is_dtype(np.float64)130    def test_basic(self, dtype):131        assert is_categorical_dtype(dtype)132        factor = Categorical(["a", 
"b", "b", "a", "a", "c", "c", "c"])133        s = Series(factor, name="A")134        # dtypes135        assert is_categorical_dtype(s.dtype)136        assert is_categorical_dtype(s)137        assert not is_categorical_dtype(np.dtype("float64"))138        with tm.assert_produces_warning(FutureWarning):139            # GH#33385 deprecated140            assert is_categorical(s.dtype)141            assert is_categorical(s)142            assert not is_categorical(np.dtype("float64"))143            assert not is_categorical(1.0)144    def test_tuple_categories(self):145        categories = [(1, "a"), (2, "b"), (3, "c")]146        result = CategoricalDtype(categories)147        assert all(result.categories == categories)148    @pytest.mark.parametrize(149        "categories, expected",150        [151            ([True, False], True),152            ([True, False, None], True),153            ([True, False, "a", "b'"], False),154            ([0, 1], False),155        ],156    )157    def test_is_boolean(self, categories, expected):158        cat = Categorical(categories)159        assert cat.dtype._is_boolean is expected160        assert is_bool_dtype(cat) is expected161        assert is_bool_dtype(cat.dtype) is expected162    def test_dtype_specific_categorical_dtype(self):163        expected = "datetime64[ns]"164        result = str(Categorical(DatetimeIndex([])).categories.dtype)165        assert result == expected166    def test_not_string(self):167        # though CategoricalDtype has object kind, it cannot be string168        assert not is_string_dtype(CategoricalDtype())169class TestDatetimeTZDtype(Base):170    @pytest.fixture171    def dtype(self):172        """173        Class level fixture of dtype for TestDatetimeTZDtype174        """175        return DatetimeTZDtype("ns", "US/Eastern")176    def test_alias_to_unit_raises(self):177        # 23990178        with pytest.raises(ValueError, match="Passing a dtype alias"):179            DatetimeTZDtype("datetime64[ns, 
US/Central]")180    def test_alias_to_unit_bad_alias_raises(self):181        # 23990182        with pytest.raises(TypeError, match=""):183            DatetimeTZDtype("this is a bad string")184        with pytest.raises(TypeError, match=""):185            DatetimeTZDtype("datetime64[ns, US/NotATZ]")186    def test_hash_vs_equality(self, dtype):187        # make sure that we satisfy is semantics188        dtype2 = DatetimeTZDtype("ns", "US/Eastern")189        dtype3 = DatetimeTZDtype(dtype2)190        assert dtype == dtype2191        assert dtype2 == dtype192        assert dtype3 == dtype193        assert hash(dtype) == hash(dtype2)194        assert hash(dtype) == hash(dtype3)195        dtype4 = DatetimeTZDtype("ns", "US/Central")196        assert dtype2 != dtype4197        assert hash(dtype2) != hash(dtype4)198    def test_construction(self):199        msg = "DatetimeTZDtype only supports ns units"200        with pytest.raises(ValueError, match=msg):201            DatetimeTZDtype("ms", "US/Eastern")202    def test_subclass(self):203        a = DatetimeTZDtype.construct_from_string("datetime64[ns, US/Eastern]")204        b = DatetimeTZDtype.construct_from_string("datetime64[ns, CET]")205        assert issubclass(type(a), type(a))206        assert issubclass(type(a), type(b))207    def test_compat(self, dtype):208        assert is_datetime64tz_dtype(dtype)209        assert is_datetime64tz_dtype("datetime64[ns, US/Eastern]")210        assert is_datetime64_any_dtype(dtype)211        assert is_datetime64_any_dtype("datetime64[ns, US/Eastern]")212        assert is_datetime64_ns_dtype(dtype)213        assert is_datetime64_ns_dtype("datetime64[ns, US/Eastern]")214        assert not is_datetime64_dtype(dtype)215        assert not is_datetime64_dtype("datetime64[ns, US/Eastern]")216    def test_construction_from_string(self, dtype):217        result = DatetimeTZDtype.construct_from_string("datetime64[ns, US/Eastern]")218        assert is_dtype_equal(dtype, result)219    
@pytest.mark.parametrize(220        "string",221        [222            "foo",223            "datetime64[ns, notatz]",224            # non-nano unit225            "datetime64[ps, UTC]",226            # dateutil str that returns None from gettz227            "datetime64[ns, dateutil/invalid]",228        ],229    )230    def test_construct_from_string_invalid_raises(self, string):231        msg = f"Cannot construct a 'DatetimeTZDtype' from '{string}'"232        with pytest.raises(TypeError, match=re.escape(msg)):233            DatetimeTZDtype.construct_from_string(string)234    def test_construct_from_string_wrong_type_raises(self):235        msg = "'construct_from_string' expects a string, got <class 'list'>"236        with pytest.raises(TypeError, match=msg):237            DatetimeTZDtype.construct_from_string(["datetime64[ns, notatz]"])238    def test_is_dtype(self, dtype):239        assert not DatetimeTZDtype.is_dtype(None)240        assert DatetimeTZDtype.is_dtype(dtype)241        assert DatetimeTZDtype.is_dtype("datetime64[ns, US/Eastern]")242        assert DatetimeTZDtype.is_dtype("M8[ns, US/Eastern]")243        assert not DatetimeTZDtype.is_dtype("foo")244        assert DatetimeTZDtype.is_dtype(DatetimeTZDtype("ns", "US/Pacific"))245        assert not DatetimeTZDtype.is_dtype(np.float64)246    def test_equality(self, dtype):247        assert is_dtype_equal(dtype, "datetime64[ns, US/Eastern]")248        assert is_dtype_equal(dtype, "M8[ns, US/Eastern]")249        assert is_dtype_equal(dtype, DatetimeTZDtype("ns", "US/Eastern"))250        assert not is_dtype_equal(dtype, "foo")251        assert not is_dtype_equal(dtype, DatetimeTZDtype("ns", "CET"))252        assert not is_dtype_equal(253            DatetimeTZDtype("ns", "US/Eastern"), DatetimeTZDtype("ns", "US/Pacific")254        )255        # numpy compat256        assert is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]")257        assert dtype == "M8[ns, US/Eastern]"258    def test_basic(self, dtype):259   
     assert is_datetime64tz_dtype(dtype)260        dr = date_range("20130101", periods=3, tz="US/Eastern")261        s = Series(dr, name="A")262        # dtypes263        assert is_datetime64tz_dtype(s.dtype)264        assert is_datetime64tz_dtype(s)265        assert not is_datetime64tz_dtype(np.dtype("float64"))266        assert not is_datetime64tz_dtype(1.0)267    def test_dst(self):268        dr1 = date_range("2013-01-01", periods=3, tz="US/Eastern")269        s1 = Series(dr1, name="A")270        assert is_datetime64tz_dtype(s1)271        dr2 = date_range("2013-08-01", periods=3, tz="US/Eastern")272        s2 = Series(dr2, name="A")273        assert is_datetime64tz_dtype(s2)274        assert s1.dtype == s2.dtype275    @pytest.mark.parametrize("tz", ["UTC", "US/Eastern"])276    @pytest.mark.parametrize("constructor", ["M8", "datetime64"])277    def test_parser(self, tz, constructor):278        # pr #11245279        dtz_str = f"{constructor}[ns, {tz}]"280        result = DatetimeTZDtype.construct_from_string(dtz_str)281        expected = DatetimeTZDtype("ns", tz)282        assert result == expected283    def test_empty(self):284        with pytest.raises(TypeError, match="A 'tz' is required."):285            DatetimeTZDtype()286    def test_tz_standardize(self):287        # GH 24713288        tz = pytz.timezone("US/Eastern")289        dr = date_range("2013-01-01", periods=3, tz="US/Eastern")290        dtype = DatetimeTZDtype("ns", dr.tz)291        assert dtype.tz == tz292        dtype = DatetimeTZDtype("ns", dr[0].tz)293        assert dtype.tz == tz294class TestPeriodDtype(Base):295    @pytest.fixture296    def dtype(self):297        """298        Class level fixture of dtype for TestPeriodDtype299        """300        return PeriodDtype("D")301    def test_hash_vs_equality(self, dtype):302        # make sure that we satisfy is semantics303        dtype2 = PeriodDtype("D")304        dtype3 = PeriodDtype(dtype2)305        assert dtype == dtype2306        assert 
dtype2 == dtype307        assert dtype3 == dtype308        assert dtype is dtype2309        assert dtype2 is dtype310        assert dtype3 is dtype311        assert hash(dtype) == hash(dtype2)312        assert hash(dtype) == hash(dtype3)313    def test_construction(self):314        with pytest.raises(ValueError, match="Invalid frequency: xx"):315            PeriodDtype("xx")316        for s in ["period[D]", "Period[D]", "D"]:317            dt = PeriodDtype(s)318            assert dt.freq == pd.tseries.offsets.Day()319            assert is_period_dtype(dt)320        for s in ["period[3D]", "Period[3D]", "3D"]:321            dt = PeriodDtype(s)322            assert dt.freq == pd.tseries.offsets.Day(3)323            assert is_period_dtype(dt)324        for s in [325            "period[26H]",326            "Period[26H]",327            "26H",328            "period[1D2H]",329            "Period[1D2H]",330            "1D2H",331        ]:332            dt = PeriodDtype(s)333            assert dt.freq == pd.tseries.offsets.Hour(26)334            assert is_period_dtype(dt)335    def test_subclass(self):336        a = PeriodDtype("period[D]")337        b = PeriodDtype("period[3D]")338        assert issubclass(type(a), type(a))339        assert issubclass(type(a), type(b))340    def test_identity(self):341        assert PeriodDtype("period[D]") == PeriodDtype("period[D]")342        assert PeriodDtype("period[D]") is PeriodDtype("period[D]")343        assert PeriodDtype("period[3D]") == PeriodDtype("period[3D]")344        assert PeriodDtype("period[3D]") is PeriodDtype("period[3D]")345        assert PeriodDtype("period[1S1U]") == PeriodDtype("period[1000001U]")346        assert PeriodDtype("period[1S1U]") is PeriodDtype("period[1000001U]")347    def test_compat(self, dtype):348        assert not is_datetime64_ns_dtype(dtype)349        assert not is_datetime64_ns_dtype("period[D]")350        assert not is_datetime64_dtype(dtype)351        assert not 
is_datetime64_dtype("period[D]")352    def test_construction_from_string(self, dtype):353        result = PeriodDtype("period[D]")354        assert is_dtype_equal(dtype, result)355        result = PeriodDtype.construct_from_string("period[D]")356        assert is_dtype_equal(dtype, result)357        with pytest.raises(TypeError, match="list"):358            PeriodDtype.construct_from_string([1, 2, 3])359    @pytest.mark.parametrize(360        "string",361        [362            "foo",363            "period[foo]",364            "foo[D]",365            "datetime64[ns]",366            "datetime64[ns, US/Eastern]",367        ],368    )369    def test_construct_dtype_from_string_invalid_raises(self, string):370        msg = f"Cannot construct a 'PeriodDtype' from '{string}'"371        with pytest.raises(TypeError, match=re.escape(msg)):372            PeriodDtype.construct_from_string(string)373    def test_is_dtype(self, dtype):374        assert PeriodDtype.is_dtype(dtype)375        assert PeriodDtype.is_dtype("period[D]")376        assert PeriodDtype.is_dtype("period[3D]")377        assert PeriodDtype.is_dtype(PeriodDtype("3D"))378        assert PeriodDtype.is_dtype("period[U]")379        assert PeriodDtype.is_dtype("period[S]")380        assert PeriodDtype.is_dtype(PeriodDtype("U"))381        assert PeriodDtype.is_dtype(PeriodDtype("S"))382        assert not PeriodDtype.is_dtype("D")383        assert not PeriodDtype.is_dtype("3D")384        assert not PeriodDtype.is_dtype("U")385        assert not PeriodDtype.is_dtype("S")386        assert not PeriodDtype.is_dtype("foo")387        assert not PeriodDtype.is_dtype(np.object_)388        assert not PeriodDtype.is_dtype(np.int64)389        assert not PeriodDtype.is_dtype(np.float64)390    def test_equality(self, dtype):391        assert is_dtype_equal(dtype, "period[D]")392        assert is_dtype_equal(dtype, PeriodDtype("D"))393        assert is_dtype_equal(dtype, PeriodDtype("D"))394        assert 
is_dtype_equal(PeriodDtype("D"), PeriodDtype("D"))395        assert not is_dtype_equal(dtype, "D")396        assert not is_dtype_equal(PeriodDtype("D"), PeriodDtype("2D"))397    def test_basic(self, dtype):398        assert is_period_dtype(dtype)399        pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H")400        assert is_period_dtype(pidx.dtype)401        assert is_period_dtype(pidx)402        s = Series(pidx, name="A")403        assert is_period_dtype(s.dtype)404        assert is_period_dtype(s)405        assert not is_period_dtype(np.dtype("float64"))406        assert not is_period_dtype(1.0)407    def test_empty(self):408        dt = PeriodDtype()409        msg = "object has no attribute 'freqstr'"410        with pytest.raises(AttributeError, match=msg):411            str(dt)412    def test_not_string(self):413        # though PeriodDtype has object kind, it cannot be string414        assert not is_string_dtype(PeriodDtype("D"))415class TestIntervalDtype(Base):416    @pytest.fixture417    def dtype(self):418        """419        Class level fixture of dtype for TestIntervalDtype420        """421        return IntervalDtype("int64")422    def test_hash_vs_equality(self, dtype):423        # make sure that we satisfy is semantics424        dtype2 = IntervalDtype("int64")425        dtype3 = IntervalDtype(dtype2)426        assert dtype == dtype2427        assert dtype2 == dtype428        assert dtype3 == dtype429        assert dtype is dtype2430        assert dtype2 is dtype3431        assert dtype3 is dtype432        assert hash(dtype) == hash(dtype2)433        assert hash(dtype) == hash(dtype3)434        dtype1 = IntervalDtype("interval")435        dtype2 = IntervalDtype(dtype1)436        dtype3 = IntervalDtype("interval")437        assert dtype2 == dtype1438        assert dtype2 == dtype2439        assert dtype2 == dtype3440        assert dtype2 is dtype1441        assert dtype2 is dtype2442        assert dtype2 is dtype3443        assert 
hash(dtype2) == hash(dtype1)444        assert hash(dtype2) == hash(dtype2)445        assert hash(dtype2) == hash(dtype3)446    @pytest.mark.parametrize(447        "subtype", ["interval[int64]", "Interval[int64]", "int64", np.dtype("int64")]448    )449    def test_construction(self, subtype):450        i = IntervalDtype(subtype)451        assert i.subtype == np.dtype("int64")452        assert is_interval_dtype(i)453    @pytest.mark.parametrize("subtype", [None, "interval", "Interval"])454    def test_construction_generic(self, subtype):455        # generic456        i = IntervalDtype(subtype)457        assert i.subtype is None458        assert is_interval_dtype(i)459    @pytest.mark.parametrize(460        "subtype",461        [462            CategoricalDtype(list("abc"), False),463            CategoricalDtype(list("wxyz"), True),464            object,465            str,466            "<U10",467            "interval[category]",468            "interval[object]",469        ],470    )471    def test_construction_not_supported(self, subtype):472        # GH 19016473        msg = (474            "category, object, and string subtypes are not supported "475            "for IntervalDtype"476        )477        with pytest.raises(TypeError, match=msg):478            IntervalDtype(subtype)479    @pytest.mark.parametrize("subtype", ["xx", "IntervalA", "Interval[foo]"])480    def test_construction_errors(self, subtype):481        msg = "could not construct IntervalDtype"482        with pytest.raises(TypeError, match=msg):483            IntervalDtype(subtype)484    def test_construction_from_string(self, dtype):485        result = IntervalDtype("interval[int64]")486        assert is_dtype_equal(dtype, result)487        result = IntervalDtype.construct_from_string("interval[int64]")488        assert is_dtype_equal(dtype, result)489    @pytest.mark.parametrize("string", [0, 3.14, ("a", "b"), None])490    def test_construction_from_string_errors(self, string):491        # these are 
invalid entirely492        msg = f"'construct_from_string' expects a string, got {type(string)}"493        with pytest.raises(TypeError, match=re.escape(msg)):494            IntervalDtype.construct_from_string(string)495    @pytest.mark.parametrize("string", ["foo", "foo[int64]", "IntervalA"])496    def test_construction_from_string_error_subtype(self, string):497        # this is an invalid subtype498        msg = (499            "Incorrectly formatted string passed to constructor. "500            r"Valid formats include Interval or Interval\[dtype\] "501            "where dtype is numeric, datetime, or timedelta"502        )503        with pytest.raises(TypeError, match=msg):504            IntervalDtype.construct_from_string(string)505    def test_subclass(self):506        a = IntervalDtype("interval[int64]")507        b = IntervalDtype("interval[int64]")508        assert issubclass(type(a), type(a))509        assert issubclass(type(a), type(b))510    def test_is_dtype(self, dtype):511        assert IntervalDtype.is_dtype(dtype)512        assert IntervalDtype.is_dtype("interval")513        assert IntervalDtype.is_dtype(IntervalDtype("float64"))514        assert IntervalDtype.is_dtype(IntervalDtype("int64"))515        assert IntervalDtype.is_dtype(IntervalDtype(np.int64))516        assert not IntervalDtype.is_dtype("D")517        assert not IntervalDtype.is_dtype("3D")518        assert not IntervalDtype.is_dtype("U")519        assert not IntervalDtype.is_dtype("S")520        assert not IntervalDtype.is_dtype("foo")521        assert not IntervalDtype.is_dtype("IntervalA")522        assert not IntervalDtype.is_dtype(np.object_)523        assert not IntervalDtype.is_dtype(np.int64)524        assert not IntervalDtype.is_dtype(np.float64)525    def test_equality(self, dtype):526        assert is_dtype_equal(dtype, "interval[int64]")527        assert is_dtype_equal(dtype, IntervalDtype("int64"))528        assert is_dtype_equal(IntervalDtype("int64"), 
IntervalDtype("int64"))529        assert not is_dtype_equal(dtype, "int64")530        assert not is_dtype_equal(IntervalDtype("int64"), IntervalDtype("float64"))531        # invalid subtype comparisons do not raise when directly compared532        dtype1 = IntervalDtype("float64")533        dtype2 = IntervalDtype("datetime64[ns, US/Eastern]")534        assert dtype1 != dtype2535        assert dtype2 != dtype1536    @pytest.mark.parametrize(537        "subtype",538        [539            None,540            "interval",541            "Interval",542            "int64",543            "uint64",544            "float64",545            "complex128",546            "datetime64",547            "timedelta64",548            PeriodDtype("Q"),549        ],550    )551    def test_equality_generic(self, subtype):552        # GH 18980553        dtype = IntervalDtype(subtype)554        assert is_dtype_equal(dtype, "interval")555        assert is_dtype_equal(dtype, IntervalDtype())556    @pytest.mark.parametrize(557        "subtype",558        [559            "int64",560            "uint64",561            "float64",562            "complex128",563            "datetime64",564            "timedelta64",565            PeriodDtype("Q"),566        ],567    )568    def test_name_repr(self, subtype):569        # GH 18980570        dtype = IntervalDtype(subtype)571        expected = f"interval[{subtype}]"572        assert str(dtype) == expected573        assert dtype.name == "interval"574    @pytest.mark.parametrize("subtype", [None, "interval", "Interval"])575    def test_name_repr_generic(self, subtype):576        # GH 18980577        dtype = IntervalDtype(subtype)578        assert str(dtype) == "interval"579        assert dtype.name == "interval"580    def test_basic(self, dtype):581        assert is_interval_dtype(dtype)582        ii = IntervalIndex.from_breaks(range(3))583        assert is_interval_dtype(ii.dtype)584        assert is_interval_dtype(ii)585        s = Series(ii, name="A")586 
       assert is_interval_dtype(s.dtype)587        assert is_interval_dtype(s)588    def test_basic_dtype(self):589        assert is_interval_dtype("interval[int64]")590        assert is_interval_dtype(IntervalIndex.from_tuples([(0, 1)]))591        assert is_interval_dtype(IntervalIndex.from_breaks(np.arange(4)))592        assert is_interval_dtype(593            IntervalIndex.from_breaks(date_range("20130101", periods=3))594        )595        assert not is_interval_dtype("U")596        assert not is_interval_dtype("S")597        assert not is_interval_dtype("foo")598        assert not is_interval_dtype(np.object_)599        assert not is_interval_dtype(np.int64)600        assert not is_interval_dtype(np.float64)601    def test_caching(self):602        IntervalDtype.reset_cache()603        dtype = IntervalDtype("int64")604        assert len(IntervalDtype._cache) == 1605        IntervalDtype("interval")606        assert len(IntervalDtype._cache) == 2607        IntervalDtype.reset_cache()608        tm.round_trip_pickle(dtype)609        assert len(IntervalDtype._cache) == 0610    def test_not_string(self):611        # GH30568: though IntervalDtype has object kind, it cannot be string612        assert not is_string_dtype(IntervalDtype())613class TestCategoricalDtypeParametrized:614    @pytest.mark.parametrize(615        "categories",616        [617            list("abcd"),618            np.arange(1000),619            ["a", "b", 10, 2, 1.3, True],620            [True, False],621            pd.date_range("2017", periods=4),622        ],623    )624    def test_basic(self, categories, ordered):625        c1 = CategoricalDtype(categories, ordered=ordered)626        tm.assert_index_equal(c1.categories, pd.Index(categories))627        assert c1.ordered is ordered628    def test_order_matters(self):629        categories = ["a", "b"]630        c1 = CategoricalDtype(categories, ordered=True)631        c2 = CategoricalDtype(categories, ordered=False)632        c3 = 
CategoricalDtype(categories, ordered=None)633        assert c1 is not c2634        assert c1 is not c3635    @pytest.mark.parametrize("ordered", [False, None])636    def test_unordered_same(self, ordered):637        c1 = CategoricalDtype(["a", "b"], ordered=ordered)638        c2 = CategoricalDtype(["b", "a"], ordered=ordered)639        assert hash(c1) == hash(c2)640    def test_categories(self):641        result = CategoricalDtype(["a", "b", "c"])642        tm.assert_index_equal(result.categories, pd.Index(["a", "b", "c"]))643        assert result.ordered is False644    def test_equal_but_different(self, ordered):645        c1 = CategoricalDtype([1, 2, 3])646        c2 = CategoricalDtype([1.0, 2.0, 3.0])647        assert c1 is not c2648        assert c1 != c2649    @pytest.mark.parametrize("v1, v2", [([1, 2, 3], [1, 2, 3]), ([1, 2, 3], [3, 2, 1])])650    def test_order_hashes_different(self, v1, v2):651        c1 = CategoricalDtype(v1, ordered=False)652        c2 = CategoricalDtype(v2, ordered=True)653        c3 = CategoricalDtype(v1, ordered=None)654        assert c1 is not c2655        assert c1 is not c3656    def test_nan_invalid(self):657        msg = "Categorical categories cannot be null"658        with pytest.raises(ValueError, match=msg):659            CategoricalDtype([1, 2, np.nan])660    def test_non_unique_invalid(self):661        msg = "Categorical categories must be unique"662        with pytest.raises(ValueError, match=msg):663            CategoricalDtype([1, 2, 1])664    def test_same_categories_different_order(self):665        c1 = CategoricalDtype(["a", "b"], ordered=True)666        c2 = CategoricalDtype(["b", "a"], ordered=True)667        assert c1 is not c2668    @pytest.mark.parametrize("ordered1", [True, False, None])669    @pytest.mark.parametrize("ordered2", [True, False, None])670    def test_categorical_equality(self, ordered1, ordered2):671        # same categories, same order672        # any combination of None/False are equal673        
# True/True is the only combination with True that are equal674        c1 = CategoricalDtype(list("abc"), ordered1)675        c2 = CategoricalDtype(list("abc"), ordered2)676        result = c1 == c2677        expected = bool(ordered1) is bool(ordered2)678        assert result is expected679        # same categories, different order680        # any combination of None/False are equal (order doesn't matter)681        # any combination with True are not equal (different order of cats)682        c1 = CategoricalDtype(list("abc"), ordered1)683        c2 = CategoricalDtype(list("cab"), ordered2)684        result = c1 == c2685        expected = (bool(ordered1) is False) and (bool(ordered2) is False)686        assert result is expected687        # different categories688        c2 = CategoricalDtype([1, 2, 3], ordered2)689        assert c1 != c2690        # none categories691        c1 = CategoricalDtype(list("abc"), ordered1)692        c2 = CategoricalDtype(None, ordered2)693        c3 = CategoricalDtype(None, ordered1)694        assert c1 == c2695        assert c2 == c1696        assert c2 == c3697    @pytest.mark.parametrize("categories", [list("abc"), None])698    @pytest.mark.parametrize("other", ["category", "not a category"])699    def test_categorical_equality_strings(self, categories, ordered, other):700        c1 = CategoricalDtype(categories, ordered)701        result = c1 == other702        expected = other == "category"703        assert result is expected704    def test_invalid_raises(self):705        with pytest.raises(TypeError, match="ordered"):706            CategoricalDtype(["a", "b"], ordered="foo")707        with pytest.raises(TypeError, match="'categories' must be list-like"):708            CategoricalDtype("category")709    def test_mixed(self):710        a = CategoricalDtype(["a", "b", 1, 2])711        b = CategoricalDtype(["a", "b", "1", "2"])712        assert hash(a) != hash(b)713    def test_from_categorical_dtype_identity(self):714        c1 = 
Categorical([1, 2], categories=[1, 2, 3], ordered=True)715        # Identity test for no changes716        c2 = CategoricalDtype._from_categorical_dtype(c1)717        assert c2 is c1718    def test_from_categorical_dtype_categories(self):719        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)720        # override categories721        result = CategoricalDtype._from_categorical_dtype(c1, categories=[2, 3])722        assert result == CategoricalDtype([2, 3], ordered=True)723    def test_from_categorical_dtype_ordered(self):724        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)725        # override ordered726        result = CategoricalDtype._from_categorical_dtype(c1, ordered=False)727        assert result == CategoricalDtype([1, 2, 3], ordered=False)728    def test_from_categorical_dtype_both(self):729        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)730        # override ordered731        result = CategoricalDtype._from_categorical_dtype(732            c1, categories=[1, 2], ordered=False733        )734        assert result == CategoricalDtype([1, 2], ordered=False)735    def test_str_vs_repr(self, ordered):736        c1 = CategoricalDtype(["a", "b"], ordered=ordered)737        assert str(c1) == "category"738        # Py2 will have unicode prefixes739        pat = r"CategoricalDtype\(categories=\[.*\], ordered={ordered}\)"740        assert re.match(pat.format(ordered=ordered), repr(c1))741    def test_categorical_categories(self):742        # GH17884743        c1 = CategoricalDtype(Categorical(["a", "b"]))744        tm.assert_index_equal(c1.categories, pd.Index(["a", "b"]))745        c1 = CategoricalDtype(CategoricalIndex(["a", "b"]))746        tm.assert_index_equal(c1.categories, pd.Index(["a", "b"]))747    @pytest.mark.parametrize(748        "new_categories", [list("abc"), list("cba"), list("wxyz"), None]749    )750    @pytest.mark.parametrize("new_ordered", [True, False, None])751    def test_update_dtype(self, 
ordered, new_categories, new_ordered):752        original_categories = list("abc")753        dtype = CategoricalDtype(original_categories, ordered)754        new_dtype = CategoricalDtype(new_categories, new_ordered)755        result = dtype.update_dtype(new_dtype)756        expected_categories = pd.Index(new_categories or original_categories)757        expected_ordered = new_ordered if new_ordered is not None else dtype.ordered758        tm.assert_index_equal(result.categories, expected_categories)759        assert result.ordered is expected_ordered760    def test_update_dtype_string(self, ordered):761        dtype = CategoricalDtype(list("abc"), ordered)762        expected_categories = dtype.categories763        expected_ordered = dtype.ordered764        result = dtype.update_dtype("category")765        tm.assert_index_equal(result.categories, expected_categories)766        assert result.ordered is expected_ordered767    @pytest.mark.parametrize("bad_dtype", ["foo", object, np.int64, PeriodDtype("Q")])768    def test_update_dtype_errors(self, bad_dtype):769        dtype = CategoricalDtype(list("abc"), False)770        msg = "a CategoricalDtype must be passed to perform an update, "771        with pytest.raises(ValueError, match=msg):772            dtype.update_dtype(bad_dtype)773@pytest.mark.parametrize(774    "dtype", [CategoricalDtype, IntervalDtype, DatetimeTZDtype, PeriodDtype]775)776def test_registry(dtype):777    assert dtype in registry.dtypes778@pytest.mark.parametrize(779    "dtype, expected",780    [781        ("int64", None),782        ("interval", IntervalDtype()),783        ("interval[int64]", IntervalDtype()),784        ("interval[datetime64[ns]]", IntervalDtype("datetime64[ns]")),785        ("period[D]", PeriodDtype("D")),786        ("category", CategoricalDtype()),787        ("datetime64[ns, US/Eastern]", DatetimeTZDtype("ns", "US/Eastern")),788    ],789)790def test_registry_find(dtype, expected):791    assert registry.find(dtype) == 
expected792@pytest.mark.parametrize(793    "dtype, expected",794    [795        (str, False),796        (int, False),797        (bool, True),798        (np.bool_, True),799        (np.array(["a", "b"]), False),800        (pd.Series([1, 2]), False),801        (np.array([True, False]), True),802        (pd.Series([True, False]), True),803        (SparseArray([True, False]), True),804        (SparseDtype(bool), True),805    ],806)807def test_is_bool_dtype(dtype, expected):808    result = is_bool_dtype(dtype)809    assert result is expected810def test_is_bool_dtype_sparse():811    result = is_bool_dtype(pd.Series(SparseArray([True, False])))812    assert result is True813@pytest.mark.parametrize(814    "check",815    [816        is_categorical_dtype,817        is_datetime64tz_dtype,818        is_period_dtype,819        is_datetime64_ns_dtype,820        is_datetime64_dtype,821        is_interval_dtype,822        is_datetime64_any_dtype,823        is_string_dtype,824        is_bool_dtype,825    ],...register_functions.py
Source:register_functions.py  
...3    tint32, tint64, tfloat32, tfloat64, tndarray4from .ir import register_function, register_seeded_function5def register_functions():6    locusVar = tvariable("R", "locus")7    register_function("isValidContig", (dtype("str"),), dtype("bool"), (locusVar,))8    register_function("isValidLocus", (dtype("str"), dtype("int32"),), dtype("bool"), (locusVar,))9    register_function("contigLength", (dtype("str"),), dtype("int32"), (locusVar,))10    register_function("getReferenceSequenceFromValidLocus", (dtype("str"), dtype("int32"), dtype("int32"), dtype("int32"),), dtype("str"), (locusVar,))11    register_function("getReferenceSequence", (dtype("str"), dtype("int32"), dtype("int32"), dtype("int32"),), dtype("str"), (locusVar,))12    register_function("parse_json", (dtype("str"),), dtype("tuple(?T)"), (dtype("?T"),))13    register_function("flatten", (dtype("array<array<?T>>"),), dtype("array<?T>"))14    register_function("difference", (dtype("set<?T>"), dtype("set<?T>"),), dtype("set<?T>"))15    register_function("median", (dtype("set<?T:numeric>"),), dtype("?T"))16    register_function("median", (dtype("array<?T:numeric>"),), dtype("?T"))17    register_function("uniqueMinIndex", (dtype("array<?T>"),), dtype("int32"))18    register_function("mean", (dtype("array<?T:numeric>"),), dtype("float64"))19    register_function("toFloat32", (dtype("?T:numeric"),), dtype("float32"))20    register_function("uniqueMaxIndex", (dtype("array<?T>"),), dtype("int32"))21    register_function("toSet", (dtype("array<?T>"),), dtype("set<?T>"))22    def array_floating_point_divide(arg_type, ret_type):23        register_function("div", (arg_type, tarray(arg_type),), tarray(ret_type))24        register_function("div", (tarray(arg_type), arg_type), tarray(ret_type))25        register_function("div", (tarray(arg_type), tarray(arg_type)), tarray(ret_type))26    array_floating_point_divide(tint32, tfloat32)27    array_floating_point_divide(tint64, tfloat32)28    
array_floating_point_divide(tfloat32, tfloat32)29    array_floating_point_divide(tfloat64, tfloat64)30    def ndarray_floating_point_divide(arg_type, ret_type):31        register_function("div", (arg_type, tndarray(arg_type, NatVariable()),), tndarray(ret_type, NatVariable()))32        register_function("div", (tndarray(arg_type, NatVariable()), arg_type), tndarray(ret_type, NatVariable()))33        register_function("div", (tndarray(arg_type, NatVariable()),34                                  tndarray(arg_type, NatVariable())), tndarray(ret_type, NatVariable()))35    ndarray_floating_point_divide(tint32, tfloat32)36    ndarray_floating_point_divide(tint64, tfloat32)37    ndarray_floating_point_divide(tfloat32, tfloat32)38    ndarray_floating_point_divide(tfloat64, tfloat64)39    register_function("values", (dtype("dict<?key, ?value>"),), dtype("array<?value>"))40    register_function("sliceRight", (dtype("array<?T>"), dtype("int32"),), dtype("array<?T>"))41    register_function("sliceRight", (dtype("str"), dtype("int32"),), dtype("str"))42    register_function("get", (dtype("dict<?key, ?value>"), dtype("?key"),), dtype("?value"))43    register_function("get", (dtype("dict<?key, ?value>"), dtype("?key"), dtype("?value"),), dtype("?value"))44    register_function("max", (dtype("array<?T:numeric>"),), dtype("?T"))45    register_function("nanmax", (dtype("array<?T:numeric>"),), dtype("?T"))46    register_function("max", (dtype("?T"), dtype("?T"),), dtype("?T"))47    register_function("nanmax", (dtype("?T"), dtype("?T"),), dtype("?T"))48    register_function("max_ignore_missing", (dtype("?T"), dtype("?T"),), dtype("?T"))49    register_function("nanmax_ignore_missing", (dtype("?T"), dtype("?T"),), dtype("?T"))50    register_function("product", (dtype("array<?T:numeric>"),), dtype("?T"))51    register_function("toInt32", (dtype("?T:numeric"),), dtype("int32"))52    register_function("extend", (dtype("array<?T>"), dtype("array<?T>"),), dtype("array<?T>"))53    
register_function("argmin", (dtype("array<?T>"),), dtype("int32"))54    register_function("toFloat64", (dtype("?T:numeric"),), dtype("float64"))55    register_function("sort", (dtype("array<?T>"),), dtype("array<?T>"))56    register_function("sort", (dtype("array<?T>"), dtype("bool"),), dtype("array<?T>"))57    register_function("isSubset", (dtype("set<?T>"), dtype("set<?T>"),), dtype("bool"))58    register_function("slice", (dtype("str"), dtype("int32"), dtype("int32"),), dtype("str"))59    register_function("slice", (dtype("array<?T>"), dtype("int32"), dtype("int32"),), dtype("array<?T>"))60    register_function("add", (dtype("array<?T:numeric>"), dtype("array<?T>"),), dtype("array<?T>"))61    register_function("add", (dtype("array<?T:numeric>"), dtype("?T"),), dtype("array<?T>"))62    register_function("add", (dtype("?T:numeric"), dtype("array<?T>"),), dtype("array<?T>"))63    register_function("add", (dtype("ndarray<?T:numeric, ?nat>"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))64    register_function("add", (dtype("ndarray<?T:numeric, ?nat>"), dtype("?T"),), dtype("ndarray<?T, ?nat>"))65    register_function("add", (dtype("?T:numeric"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))66    register_function("pow", (dtype("array<?T:numeric>"), dtype("array<?T>"),), dtype("array<float64>"))67    register_function("pow", (dtype("array<?T:numeric>"), dtype("?T"),), dtype("array<float64>"))68    register_function("pow", (dtype("?T:numeric"), dtype("array<?T>"),), dtype("array<float64>"))69    register_function("pow", (dtype("ndarray<?T:numeric, ?nat>"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))70    register_function("pow", (dtype("ndarray<?T:numeric, ?nat>"), dtype("?T"),), dtype("ndarray<?T, ?nat>"))71    register_function("pow", (dtype("?T:numeric"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))72    register_function("append", (dtype("array<?T>"), dtype("?T"),), dtype("array<?T>"))73    
register_function("sliceLeft", (dtype("str"), dtype("int32"),), dtype("str"))74    register_function("sliceLeft", (dtype("array<?T>"), dtype("int32"),), dtype("array<?T>"))75    register_function("remove", (dtype("set<?T>"), dtype("?T"),), dtype("set<?T>"))76    register_function("index", (dtype("str"), dtype("int32"),), dtype("str"))77    register_function("indexArray", (dtype("array<?T>"), dtype("int32"), dtype("str")), dtype("?T"))78    register_function("index", (dtype("dict<?key, ?value>"), dtype("?key"),), dtype("?value"))79    register_function("dictToArray", (dtype("dict<?key, ?value>"),), dtype("array<tuple(?key, ?value)>"))80    register_function("mod", (dtype("array<?T:numeric>"), dtype("array<?T>"),), dtype("array<?T>"))81    register_function("mod", (dtype("array<?T:numeric>"), dtype("?T"),), dtype("array<?T>"))82    register_function("mod", (dtype("?T:numeric"), dtype("array<?T>"),), dtype("array<?T>"))83    register_function("mod", (dtype("ndarray<?T:numeric, ?nat>"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))84    register_function("mod", (dtype("ndarray<?T:numeric, ?nat>"), dtype("?T"),), dtype("ndarray<?T, ?nat>"))85    register_function("mod", (dtype("?T:numeric"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))86    register_function("dict", (dtype("array<tuple(?key, ?value)>"),), dtype("dict<?key, ?value>"))87    register_function("dict", (dtype("set<tuple(?key, ?value)>"),), dtype("dict<?key, ?value>"))88    register_function("keys", (dtype("dict<?key, ?value>"),), dtype("array<?key>"))89    register_function("min", (dtype("array<?T:numeric>"),), dtype("?T"))90    register_function("nanmin", (dtype("array<?T:numeric>"),), dtype("?T"))91    register_function("min", (dtype("?T"), dtype("?T"),), dtype("?T"))92    register_function("nanmin", (dtype("?T"), dtype("?T"),), dtype("?T"))93    register_function("min_ignore_missing", (dtype("?T"), dtype("?T"),), dtype("?T"))94    register_function("nanmin_ignore_missing", 
(dtype("?T"), dtype("?T"),), dtype("?T"))95    register_function("sum", (dtype("array<?T:numeric>"),), dtype("?T"))96    register_function("toInt64", (dtype("?T:numeric"),), dtype("int64"))97    register_function("contains", (dtype("dict<?key, ?value>"), dtype("?key"),), dtype("bool"))98    register_function("contains", (dtype("array<?T>"), dtype("?T"),), dtype("bool"))99    register_function("contains", (dtype("set<?T>"), dtype("?T"),), dtype("bool"))100    register_function("-", (dtype("array<?T:numeric>"), dtype("?T"),), dtype("array<?T>"))101    register_function("-", (dtype("array<?T:numeric>"), dtype("array<?T>"),), dtype("array<?T>"))102    register_function("-", (dtype("?T:numeric"), dtype("array<?T>"),), dtype("array<?T>"))103    register_function("-", (dtype("ndarray<?T:numeric, ?nat>"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))104    register_function("-", (dtype("ndarray<?T:numeric, ?nat>"), dtype("?T"),), dtype("ndarray<?T, ?nat>"))105    register_function("-", (dtype("?T:numeric"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))106    register_function("addone", (dtype("int32"),), dtype("int32"))107    register_function("isEmpty", (dtype("dict<?key, ?value>"),), dtype("bool"))108    register_function("isEmpty", (dtype("array<?T>"),), dtype("bool"))109    register_function("isEmpty", (dtype("set<?T>"),), dtype("bool"))110    register_function("union", (dtype("set<?T>"), dtype("set<?T>"),), dtype("set<?T>"))111    register_function("mul", (dtype("array<?T:numeric>"), dtype("array<?T>"),), dtype("array<?T>"))112    register_function("mul", (dtype("array<?T:numeric>"), dtype("?T"),), dtype("array<?T>"))113    register_function("mul", (dtype("?T:numeric"), dtype("array<?T>"),), dtype("array<?T>"))114    register_function("mul", (dtype("ndarray<?T:numeric, ?nat>"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))115    register_function("mul", (dtype("ndarray<?T:numeric, ?nat>"), dtype("?T"),), dtype("ndarray<?T, 
?nat>"))116    register_function("mul", (dtype("?T:numeric"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))117    register_function("intersection", (dtype("set<?T>"), dtype("set<?T>"),), dtype("set<?T>"))118    register_function("add", (dtype("set<?T>"), dtype("?T"),), dtype("set<?T>"))119    register_function("argmax", (dtype("array<?T>"),), dtype("int32"))120    register_function("floordiv", (dtype("array<?T:numeric>"), dtype("array<?T>"),), dtype("array<?T>"))121    register_function("floordiv", (dtype("array<?T:numeric>"), dtype("?T"),), dtype("array<?T>"))122    register_function("floordiv", (dtype("?T:numeric"), dtype("array<?T>"),), dtype("array<?T>"))123    register_function("floordiv", (dtype("ndarray<?T:numeric, ?nat>"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))124    register_function("floordiv", (dtype("ndarray<?T:numeric, ?nat>"), dtype("?T"),), dtype("ndarray<?T, ?nat>"))125    register_function("floordiv", (dtype("?T:numeric"), dtype("ndarray<?T, ?nat>"),), dtype("ndarray<?T, ?nat>"))126    register_function("keySet", (dtype("dict<?key, ?value>"),), dtype("set<?key>"))127    register_function("qnorm", (dtype("float64"),), dtype("float64"))128    register_function("oneHotAlleles", (dtype("call"), dtype("int32"),), dtype("array<int32>"))129    register_function("dpois", (dtype("float64"), dtype("float64"), dtype("bool"),), dtype("float64"))130    register_function("dpois", (dtype("float64"), dtype("float64"),), dtype("float64"))131    register_function("ploidy", (dtype("call"),), dtype("int32"))132    register_function("lor", (dtype("bool"), dtype("bool"),), dtype("bool"))133    register_function("ppois", (dtype("float64"), dtype("float64"), dtype("bool"), dtype("bool"),), dtype("float64"))134    register_function("ppois", (dtype("float64"), dtype("float64"),), dtype("float64"))135    register_function("log10", (dtype("float64"),), dtype("float64"))136    register_function("isHet", (dtype("call"),), dtype("bool"))137    
register_function("isAutosomalOrPseudoAutosomal", (dtype("?T:locus"),), dtype("bool"))138    register_function("testCodeUnification", (dtype("?x:numeric"), dtype("?x:int32"),), dtype("?x"))139    register_seeded_function("rand_pois", (dtype("float64"),), dtype("float64"))140    register_seeded_function("rand_pois", (dtype("int32"), dtype("float64"),), dtype("array<float64>"))141    register_function("toFloat32", (dtype("str"),), dtype("float32"))142    register_function("toFloat32", (dtype("bool"),), dtype("float32"))143    register_function("isAutosomal", (dtype("?T:locus"),), dtype("bool"))144    register_function("isPhased", (dtype("call"),), dtype("bool"))145    register_function("isHomVar", (dtype("call"),), dtype("bool"))146    register_function("corr", (dtype("array<float64>"), dtype("array<float64>"),), dtype("float64"))147    register_function("log", (dtype("float64"), dtype("float64"),), dtype("float64"))148    register_function("log", (dtype("float64"),), dtype("float64"))149    register_function("foobar2", (), dtype("int32"))150    register_function("approxEqual", (dtype("float64"), dtype("float64"), dtype("float64"), dtype("bool"), dtype("bool"),), dtype("bool"))151    register_function("includesEnd", (dtype("interval<?T>"),), dtype("bool"))152    register_function("position", (dtype("?T:locus"),), dtype("int32"))153    register_seeded_function("rand_unif", (dtype("float64"), dtype("float64"),), dtype("float64"))154    register_function("showStr", (dtype("?T"), dtype("int32")), dtype("str"))155    register_function("str", (dtype("?T"),), dtype("str"))156    register_function("valuesSimilar", (dtype("?T"), dtype("?T"), dtype('float64'), dtype('bool'),), dtype("bool"))157    register_function("replace", (dtype("str"), dtype("str"), dtype("str"),), dtype("str"))158    register_function("exp", (dtype("float64"),), dtype("float64"))159    register_function("land", (dtype("bool"), dtype("bool"),), dtype("bool"))160    register_function("compare", 
(dtype("int32"), dtype("int32"),), dtype("int32"))161    register_function("triangle", (dtype("int32"),), dtype("int32"))162    register_function("Interval", (dtype("?T"), dtype("?T"), dtype("bool"), dtype("bool"),), dtype("interval<?T>"))163    register_function("contig", (dtype("?T:locus"),), dtype("str"))164    register_function("Call", (dtype("bool"),), dtype("call"))165    register_function("Call", (dtype("str"),), dtype("call"))166    register_function("Call", (dtype("int32"), dtype("bool"),), dtype("call"))167    register_function("Call", (dtype("int32"), dtype("int32"), dtype("bool"),), dtype("call"))168    register_function("Call", (dtype("array<int32>"), dtype("bool"),), dtype("call"))169    register_function("qchisqtail", (dtype("float64"), dtype("float64"),), dtype("float64"))170    register_function("binomTest", (dtype("int32"), dtype("int32"), dtype("float64"), dtype("int32"),), dtype("float64"))171    register_function("qpois", (dtype("float64"), dtype("float64"),), dtype("int32"))172    register_function("qpois", (dtype("float64"), dtype("float64"), dtype("bool"), dtype("bool"),), dtype("int32"))173    register_function("is_finite", (dtype("float32"),), dtype("bool"))174    register_function("is_finite", (dtype("float64"),), dtype("bool"))175    register_function("inYPar", (dtype("?T:locus"),), dtype("bool"))176    register_function("contingency_table_test", (dtype("int32"), dtype("int32"), dtype("int32"), dtype("int32"), dtype("int32"),), dtype("struct{p_value: float64, odds_ratio: float64}"))177    register_function("toInt32", (dtype("bool"),), dtype("int32"))178    register_function("toInt32", (dtype("str"),), dtype("int32"))179    register_function("foobar1", (), dtype("int32"))180    register_function("toFloat64", (dtype("str"),), dtype("float64"))181    register_function("toFloat64", (dtype("bool"),), dtype("float64"))182    register_function("dbeta", (dtype("float64"), dtype("float64"), dtype("float64"),), dtype("float64"))183    
register_function("Locus", (dtype("str"),), dtype("?T:locus"))184    register_function("Locus", (dtype("str"), dtype("int32"),), dtype("?T:locus"))185    register_function("LocusAlleles", (dtype("str"),), dtype("struct{locus: ?T, alleles: array<str>}"))186    register_function("LocusInterval", (dtype("str"), dtype("bool"),), dtype("interval<?T:locus>"))187    register_function("LocusInterval", (dtype("str"), dtype("int32"), dtype("int32"), dtype("bool"), dtype("bool"), dtype("bool"),), dtype("interval<?T:locus>"))188    register_function("globalPosToLocus", (dtype("int64"),), dtype("?T:locus"))189    register_function("locusToGlobalPos", (dtype("?T:locus"),), dtype("int64"))190    register_function("liftoverLocus", (dtype("?T:locus"), dtype('float64'),), dtype("struct{result:?U:locus,is_negative_strand:bool}"))191    register_function("liftoverLocusInterval", (dtype("interval<?T:locus>"), dtype('float64'),), dtype("struct{result:interval<?U:locus>,is_negative_strand:bool}"))192    register_function("min_rep", (dtype("?T:locus"), dtype("array<str>"),), dtype("struct{locus: ?T, alleles: array<str>}"))193    register_function("locus_windows_per_contig", (dtype("array<array<float64>>"), dtype("float64"),), dtype("tuple(array<int32>, array<int32>)"))194    register_function("toBoolean", (dtype("str"),), dtype("bool"))195    register_seeded_function("rand_bool", (dtype("float64"),), dtype("bool"))196    register_function("pchisqtail", (dtype("float64"), dtype("float64"),), dtype("float64"))197    register_seeded_function("rand_cat", (dtype("array<float64>"),), dtype("int32"))198    register_function("inYNonPar", (dtype("?T:locus"),), dtype("bool"))199    register_function("concat", (dtype("str"), dtype("str"),), dtype("str"))200    register_function("pow", (dtype("float32"), dtype("float32"),), dtype("float64"))201    register_function("pow", (dtype("int32"), dtype("int32"),), dtype("float64"))202    register_function("pow", (dtype("int64"), dtype("int64"),), 
dtype("float64"))203    register_function("pow", (dtype("float64"), dtype("float64"),), dtype("float64"))204    register_function("length", (dtype("str"),), dtype("int32"))205    register_function("slice", (dtype("str"), dtype("int32"), dtype("int32"),), dtype("str"))206    register_function("split", (dtype("str"), dtype("str"), dtype("int32"),), dtype("array<str>"))207    register_function("split", (dtype("str"), dtype("str"),), dtype("array<str>"))208    register_seeded_function("rand_gamma", (dtype("float64"), dtype("float64"),), dtype("float64"))209    register_function("UnphasedDiploidGtIndexCall", (dtype("int32"),), dtype("call"))210    register_function("index", (dtype("call"), dtype("int32"),), dtype("int32"))211    register_function("sign", (dtype("int64"),), dtype("int64"))212    register_function("sign", (dtype("float64"),), dtype("float64"))213    register_function("sign", (dtype("float32"),), dtype("float32"))214    register_function("sign", (dtype("int32"),), dtype("int32"))215    register_function("unphasedDiploidGtIndex", (dtype("call"),), dtype("int32"))216    register_function("gamma", (dtype("float64"),), dtype("float64"))217    register_function("mod", (dtype("float64"), dtype("float64"),), dtype("float64"))218    register_function("mod", (dtype("int64"), dtype("int64"),), dtype("int64"))219    register_function("mod", (dtype("float32"), dtype("float32"),), dtype("float32"))220    register_function("mod", (dtype("int32"), dtype("int32"),), dtype("int32"))221    register_function("fisher_exact_test", (dtype("int32"), dtype("int32"), dtype("int32"), dtype("int32"),), dtype("struct{p_value: float64, odds_ratio: float64, ci_95_lower: float64, ci_95_upper: float64}"))222    register_function("floor", (dtype("float64"),), dtype("float64"))223    register_function("floor", (dtype("float32"),), dtype("float32"))224    register_function("isNonRef", (dtype("call"),), dtype("bool"))225    register_function("includesStart", (dtype("interval<?T>"),), 
dtype("bool"))226    register_function("isHetNonRef", (dtype("call"),), dtype("bool"))227    register_function("hardy_weinberg_test", (dtype("int32"), dtype("int32"), dtype("int32"),), dtype("struct{het_freq_hwe: float64, p_value: float64}"))228    register_function("haplotype_freq_em", (dtype("array<int32>"),), dtype("array<float64>"))229    register_function("nNonRefAlleles", (dtype("call"),), dtype("int32"))230    register_function("abs", (dtype("float64"),), dtype("float64"))231    register_function("abs", (dtype("float32"),), dtype("float32"))232    register_function("abs", (dtype("int64"),), dtype("int64"))233    register_function("abs", (dtype("int32"),), dtype("int32"))234    register_function("endswith", (dtype("str"), dtype("str"),), dtype("bool"))235    register_function("sqrt", (dtype("float64"),), dtype("float64"))236    register_function("isnan", (dtype("float32"),), dtype("bool"))237    register_function("isnan", (dtype("float64"),), dtype("bool"))238    register_function("lower", (dtype("str"),), dtype("str"))239    register_seeded_function("rand_beta", (dtype("float64"), dtype("float64"),), dtype("float64"))240    register_seeded_function("rand_beta", (dtype("float64"), dtype("float64"), dtype("float64"), dtype("float64"),), dtype("float64"))241    register_function("toInt64", (dtype("bool"),), dtype("int64"))242    register_function("toInt64", (dtype("str"),), dtype("int64"))243    register_function("testCodeUnification2", (dtype("?x"),), dtype("?x"))244    register_function("contains", (dtype("str"), dtype("str"),), dtype("bool"))245    register_function("contains", (dtype("interval<?T>"), dtype("?T"),), dtype("bool"))246    register_function("entropy", (dtype("str"),), dtype("float64"))247    register_function("filtering_allele_frequency", (dtype("int32"), dtype("int32"), dtype("float64"),), dtype("float64"))248    register_function("gqFromPL", (dtype("array<?N:int32>"),), dtype("int32"))249    register_function("startswith", (dtype("str"), 
dtype("str"),), dtype("bool"))250    register_function("ceil", (dtype("float32"),), dtype("float32"))251    register_function("ceil", (dtype("float64"),), dtype("float64"))252    register_function("json", (dtype("?T"),), dtype("str"))253    register_function("strip", (dtype("str"),), dtype("str"))254    register_function("firstMatchIn", (dtype("str"), dtype("str"),), dtype("array<str>"))255    register_function("isEmpty", (dtype("interval<?T>"),), dtype("bool"))256    register_function("~", (dtype("str"), dtype("str"),), dtype("bool"))257    register_function("mkString", (dtype("set<str>"), dtype("str"),), dtype("str"))258    register_function("mkString", (dtype("array<str>"), dtype("str"),), dtype("str"))259    register_function("dosage", (dtype("array<?N:float64>"),), dtype("float64"))260    register_function("upper", (dtype("str"),), dtype("str"))261    register_function("overlaps", (dtype("interval<?T>"), dtype("interval<?T>"),), dtype("bool"))262    register_function("downcode", (dtype("call"), dtype("int32"),), dtype("call"))263    register_function("inXPar", (dtype("?T:locus"),), dtype("bool"))264    register_function("format", (dtype("str"), dtype("?T:tuple"),), dtype("str"))265    register_function("pnorm", (dtype("float64"),), dtype("float64"))266    register_function("is_infinite", (dtype("float32"),), dtype("bool"))267    register_function("is_infinite", (dtype("float64"),), dtype("bool"))268    register_function("isHetRef", (dtype("call"),), dtype("bool"))269    register_function("isMitochondrial", (dtype("?T:locus"),), dtype("bool"))270    register_function("hamming", (dtype("str"), dtype("str"),), dtype("int32"))271    register_function("end", (dtype("interval<?T>"),), dtype("?T"))272    register_function("start", (dtype("interval<?T>"),), dtype("?T"))273    register_function("inXNonPar", (dtype("?T:locus"),), dtype("bool"))274    register_function("escapeString", (dtype("str"),), dtype("str"))275    register_function("isHomRef", (dtype("call"),), 
dtype("bool"))276    register_seeded_function("rand_norm", (dtype("float64"), dtype("float64"),), dtype("float64"))277    register_function("chi_squared_test", (dtype("int32"), dtype("int32"), dtype("int32"), dtype("int32"),), dtype("struct{p_value: float64, odds_ratio: float64}"))278    register_function("strftime", (dtype("str"), dtype("int64"), dtype("str")), dtype("str"))...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
