How to use setup_path method in Behave

Best Python code snippet using behave

test_store.py

Source: test_store.py (GitHub)
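Although the page title mentions Behave, `setup_path` in the snippet below is a pytest fixture, not a Behave step: pytest injects it by name into every test method of `TestHDFStore`, where it supplies the file name for a scratch HDF5 file. A minimal sketch of what such a fixture can look like, assuming pandas' `rands` test helper is available (the body is illustrative, not the exact upstream conftest):

import pytest
import pandas._testing as tm


@pytest.fixture
def setup_path():
    """Return a unique scratch .h5 file name for each test."""
    # A random suffix keeps concurrent test runs from colliding on the
    # same file; tm.rands(10) returns a random 10-character string.
    return f"tmp.__{tm.rands(10)}__.h5"

Because the fixture returns only a name, each test passes it through helpers such as ensure_clean_path or ensure_clean_store to create, open, and clean up the actual file.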

1import datetime2from datetime import timedelta3from distutils.version import LooseVersion4import hashlib5from io import BytesIO6import os7from pathlib import Path8import re9import time10from warnings import catch_warnings, simplefilter11import numpy as np12import pytest13from pandas.compat import is_platform_little_endian, is_platform_windows14import pandas.util._test_decorators as td15import pandas as pd16from pandas import (17 Categorical,18 CategoricalIndex,19 DataFrame,20 DatetimeIndex,21 Index,22 Int64Index,23 MultiIndex,24 RangeIndex,25 Series,26 Timestamp,27 bdate_range,28 concat,29 date_range,30 isna,31 timedelta_range,32)33import pandas._testing as tm34from pandas.tests.io.pytables.common import (35 _maybe_remove,36 create_tempfile,37 ensure_clean_path,38 ensure_clean_store,39 safe_close,40 safe_remove,41 tables,42)43from pandas.io.pytables import (44 ClosedFileError,45 HDFStore,46 PossibleDataLossError,47 Term,48 read_hdf,49)50from pandas.io import pytables as pytables # noqa: E402 isort:skip51from pandas.io.pytables import TableIterator # noqa: E402 isort:skip52_default_compressor = "blosc"53ignore_natural_naming_warning = pytest.mark.filterwarnings(54 "ignore:object name:tables.exceptions.NaturalNameWarning"55)56@pytest.mark.single57class TestHDFStore:58 def test_format_type(self, setup_path):59 df = pd.DataFrame({"A": [1, 2]})60 with ensure_clean_path(setup_path) as path:61 with HDFStore(path) as store:62 store.put("a", df, format="fixed")63 store.put("b", df, format="table")64 assert store.get_storer("a").format_type == "fixed"65 assert store.get_storer("b").format_type == "table"66 def test_format_kwarg_in_constructor(self, setup_path):67 # GH 1329168 msg = "format is not a defined argument for HDFStore"69 with ensure_clean_path(setup_path) as path:70 with pytest.raises(ValueError, match=msg):71 HDFStore(path, format="table")72 def test_context(self, setup_path):73 path = create_tempfile(setup_path)74 try:75 with HDFStore(path) as tbl:76 raise ValueError("blah")77 except ValueError:78 pass79 finally:80 safe_remove(path)81 try:82 with HDFStore(path) as tbl:83 tbl["a"] = tm.makeDataFrame()84 with HDFStore(path) as tbl:85 assert len(tbl) == 186 assert type(tbl["a"]) == DataFrame87 finally:88 safe_remove(path)89 def test_conv_read_write(self, setup_path):90 path = create_tempfile(setup_path)91 try:92 def roundtrip(key, obj, **kwargs):93 obj.to_hdf(path, key, **kwargs)94 return read_hdf(path, key)95 o = tm.makeTimeSeries()96 tm.assert_series_equal(o, roundtrip("series", o))97 o = tm.makeStringSeries()98 tm.assert_series_equal(o, roundtrip("string_series", o))99 o = tm.makeDataFrame()100 tm.assert_frame_equal(o, roundtrip("frame", o))101 # table102 df = DataFrame(dict(A=range(5), B=range(5)))103 df.to_hdf(path, "table", append=True)104 result = read_hdf(path, "table", where=["index>2"])105 tm.assert_frame_equal(df[df.index > 2], result)106 finally:107 safe_remove(path)108 def test_long_strings(self, setup_path):109 # GH6166110 df = DataFrame(111 {"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)112 )113 with ensure_clean_store(setup_path) as store:114 store.append("df", df, data_columns=["a"])115 result = store.select("df")116 tm.assert_frame_equal(df, result)117 def test_api(self, setup_path):118 # GH4584119 # API issue when to_hdf doesn't accept append AND format args120 with ensure_clean_path(setup_path) as path:121 df = tm.makeDataFrame()122 df.iloc[:10].to_hdf(path, "df", append=True, format="table")123 df.iloc[10:].to_hdf(path, "df", append=True, 
format="table")124 tm.assert_frame_equal(read_hdf(path, "df"), df)125 # append to False126 df.iloc[:10].to_hdf(path, "df", append=False, format="table")127 df.iloc[10:].to_hdf(path, "df", append=True, format="table")128 tm.assert_frame_equal(read_hdf(path, "df"), df)129 with ensure_clean_path(setup_path) as path:130 df = tm.makeDataFrame()131 df.iloc[:10].to_hdf(path, "df", append=True)132 df.iloc[10:].to_hdf(path, "df", append=True, format="table")133 tm.assert_frame_equal(read_hdf(path, "df"), df)134 # append to False135 df.iloc[:10].to_hdf(path, "df", append=False, format="table")136 df.iloc[10:].to_hdf(path, "df", append=True)137 tm.assert_frame_equal(read_hdf(path, "df"), df)138 with ensure_clean_path(setup_path) as path:139 df = tm.makeDataFrame()140 df.to_hdf(path, "df", append=False, format="fixed")141 tm.assert_frame_equal(read_hdf(path, "df"), df)142 df.to_hdf(path, "df", append=False, format="f")143 tm.assert_frame_equal(read_hdf(path, "df"), df)144 df.to_hdf(path, "df", append=False)145 tm.assert_frame_equal(read_hdf(path, "df"), df)146 df.to_hdf(path, "df")147 tm.assert_frame_equal(read_hdf(path, "df"), df)148 with ensure_clean_store(setup_path) as store:149 path = store._path150 df = tm.makeDataFrame()151 _maybe_remove(store, "df")152 store.append("df", df.iloc[:10], append=True, format="table")153 store.append("df", df.iloc[10:], append=True, format="table")154 tm.assert_frame_equal(store.select("df"), df)155 # append to False156 _maybe_remove(store, "df")157 store.append("df", df.iloc[:10], append=False, format="table")158 store.append("df", df.iloc[10:], append=True, format="table")159 tm.assert_frame_equal(store.select("df"), df)160 # formats161 _maybe_remove(store, "df")162 store.append("df", df.iloc[:10], append=False, format="table")163 store.append("df", df.iloc[10:], append=True, format="table")164 tm.assert_frame_equal(store.select("df"), df)165 _maybe_remove(store, "df")166 store.append("df", df.iloc[:10], append=False, format="table")167 store.append("df", df.iloc[10:], append=True, format=None)168 tm.assert_frame_equal(store.select("df"), df)169 with ensure_clean_path(setup_path) as path:170 # Invalid.171 df = tm.makeDataFrame()172 msg = "Can only append to Tables"173 with pytest.raises(ValueError, match=msg):174 df.to_hdf(path, "df", append=True, format="f")175 with pytest.raises(ValueError, match=msg):176 df.to_hdf(path, "df", append=True, format="fixed")177 msg = r"invalid HDFStore format specified \[foo\]"178 with pytest.raises(TypeError, match=msg):179 df.to_hdf(path, "df", append=True, format="foo")180 with pytest.raises(TypeError, match=msg):181 df.to_hdf(path, "df", append=False, format="foo")182 # File path doesn't exist183 path = ""184 msg = f"File {path} does not exist"185 with pytest.raises(FileNotFoundError, match=msg):186 read_hdf(path, "df")187 def test_api_default_format(self, setup_path):188 # default_format option189 with ensure_clean_store(setup_path) as store:190 df = tm.makeDataFrame()191 pd.set_option("io.hdf.default_format", "fixed")192 _maybe_remove(store, "df")193 store.put("df", df)194 assert not store.get_storer("df").is_table195 msg = "Can only append to Tables"196 with pytest.raises(ValueError, match=msg):197 store.append("df2", df)198 pd.set_option("io.hdf.default_format", "table")199 _maybe_remove(store, "df")200 store.put("df", df)201 assert store.get_storer("df").is_table202 _maybe_remove(store, "df2")203 store.append("df2", df)204 assert store.get_storer("df").is_table205 pd.set_option("io.hdf.default_format", None)206 with 
ensure_clean_path(setup_path) as path:207 df = tm.makeDataFrame()208 pd.set_option("io.hdf.default_format", "fixed")209 df.to_hdf(path, "df")210 with HDFStore(path) as store:211 assert not store.get_storer("df").is_table212 with pytest.raises(ValueError, match=msg):213 df.to_hdf(path, "df2", append=True)214 pd.set_option("io.hdf.default_format", "table")215 df.to_hdf(path, "df3")216 with HDFStore(path) as store:217 assert store.get_storer("df3").is_table218 df.to_hdf(path, "df4", append=True)219 with HDFStore(path) as store:220 assert store.get_storer("df4").is_table221 pd.set_option("io.hdf.default_format", None)222 def test_keys(self, setup_path):223 with ensure_clean_store(setup_path) as store:224 store["a"] = tm.makeTimeSeries()225 store["b"] = tm.makeStringSeries()226 store["c"] = tm.makeDataFrame()227 assert len(store) == 3228 expected = {"/a", "/b", "/c"}229 assert set(store.keys()) == expected230 assert set(store) == expected231 def test_no_track_times(self, setup_path):232 # GH 32682233 # enables to set track_times (see `pytables` `create_table` documentation)234 def checksum(filename, hash_factory=hashlib.md5, chunk_num_blocks=128):235 h = hash_factory()236 with open(filename, "rb") as f:237 for chunk in iter(lambda: f.read(chunk_num_blocks * h.block_size), b""):238 h.update(chunk)239 return h.digest()240 def create_h5_and_return_checksum(track_times):241 with ensure_clean_path(setup_path) as path:242 df = pd.DataFrame({"a": [1]})243 with pd.HDFStore(path, mode="w") as hdf:244 hdf.put(245 "table",246 df,247 format="table",248 data_columns=True,249 index=None,250 track_times=track_times,251 )252 return checksum(path)253 checksum_0_tt_false = create_h5_and_return_checksum(track_times=False)254 checksum_0_tt_true = create_h5_and_return_checksum(track_times=True)255 # sleep is necessary to create h5 with different creation time256 time.sleep(1)257 checksum_1_tt_false = create_h5_and_return_checksum(track_times=False)258 checksum_1_tt_true = create_h5_and_return_checksum(track_times=True)259 # checksums are the same if track_time = False260 assert checksum_0_tt_false == checksum_1_tt_false261 # checksums are NOT same if track_time = True262 assert checksum_0_tt_true != checksum_1_tt_true263 def test_non_pandas_keys(self, setup_path):264 class Table1(tables.IsDescription):265 value1 = tables.Float32Col()266 class Table2(tables.IsDescription):267 value2 = tables.Float32Col()268 class Table3(tables.IsDescription):269 value3 = tables.Float32Col()270 with ensure_clean_path(setup_path) as path:271 with tables.open_file(path, mode="w") as h5file:272 group = h5file.create_group("/", "group")273 h5file.create_table(group, "table1", Table1, "Table 1")274 h5file.create_table(group, "table2", Table2, "Table 2")275 h5file.create_table(group, "table3", Table3, "Table 3")276 with HDFStore(path) as store:277 assert len(store.keys(include="native")) == 3278 expected = {"/group/table1", "/group/table2", "/group/table3"}279 assert set(store.keys(include="native")) == expected280 assert set(store.keys(include="pandas")) == set()281 for name in expected:282 df = store.get(name)283 assert len(df.columns) == 1284 def test_keys_illegal_include_keyword_value(self, setup_path):285 with ensure_clean_store(setup_path) as store:286 with pytest.raises(287 ValueError,288 match="`include` should be either 'pandas' or 'native' "289 "but is 'illegal'",290 ):291 store.keys(include="illegal")292 def test_keys_ignore_hdf_softlink(self, setup_path):293 # GH 20523294 # Puts a softlink into HDF file and rereads295 with 
ensure_clean_store(setup_path) as store:296 df = DataFrame(dict(A=range(5), B=range(5)))297 store.put("df", df)298 assert store.keys() == ["/df"]299 store._handle.create_soft_link(store._handle.root, "symlink", "df")300 # Should ignore the softlink301 assert store.keys() == ["/df"]302 def test_iter_empty(self, setup_path):303 with ensure_clean_store(setup_path) as store:304 # GH 12221305 assert list(store) == []306 def test_repr(self, setup_path):307 with ensure_clean_store(setup_path) as store:308 repr(store)309 store.info()310 store["a"] = tm.makeTimeSeries()311 store["b"] = tm.makeStringSeries()312 store["c"] = tm.makeDataFrame()313 df = tm.makeDataFrame()314 df["obj1"] = "foo"315 df["obj2"] = "bar"316 df["bool1"] = df["A"] > 0317 df["bool2"] = df["B"] > 0318 df["bool3"] = True319 df["int1"] = 1320 df["int2"] = 2321 df["timestamp1"] = Timestamp("20010102")322 df["timestamp2"] = Timestamp("20010103")323 df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)324 df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)325 df.loc[df.index[3:6], ["obj1"]] = np.nan326 df = df._consolidate()._convert(datetime=True)327 with catch_warnings(record=True):328 simplefilter("ignore", pd.errors.PerformanceWarning)329 store["df"] = df330 # make a random group in hdf space331 store._handle.create_group(store._handle.root, "bah")332 assert store.filename in repr(store)333 assert store.filename in str(store)334 store.info()335 # storers336 with ensure_clean_store(setup_path) as store:337 df = tm.makeDataFrame()338 store.append("df", df)339 s = store.get_storer("df")340 repr(s)341 str(s)342 @ignore_natural_naming_warning343 def test_contains(self, setup_path):344 with ensure_clean_store(setup_path) as store:345 store["a"] = tm.makeTimeSeries()346 store["b"] = tm.makeDataFrame()347 store["foo/bar"] = tm.makeDataFrame()348 assert "a" in store349 assert "b" in store350 assert "c" not in store351 assert "foo/bar" in store352 assert "/foo/bar" in store353 assert "/foo/b" not in store354 assert "bar" not in store355 # gh-2694: tables.NaturalNameWarning356 with catch_warnings(record=True):357 store["node())"] = tm.makeDataFrame()358 assert "node())" in store359 def test_versioning(self, setup_path):360 with ensure_clean_store(setup_path) as store:361 store["a"] = tm.makeTimeSeries()362 store["b"] = tm.makeDataFrame()363 df = tm.makeTimeDataFrame()364 _maybe_remove(store, "df1")365 store.append("df1", df[:10])366 store.append("df1", df[10:])367 assert store.root.a._v_attrs.pandas_version == "0.15.2"368 assert store.root.b._v_attrs.pandas_version == "0.15.2"369 assert store.root.df1._v_attrs.pandas_version == "0.15.2"370 # write a file and wipe its versioning371 _maybe_remove(store, "df2")372 store.append("df2", df)373 # this is an error because its table_type is appendable, but no374 # version info375 store.get_node("df2")._v_attrs.pandas_version = None376 msg = "'NoneType' object has no attribute 'startswith'"377 with pytest.raises(Exception, match=msg):378 store.select("df2")379 def test_mode(self, setup_path):380 df = tm.makeTimeDataFrame()381 def check(mode):382 with ensure_clean_path(setup_path) as path:383 # constructor384 if mode in ["r", "r+"]:385 with pytest.raises(IOError):386 HDFStore(path, mode=mode)387 else:388 store = HDFStore(path, mode=mode)389 assert store._handle.mode == mode390 store.close()391 with ensure_clean_path(setup_path) as path:392 # context393 if mode in ["r", "r+"]:394 with pytest.raises(IOError):395 with HDFStore(path, mode=mode) as store: # noqa396 pass397 else:398 with HDFStore(path, 
mode=mode) as store:399 assert store._handle.mode == mode400 with ensure_clean_path(setup_path) as path:401 # conv write402 if mode in ["r", "r+"]:403 with pytest.raises(IOError):404 df.to_hdf(path, "df", mode=mode)405 df.to_hdf(path, "df", mode="w")406 else:407 df.to_hdf(path, "df", mode=mode)408 # conv read409 if mode in ["w"]:410 msg = (411 "mode w is not allowed while performing a read. "412 r"Allowed modes are r, r\+ and a."413 )414 with pytest.raises(ValueError, match=msg):415 read_hdf(path, "df", mode=mode)416 else:417 result = read_hdf(path, "df", mode=mode)418 tm.assert_frame_equal(result, df)419 def check_default_mode():420 # read_hdf uses default mode421 with ensure_clean_path(setup_path) as path:422 df.to_hdf(path, "df", mode="w")423 result = read_hdf(path, "df")424 tm.assert_frame_equal(result, df)425 check("r")426 check("r+")427 check("a")428 check("w")429 check_default_mode()430 def test_reopen_handle(self, setup_path):431 with ensure_clean_path(setup_path) as path:432 store = HDFStore(path, mode="a")433 store["a"] = tm.makeTimeSeries()434 # invalid mode change435 with pytest.raises(PossibleDataLossError):436 store.open("w")437 store.close()438 assert not store.is_open439 # truncation ok here440 store.open("w")441 assert store.is_open442 assert len(store) == 0443 store.close()444 assert not store.is_open445 store = HDFStore(path, mode="a")446 store["a"] = tm.makeTimeSeries()447 # reopen as read448 store.open("r")449 assert store.is_open450 assert len(store) == 1451 assert store._mode == "r"452 store.close()453 assert not store.is_open454 # reopen as append455 store.open("a")456 assert store.is_open457 assert len(store) == 1458 assert store._mode == "a"459 store.close()460 assert not store.is_open461 # reopen as append (again)462 store.open("a")463 assert store.is_open464 assert len(store) == 1465 assert store._mode == "a"466 store.close()467 assert not store.is_open468 def test_open_args(self, setup_path):469 with ensure_clean_path(setup_path) as path:470 df = tm.makeDataFrame()471 # create an in memory store472 store = HDFStore(473 path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0474 )475 store["df"] = df476 store.append("df2", df)477 tm.assert_frame_equal(store["df"], df)478 tm.assert_frame_equal(store["df2"], df)479 store.close()480 # the file should not have actually been written481 assert not os.path.exists(path)482 def test_flush(self, setup_path):483 with ensure_clean_store(setup_path) as store:484 store["a"] = tm.makeTimeSeries()485 store.flush()486 store.flush(fsync=True)487 def test_get(self, setup_path):488 with ensure_clean_store(setup_path) as store:489 store["a"] = tm.makeTimeSeries()490 left = store.get("a")491 right = store["a"]492 tm.assert_series_equal(left, right)493 left = store.get("/a")494 right = store["/a"]495 tm.assert_series_equal(left, right)496 with pytest.raises(KeyError, match="'No object named b in the file'"):497 store.get("b")498 @pytest.mark.parametrize(499 "where, expected",500 [501 (502 "/",503 {504 "": ({"first_group", "second_group"}, set()),505 "/first_group": (set(), {"df1", "df2"}),506 "/second_group": ({"third_group"}, {"df3", "s1"}),507 "/second_group/third_group": (set(), {"df4"}),508 },509 ),510 (511 "/second_group",512 {513 "/second_group": ({"third_group"}, {"df3", "s1"}),514 "/second_group/third_group": (set(), {"df4"}),515 },516 ),517 ],518 )519 def test_walk(self, where, expected, setup_path):520 # GH10143521 objs = {522 "df1": pd.DataFrame([1, 2, 3]),523 "df2": pd.DataFrame([4, 5, 6]),524 "df3": 
pd.DataFrame([6, 7, 8]),525 "df4": pd.DataFrame([9, 10, 11]),526 "s1": pd.Series([10, 9, 8]),527 # Next 3 items aren't pandas objects and should be ignored528 "a1": np.array([[1, 2, 3], [4, 5, 6]]),529 "tb1": np.array([(1, 2, 3), (4, 5, 6)], dtype="i,i,i"),530 "tb2": np.array([(7, 8, 9), (10, 11, 12)], dtype="i,i,i"),531 }532 with ensure_clean_store("walk_groups.hdf", mode="w") as store:533 store.put("/first_group/df1", objs["df1"])534 store.put("/first_group/df2", objs["df2"])535 store.put("/second_group/df3", objs["df3"])536 store.put("/second_group/s1", objs["s1"])537 store.put("/second_group/third_group/df4", objs["df4"])538 # Create non-pandas objects539 store._handle.create_array("/first_group", "a1", objs["a1"])540 store._handle.create_table("/first_group", "tb1", obj=objs["tb1"])541 store._handle.create_table("/second_group", "tb2", obj=objs["tb2"])542 assert len(list(store.walk(where=where))) == len(expected)543 for path, groups, leaves in store.walk(where=where):544 assert path in expected545 expected_groups, expected_frames = expected[path]546 assert expected_groups == set(groups)547 assert expected_frames == set(leaves)548 for leaf in leaves:549 frame_path = "/".join([path, leaf])550 obj = store.get(frame_path)551 if "df" in leaf:552 tm.assert_frame_equal(obj, objs[leaf])553 else:554 tm.assert_series_equal(obj, objs[leaf])555 def test_getattr(self, setup_path):556 with ensure_clean_store(setup_path) as store:557 s = tm.makeTimeSeries()558 store["a"] = s559 # test attribute access560 result = store.a561 tm.assert_series_equal(result, s)562 result = getattr(store, "a")563 tm.assert_series_equal(result, s)564 df = tm.makeTimeDataFrame()565 store["df"] = df566 result = store.df567 tm.assert_frame_equal(result, df)568 # errors569 for x in ["d", "mode", "path", "handle", "complib"]:570 with pytest.raises(AttributeError):571 getattr(store, x)572 # not stores573 for x in ["mode", "path", "handle", "complib"]:574 getattr(store, f"_{x}")575 def test_put(self, setup_path):576 with ensure_clean_store(setup_path) as store:577 ts = tm.makeTimeSeries()578 df = tm.makeTimeDataFrame()579 store["a"] = ts580 store["b"] = df[:10]581 store["foo/bar/bah"] = df[:10]582 store["foo"] = df[:10]583 store["/foo"] = df[:10]584 store.put("c", df[:10], format="table")585 # not OK, not a table586 with pytest.raises(ValueError):587 store.put("b", df[10:], append=True)588 # node does not currently exist, test _is_table_type returns False589 # in this case590 _maybe_remove(store, "f")591 with pytest.raises(ValueError):592 store.put("f", df[10:], append=True)593 # can't put to a table (use append instead)594 with pytest.raises(ValueError):595 store.put("c", df[10:], append=True)596 # overwrite table597 store.put("c", df[:10], format="table", append=False)598 tm.assert_frame_equal(df[:10], store["c"])599 def test_put_string_index(self, setup_path):600 with ensure_clean_store(setup_path) as store:601 index = Index([f"I am a very long string index: {i}" for i in range(20)])602 s = Series(np.arange(20), index=index)603 df = DataFrame({"A": s, "B": s})604 store["a"] = s605 tm.assert_series_equal(store["a"], s)606 store["b"] = df607 tm.assert_frame_equal(store["b"], df)608 # mixed length609 index = Index(610 ["abcdefghijklmnopqrstuvwxyz1234567890"]611 + [f"I am a very long string index: {i}" for i in range(20)]612 )613 s = Series(np.arange(21), index=index)614 df = DataFrame({"A": s, "B": s})615 store["a"] = s616 tm.assert_series_equal(store["a"], s)617 store["b"] = df618 tm.assert_frame_equal(store["b"], df)619 def 
test_put_compression(self, setup_path):620 with ensure_clean_store(setup_path) as store:621 df = tm.makeTimeDataFrame()622 store.put("c", df, format="table", complib="zlib")623 tm.assert_frame_equal(store["c"], df)624 # can't compress if format='fixed'625 with pytest.raises(ValueError):626 store.put("b", df, format="fixed", complib="zlib")627 @td.skip_if_windows_python_3628 def test_put_compression_blosc(self, setup_path):629 df = tm.makeTimeDataFrame()630 with ensure_clean_store(setup_path) as store:631 # can't compress if format='fixed'632 with pytest.raises(ValueError):633 store.put("b", df, format="fixed", complib="blosc")634 store.put("c", df, format="table", complib="blosc")635 tm.assert_frame_equal(store["c"], df)636 def test_complibs_default_settings(self, setup_path):637 # GH15943638 df = tm.makeDataFrame()639 # Set complevel and check if complib is automatically set to640 # default value641 with ensure_clean_path(setup_path) as tmpfile:642 df.to_hdf(tmpfile, "df", complevel=9)643 result = pd.read_hdf(tmpfile, "df")644 tm.assert_frame_equal(result, df)645 with tables.open_file(tmpfile, mode="r") as h5file:646 for node in h5file.walk_nodes(where="/df", classname="Leaf"):647 assert node.filters.complevel == 9648 assert node.filters.complib == "zlib"649 # Set complib and check to see if compression is disabled650 with ensure_clean_path(setup_path) as tmpfile:651 df.to_hdf(tmpfile, "df", complib="zlib")652 result = pd.read_hdf(tmpfile, "df")653 tm.assert_frame_equal(result, df)654 with tables.open_file(tmpfile, mode="r") as h5file:655 for node in h5file.walk_nodes(where="/df", classname="Leaf"):656 assert node.filters.complevel == 0657 assert node.filters.complib is None658 # Check if not setting complib or complevel results in no compression659 with ensure_clean_path(setup_path) as tmpfile:660 df.to_hdf(tmpfile, "df")661 result = pd.read_hdf(tmpfile, "df")662 tm.assert_frame_equal(result, df)663 with tables.open_file(tmpfile, mode="r") as h5file:664 for node in h5file.walk_nodes(where="/df", classname="Leaf"):665 assert node.filters.complevel == 0666 assert node.filters.complib is None667 # Check if file-defaults can be overridden on a per table basis668 with ensure_clean_path(setup_path) as tmpfile:669 store = pd.HDFStore(tmpfile)670 store.append("dfc", df, complevel=9, complib="blosc")671 store.append("df", df)672 store.close()673 with tables.open_file(tmpfile, mode="r") as h5file:674 for node in h5file.walk_nodes(where="/df", classname="Leaf"):675 assert node.filters.complevel == 0676 assert node.filters.complib is None677 for node in h5file.walk_nodes(where="/dfc", classname="Leaf"):678 assert node.filters.complevel == 9679 assert node.filters.complib == "blosc"680 def test_complibs(self, setup_path):681 # GH14478682 df = tm.makeDataFrame()683 # Building list of all complibs and complevels tuples684 all_complibs = tables.filters.all_complibs685 # Remove lzo if its not available on this platform686 if not tables.which_lib_version("lzo"):687 all_complibs.remove("lzo")688 # Remove bzip2 if its not available on this platform689 if not tables.which_lib_version("bzip2"):690 all_complibs.remove("bzip2")691 all_levels = range(0, 10)692 all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels]693 for (lib, lvl) in all_tests:694 with ensure_clean_path(setup_path) as tmpfile:695 gname = "foo"696 # Write and read file to see if data is consistent697 df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)698 result = pd.read_hdf(tmpfile, gname)699 tm.assert_frame_equal(result, 
df)700 # Open file and check metadata701 # for correct amount of compression702 h5table = tables.open_file(tmpfile, mode="r")703 for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"):704 assert node.filters.complevel == lvl705 if lvl == 0:706 assert node.filters.complib is None707 else:708 assert node.filters.complib == lib709 h5table.close()710 def test_put_integer(self, setup_path):711 # non-date, non-string index712 df = DataFrame(np.random.randn(50, 100))713 self._check_roundtrip(df, tm.assert_frame_equal, setup_path)714 @td.xfail_non_writeable715 def test_put_mixed_type(self, setup_path):716 df = tm.makeTimeDataFrame()717 df["obj1"] = "foo"718 df["obj2"] = "bar"719 df["bool1"] = df["A"] > 0720 df["bool2"] = df["B"] > 0721 df["bool3"] = True722 df["int1"] = 1723 df["int2"] = 2724 df["timestamp1"] = Timestamp("20010102")725 df["timestamp2"] = Timestamp("20010103")726 df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)727 df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)728 df.loc[df.index[3:6], ["obj1"]] = np.nan729 df = df._consolidate()._convert(datetime=True)730 with ensure_clean_store(setup_path) as store:731 _maybe_remove(store, "df")732 # PerformanceWarning733 with catch_warnings(record=True):734 simplefilter("ignore", pd.errors.PerformanceWarning)735 store.put("df", df)736 expected = store.get("df")737 tm.assert_frame_equal(expected, df)738 @pytest.mark.filterwarnings(739 "ignore:object name:tables.exceptions.NaturalNameWarning"740 )741 def test_append(self, setup_path):742 with ensure_clean_store(setup_path) as store:743 # this is allowed by almost always don't want to do it744 # tables.NaturalNameWarning):745 with catch_warnings(record=True):746 df = tm.makeTimeDataFrame()747 _maybe_remove(store, "df1")748 store.append("df1", df[:10])749 store.append("df1", df[10:])750 tm.assert_frame_equal(store["df1"], df)751 _maybe_remove(store, "df2")752 store.put("df2", df[:10], format="table")753 store.append("df2", df[10:])754 tm.assert_frame_equal(store["df2"], df)755 _maybe_remove(store, "df3")756 store.append("/df3", df[:10])757 store.append("/df3", df[10:])758 tm.assert_frame_equal(store["df3"], df)759 # this is allowed by almost always don't want to do it760 # tables.NaturalNameWarning761 _maybe_remove(store, "/df3 foo")762 store.append("/df3 foo", df[:10])763 store.append("/df3 foo", df[10:])764 tm.assert_frame_equal(store["df3 foo"], df)765 # dtype issues - mizxed type in a single object column766 df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])767 df["mixed_column"] = "testing"768 df.loc[2, "mixed_column"] = np.nan769 _maybe_remove(store, "df")770 store.append("df", df)771 tm.assert_frame_equal(store["df"], df)772 # uints - test storage of uints773 uint_data = DataFrame(774 {775 "u08": Series(776 np.random.randint(0, high=255, size=5), dtype=np.uint8777 ),778 "u16": Series(779 np.random.randint(0, high=65535, size=5), dtype=np.uint16780 ),781 "u32": Series(782 np.random.randint(0, high=2 ** 30, size=5), dtype=np.uint32783 ),784 "u64": Series(785 [2 ** 58, 2 ** 59, 2 ** 60, 2 ** 61, 2 ** 62],786 dtype=np.uint64,787 ),788 },789 index=np.arange(5),790 )791 _maybe_remove(store, "uints")792 store.append("uints", uint_data)793 tm.assert_frame_equal(store["uints"], uint_data)794 # uints - test storage of uints in indexable columns795 _maybe_remove(store, "uints")796 # 64-bit indices not yet supported797 store.append("uints", uint_data, data_columns=["u08", "u16", "u32"])798 tm.assert_frame_equal(store["uints"], uint_data)799 def test_append_series(self, 
setup_path):800 with ensure_clean_store(setup_path) as store:801 # basic802 ss = tm.makeStringSeries()803 ts = tm.makeTimeSeries()804 ns = Series(np.arange(100))805 store.append("ss", ss)806 result = store["ss"]807 tm.assert_series_equal(result, ss)808 assert result.name is None809 store.append("ts", ts)810 result = store["ts"]811 tm.assert_series_equal(result, ts)812 assert result.name is None813 ns.name = "foo"814 store.append("ns", ns)815 result = store["ns"]816 tm.assert_series_equal(result, ns)817 assert result.name == ns.name818 # select on the values819 expected = ns[ns > 60]820 result = store.select("ns", "foo>60")821 tm.assert_series_equal(result, expected)822 # select on the index and values823 expected = ns[(ns > 70) & (ns.index < 90)]824 result = store.select("ns", "foo>70 and index<90")825 tm.assert_series_equal(result, expected)826 # multi-index827 mi = DataFrame(np.random.randn(5, 1), columns=["A"])828 mi["B"] = np.arange(len(mi))829 mi["C"] = "foo"830 mi.loc[3:5, "C"] = "bar"831 mi.set_index(["C", "B"], inplace=True)832 s = mi.stack()833 s.index = s.index.droplevel(2)834 store.append("mi", s)835 tm.assert_series_equal(store["mi"], s)836 def test_store_index_types(self, setup_path):837 # GH5386838 # test storing various index types839 with ensure_clean_store(setup_path) as store:840 def check(format, index):841 df = DataFrame(np.random.randn(10, 2), columns=list("AB"))842 df.index = index(len(df))843 _maybe_remove(store, "df")844 store.put("df", df, format=format)845 tm.assert_frame_equal(df, store["df"])846 for index in [847 tm.makeFloatIndex,848 tm.makeStringIndex,849 tm.makeIntIndex,850 tm.makeDateIndex,851 ]:852 check("table", index)853 check("fixed", index)854 # period index currently broken for table855 # seee GH7796 FIXME856 check("fixed", tm.makePeriodIndex)857 # check('table',tm.makePeriodIndex)858 # unicode859 index = tm.makeUnicodeIndex860 check("table", index)861 check("fixed", index)862 @pytest.mark.skipif(863 not is_platform_little_endian(), reason="reason platform is not little endian"864 )865 def test_encoding(self, setup_path):866 with ensure_clean_store(setup_path) as store:867 df = DataFrame(dict(A="foo", B="bar"), index=range(5))868 df.loc[2, "A"] = np.nan869 df.loc[3, "B"] = np.nan870 _maybe_remove(store, "df")871 store.append("df", df, encoding="ascii")872 tm.assert_frame_equal(store["df"], df)873 expected = df.reindex(columns=["A"])874 result = store.select("df", Term("columns=A", encoding="ascii"))875 tm.assert_frame_equal(result, expected)876 @pytest.mark.parametrize(877 "val",878 [879 [b"E\xc9, 17", b"", b"a", b"b", b"c"],880 [b"E\xc9, 17", b"a", b"b", b"c"],881 [b"EE, 17", b"", b"a", b"b", b"c"],882 [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],883 [b"", b"a", b"b", b"c"],884 [b"\xf8\xfc", b"a", b"b", b"c"],885 [b"A\xf8\xfc", b"", b"a", b"b", b"c"],886 [np.nan, b"", b"b", b"c"],887 [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],888 ],889 )890 @pytest.mark.parametrize("dtype", ["category", object])891 def test_latin_encoding(self, setup_path, dtype, val):892 enc = "latin-1"893 nan_rep = ""894 key = "data"895 val = [x.decode(enc) if isinstance(x, bytes) else x for x in val]896 ser = pd.Series(val, dtype=dtype)897 with ensure_clean_path(setup_path) as store:898 ser.to_hdf(store, key, format="table", encoding=enc, nan_rep=nan_rep)899 retr = read_hdf(store, key)900 s_nan = ser.replace(nan_rep, np.nan)901 tm.assert_series_equal(s_nan, retr)902 def test_append_some_nans(self, setup_path):903 with ensure_clean_store(setup_path) as store:904 df = DataFrame(905 
{906 "A": Series(np.random.randn(20)).astype("int32"),907 "A1": np.random.randn(20),908 "A2": np.random.randn(20),909 "B": "foo",910 "C": "bar",911 "D": Timestamp("20010101"),912 "E": datetime.datetime(2001, 1, 2, 0, 0),913 },914 index=np.arange(20),915 )916 # some nans917 _maybe_remove(store, "df1")918 df.loc[0:15, ["A1", "B", "D", "E"]] = np.nan919 store.append("df1", df[:10])920 store.append("df1", df[10:])921 tm.assert_frame_equal(store["df1"], df)922 # first column923 df1 = df.copy()924 df1.loc[:, "A1"] = np.nan925 _maybe_remove(store, "df1")926 store.append("df1", df1[:10])927 store.append("df1", df1[10:])928 tm.assert_frame_equal(store["df1"], df1)929 # 2nd column930 df2 = df.copy()931 df2.loc[:, "A2"] = np.nan932 _maybe_remove(store, "df2")933 store.append("df2", df2[:10])934 store.append("df2", df2[10:])935 tm.assert_frame_equal(store["df2"], df2)936 # datetimes937 df3 = df.copy()938 df3.loc[:, "E"] = np.nan939 _maybe_remove(store, "df3")940 store.append("df3", df3[:10])941 store.append("df3", df3[10:])942 tm.assert_frame_equal(store["df3"], df3)943 def test_append_all_nans(self, setup_path):944 with ensure_clean_store(setup_path) as store:945 df = DataFrame(946 {"A1": np.random.randn(20), "A2": np.random.randn(20)},947 index=np.arange(20),948 )949 df.loc[0:15, :] = np.nan950 # nan some entire rows (dropna=True)951 _maybe_remove(store, "df")952 store.append("df", df[:10], dropna=True)953 store.append("df", df[10:], dropna=True)954 tm.assert_frame_equal(store["df"], df[-4:])955 # nan some entire rows (dropna=False)956 _maybe_remove(store, "df2")957 store.append("df2", df[:10], dropna=False)958 store.append("df2", df[10:], dropna=False)959 tm.assert_frame_equal(store["df2"], df)960 # tests the option io.hdf.dropna_table961 pd.set_option("io.hdf.dropna_table", False)962 _maybe_remove(store, "df3")963 store.append("df3", df[:10])964 store.append("df3", df[10:])965 tm.assert_frame_equal(store["df3"], df)966 pd.set_option("io.hdf.dropna_table", True)967 _maybe_remove(store, "df4")968 store.append("df4", df[:10])969 store.append("df4", df[10:])970 tm.assert_frame_equal(store["df4"], df[-4:])971 # nan some entire rows (string are still written!)972 df = DataFrame(973 {974 "A1": np.random.randn(20),975 "A2": np.random.randn(20),976 "B": "foo",977 "C": "bar",978 },979 index=np.arange(20),980 )981 df.loc[0:15, :] = np.nan982 _maybe_remove(store, "df")983 store.append("df", df[:10], dropna=True)984 store.append("df", df[10:], dropna=True)985 tm.assert_frame_equal(store["df"], df)986 _maybe_remove(store, "df2")987 store.append("df2", df[:10], dropna=False)988 store.append("df2", df[10:], dropna=False)989 tm.assert_frame_equal(store["df2"], df)990 # nan some entire rows (but since we have dates they are still991 # written!)992 df = DataFrame(993 {994 "A1": np.random.randn(20),995 "A2": np.random.randn(20),996 "B": "foo",997 "C": "bar",998 "D": Timestamp("20010101"),999 "E": datetime.datetime(2001, 1, 2, 0, 0),1000 },1001 index=np.arange(20),1002 )1003 df.loc[0:15, :] = np.nan1004 _maybe_remove(store, "df")1005 store.append("df", df[:10], dropna=True)1006 store.append("df", df[10:], dropna=True)1007 tm.assert_frame_equal(store["df"], df)1008 _maybe_remove(store, "df2")1009 store.append("df2", df[:10], dropna=False)1010 store.append("df2", df[10:], dropna=False)1011 tm.assert_frame_equal(store["df2"], df)1012 # Test to make sure defaults are to not drop.1013 # Corresponding to Issue 93821014 df_with_missing = DataFrame(1015 {"col1": [0, np.nan, 2], "col2": [1, np.nan, np.nan]}1016 )1017 with 
ensure_clean_path(setup_path) as path:1018 df_with_missing.to_hdf(path, "df_with_missing", format="table")1019 reloaded = read_hdf(path, "df_with_missing")1020 tm.assert_frame_equal(df_with_missing, reloaded)1021 def test_read_missing_key_close_store(self, setup_path):1022 # GH 257661023 with ensure_clean_path(setup_path) as path:1024 df = pd.DataFrame({"a": range(2), "b": range(2)})1025 df.to_hdf(path, "k1")1026 with pytest.raises(KeyError, match="'No object named k2 in the file'"):1027 pd.read_hdf(path, "k2")1028 # smoke test to test that file is properly closed after1029 # read with KeyError before another write1030 df.to_hdf(path, "k2")1031 def test_read_missing_key_opened_store(self, setup_path):1032 # GH 286991033 with ensure_clean_path(setup_path) as path:1034 df = pd.DataFrame({"a": range(2), "b": range(2)})1035 df.to_hdf(path, "k1")1036 with pd.HDFStore(path, "r") as store:1037 with pytest.raises(KeyError, match="'No object named k2 in the file'"):1038 pd.read_hdf(store, "k2")1039 # Test that the file is still open after a KeyError and that we can1040 # still read from it.1041 pd.read_hdf(store, "k1")1042 def test_append_frame_column_oriented(self, setup_path):1043 with ensure_clean_store(setup_path) as store:1044 # column oriented1045 df = tm.makeTimeDataFrame()1046 df.index = df.index._with_freq(None) # freq doesnt round-trip1047 _maybe_remove(store, "df1")1048 store.append("df1", df.iloc[:, :2], axes=["columns"])1049 store.append("df1", df.iloc[:, 2:])1050 tm.assert_frame_equal(store["df1"], df)1051 result = store.select("df1", "columns=A")1052 expected = df.reindex(columns=["A"])1053 tm.assert_frame_equal(expected, result)1054 # selection on the non-indexable1055 result = store.select("df1", ("columns=A", "index=df.index[0:4]"))1056 expected = df.reindex(columns=["A"], index=df.index[0:4])1057 tm.assert_frame_equal(expected, result)1058 # this isn't supported1059 with pytest.raises(TypeError):1060 store.select("df1", "columns=A and index>df.index[4]")1061 def test_append_with_different_block_ordering(self, setup_path):1062 # GH 4096; using same frames, but different block orderings1063 with ensure_clean_store(setup_path) as store:1064 for i in range(10):1065 df = DataFrame(np.random.randn(10, 2), columns=list("AB"))1066 df["index"] = range(10)1067 df["index"] += i * 101068 df["int64"] = Series([1] * len(df), dtype="int64")1069 df["int16"] = Series([1] * len(df), dtype="int16")1070 if i % 2 == 0:1071 del df["int64"]1072 df["int64"] = Series([1] * len(df), dtype="int64")1073 if i % 3 == 0:1074 a = df.pop("A")1075 df["A"] = a1076 df.set_index("index", inplace=True)1077 store.append("df", df)1078 # test a different ordering but with more fields (like invalid1079 # combinate)1080 with ensure_clean_store(setup_path) as store:1081 df = DataFrame(np.random.randn(10, 2), columns=list("AB"), dtype="float64")1082 df["int64"] = Series([1] * len(df), dtype="int64")1083 df["int16"] = Series([1] * len(df), dtype="int16")1084 store.append("df", df)1085 # store additional fields in different blocks1086 df["int16_2"] = Series([1] * len(df), dtype="int16")1087 with pytest.raises(ValueError):1088 store.append("df", df)1089 # store multiple additional fields in different blocks1090 df["float_3"] = Series([1.0] * len(df), dtype="float64")1091 with pytest.raises(ValueError):1092 store.append("df", df)1093 def test_append_with_strings(self, setup_path):1094 with ensure_clean_store(setup_path) as store:1095 with catch_warnings(record=True):1096 def check_col(key, name, size):1097 assert (1098 
getattr(store.get_storer(key).table.description, name).itemsize1099 == size1100 )1101 # avoid truncation on elements1102 df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])1103 store.append("df_big", df)1104 tm.assert_frame_equal(store.select("df_big"), df)1105 check_col("df_big", "values_block_1", 15)1106 # appending smaller string ok1107 df2 = DataFrame([[124, "asdqy"], [346, "dggnhefbdfb"]])1108 store.append("df_big", df2)1109 expected = concat([df, df2])1110 tm.assert_frame_equal(store.select("df_big"), expected)1111 check_col("df_big", "values_block_1", 15)1112 # avoid truncation on elements1113 df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])1114 store.append("df_big2", df, min_itemsize={"values": 50})1115 tm.assert_frame_equal(store.select("df_big2"), df)1116 check_col("df_big2", "values_block_1", 50)1117 # bigger string on next append1118 store.append("df_new", df)1119 df_new = DataFrame(1120 [[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]]1121 )1122 with pytest.raises(ValueError):1123 store.append("df_new", df_new)1124 # min_itemsize on Series index (GH 11412)1125 df = tm.makeMixedDataFrame().set_index("C")1126 store.append("ss", df["B"], min_itemsize={"index": 4})1127 tm.assert_series_equal(store.select("ss"), df["B"])1128 # same as above, with data_columns=True1129 store.append(1130 "ss2", df["B"], data_columns=True, min_itemsize={"index": 4}1131 )1132 tm.assert_series_equal(store.select("ss2"), df["B"])1133 # min_itemsize in index without appending (GH 10381)1134 store.put("ss3", df, format="table", min_itemsize={"index": 6})1135 # just make sure there is a longer string:1136 df2 = df.copy().reset_index().assign(C="longer").set_index("C")1137 store.append("ss3", df2)1138 tm.assert_frame_equal(store.select("ss3"), pd.concat([df, df2]))1139 # same as above, with a Series1140 store.put("ss4", df["B"], format="table", min_itemsize={"index": 6})1141 store.append("ss4", df2["B"])1142 tm.assert_series_equal(1143 store.select("ss4"), pd.concat([df["B"], df2["B"]])1144 )1145 # with nans1146 _maybe_remove(store, "df")1147 df = tm.makeTimeDataFrame()1148 df["string"] = "foo"1149 df.loc[df.index[1:4], "string"] = np.nan1150 df["string2"] = "bar"1151 df.loc[df.index[4:8], "string2"] = np.nan1152 df["string3"] = "bah"1153 df.loc[df.index[1:], "string3"] = np.nan1154 store.append("df", df)1155 result = store.select("df")1156 tm.assert_frame_equal(result, df)1157 with ensure_clean_store(setup_path) as store:1158 def check_col(key, name, size):1159 assert getattr(1160 store.get_storer(key).table.description, name1161 ).itemsize, size1162 df = DataFrame(dict(A="foo", B="bar"), index=range(10))1163 # a min_itemsize that creates a data_column1164 _maybe_remove(store, "df")1165 store.append("df", df, min_itemsize={"A": 200})1166 check_col("df", "A", 200)1167 assert store.get_storer("df").data_columns == ["A"]1168 # a min_itemsize that creates a data_column21169 _maybe_remove(store, "df")1170 store.append("df", df, data_columns=["B"], min_itemsize={"A": 200})1171 check_col("df", "A", 200)1172 assert store.get_storer("df").data_columns == ["B", "A"]1173 # a min_itemsize that creates a data_column21174 _maybe_remove(store, "df")1175 store.append("df", df, data_columns=["B"], min_itemsize={"values": 200})1176 check_col("df", "B", 200)1177 check_col("df", "values_block_0", 200)1178 assert store.get_storer("df").data_columns == ["B"]1179 # infer the .typ on subsequent appends1180 _maybe_remove(store, "df")1181 store.append("df", df[:5], min_itemsize=200)1182 
store.append("df", df[5:], min_itemsize=200)1183 tm.assert_frame_equal(store["df"], df)1184 # invalid min_itemsize keys1185 df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"])1186 _maybe_remove(store, "df")1187 with pytest.raises(ValueError):1188 store.append("df", df, min_itemsize={"foo": 20, "foobar": 20})1189 def test_append_with_empty_string(self, setup_path):1190 with ensure_clean_store(setup_path) as store:1191 # with all empty strings (GH 12242)1192 df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]})1193 store.append("df", df[:-1], min_itemsize={"x": 1})1194 store.append("df", df[-1:], min_itemsize={"x": 1})1195 tm.assert_frame_equal(store.select("df"), df)1196 def test_to_hdf_with_min_itemsize(self, setup_path):1197 with ensure_clean_path(setup_path) as path:1198 # min_itemsize in index with to_hdf (GH 10381)1199 df = tm.makeMixedDataFrame().set_index("C")1200 df.to_hdf(path, "ss3", format="table", min_itemsize={"index": 6})1201 # just make sure there is a longer string:1202 df2 = df.copy().reset_index().assign(C="longer").set_index("C")1203 df2.to_hdf(path, "ss3", append=True, format="table")1204 tm.assert_frame_equal(pd.read_hdf(path, "ss3"), pd.concat([df, df2]))1205 # same as above, with a Series1206 df["B"].to_hdf(path, "ss4", format="table", min_itemsize={"index": 6})1207 df2["B"].to_hdf(path, "ss4", append=True, format="table")1208 tm.assert_series_equal(1209 pd.read_hdf(path, "ss4"), pd.concat([df["B"], df2["B"]])1210 )1211 @pytest.mark.parametrize(1212 "format", [pytest.param("fixed", marks=td.xfail_non_writeable), "table"]1213 )1214 def test_to_hdf_errors(self, format, setup_path):1215 data = ["\ud800foo"]1216 ser = pd.Series(data, index=pd.Index(data))1217 with ensure_clean_path(setup_path) as path:1218 # GH 208351219 ser.to_hdf(path, "table", format=format, errors="surrogatepass")1220 result = pd.read_hdf(path, "table", errors="surrogatepass")1221 tm.assert_series_equal(result, ser)1222 def test_append_with_data_columns(self, setup_path):1223 with ensure_clean_store(setup_path) as store:1224 df = tm.makeTimeDataFrame()1225 df.iloc[0, df.columns.get_loc("B")] = 1.01226 _maybe_remove(store, "df")1227 store.append("df", df[:2], data_columns=["B"])1228 store.append("df", df[2:])1229 tm.assert_frame_equal(store["df"], df)1230 # check that we have indices created1231 assert store._handle.root.df.table.cols.index.is_indexed is True1232 assert store._handle.root.df.table.cols.B.is_indexed is True1233 # data column searching1234 result = store.select("df", "B>0")1235 expected = df[df.B > 0]1236 tm.assert_frame_equal(result, expected)1237 # data column searching (with an indexable and a data_columns)1238 result = store.select("df", "B>0 and index>df.index[3]")1239 df_new = df.reindex(index=df.index[4:])1240 expected = df_new[df_new.B > 0]1241 tm.assert_frame_equal(result, expected)1242 # data column selection with a string data_column1243 df_new = df.copy()1244 df_new["string"] = "foo"1245 df_new.loc[df_new.index[1:4], "string"] = np.nan1246 df_new.loc[df_new.index[5:6], "string"] = "bar"1247 _maybe_remove(store, "df")1248 store.append("df", df_new, data_columns=["string"])1249 result = store.select("df", "string='foo'")1250 expected = df_new[df_new.string == "foo"]1251 tm.assert_frame_equal(result, expected)1252 # using min_itemsize and a data column1253 def check_col(key, name, size):1254 assert (1255 getattr(store.get_storer(key).table.description, name).itemsize1256 == size1257 )1258 with ensure_clean_store(setup_path) as store:1259 
_maybe_remove(store, "df")1260 store.append(1261 "df", df_new, data_columns=["string"], min_itemsize={"string": 30}1262 )1263 check_col("df", "string", 30)1264 _maybe_remove(store, "df")1265 store.append("df", df_new, data_columns=["string"], min_itemsize=30)1266 check_col("df", "string", 30)1267 _maybe_remove(store, "df")1268 store.append(1269 "df", df_new, data_columns=["string"], min_itemsize={"values": 30}1270 )1271 check_col("df", "string", 30)1272 with ensure_clean_store(setup_path) as store:1273 df_new["string2"] = "foobarbah"1274 df_new["string_block1"] = "foobarbah1"1275 df_new["string_block2"] = "foobarbah2"1276 _maybe_remove(store, "df")1277 store.append(1278 "df",1279 df_new,1280 data_columns=["string", "string2"],1281 min_itemsize={"string": 30, "string2": 40, "values": 50},1282 )1283 check_col("df", "string", 30)1284 check_col("df", "string2", 40)1285 check_col("df", "values_block_1", 50)1286 with ensure_clean_store(setup_path) as store:1287 # multiple data columns1288 df_new = df.copy()1289 df_new.iloc[0, df_new.columns.get_loc("A")] = 1.01290 df_new.iloc[0, df_new.columns.get_loc("B")] = -1.01291 df_new["string"] = "foo"1292 sl = df_new.columns.get_loc("string")1293 df_new.iloc[1:4, sl] = np.nan1294 df_new.iloc[5:6, sl] = "bar"1295 df_new["string2"] = "foo"1296 sl = df_new.columns.get_loc("string2")1297 df_new.iloc[2:5, sl] = np.nan1298 df_new.iloc[7:8, sl] = "bar"1299 _maybe_remove(store, "df")1300 store.append("df", df_new, data_columns=["A", "B", "string", "string2"])1301 result = store.select(1302 "df", "string='foo' and string2='foo' and A>0 and B<0"1303 )1304 expected = df_new[1305 (df_new.string == "foo")1306 & (df_new.string2 == "foo")1307 & (df_new.A > 0)1308 & (df_new.B < 0)1309 ]1310 tm.assert_frame_equal(1311 result, expected, check_index_type=False, check_freq=False1312 )1313 # yield an empty frame1314 result = store.select("df", "string='foo' and string2='cool'")1315 expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")]1316 tm.assert_frame_equal(1317 result, expected, check_index_type=False, check_freq=False1318 )1319 with ensure_clean_store(setup_path) as store:1320 # doc example1321 df_dc = df.copy()1322 df_dc["string"] = "foo"1323 df_dc.loc[df_dc.index[4:6], "string"] = np.nan1324 df_dc.loc[df_dc.index[7:9], "string"] = "bar"1325 df_dc["string2"] = "cool"1326 df_dc["datetime"] = Timestamp("20010102")1327 df_dc = df_dc._convert(datetime=True)1328 df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan1329 _maybe_remove(store, "df_dc")1330 store.append(1331 "df_dc", df_dc, data_columns=["B", "C", "string", "string2", "datetime"]1332 )1333 result = store.select("df_dc", "B>0")1334 expected = df_dc[df_dc.B > 0]1335 tm.assert_frame_equal(1336 result, expected, check_index_type=False, check_freq=False1337 )1338 result = store.select("df_dc", ["B > 0", "C > 0", "string == foo"])1339 expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]1340 tm.assert_frame_equal(1341 result, expected, check_index_type=False, check_freq=False1342 )1343 # FIXME: 2020-05-07 freq check randomly fails in the CI1344 with ensure_clean_store(setup_path) as store:1345 # doc example part 21346 np.random.seed(1234)1347 index = date_range("1/1/2000", periods=8)1348 df_dc = DataFrame(1349 np.random.randn(8, 3), index=index, columns=["A", "B", "C"]1350 )1351 df_dc["string"] = "foo"1352 df_dc.loc[df_dc.index[4:6], "string"] = np.nan1353 df_dc.loc[df_dc.index[7:9], "string"] = "bar"1354 df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs()1355 
df_dc["string2"] = "cool"1356 # on-disk operations1357 store.append("df_dc", df_dc, data_columns=["B", "C", "string", "string2"])1358 result = store.select("df_dc", "B>0")1359 expected = df_dc[df_dc.B > 0]1360 tm.assert_frame_equal(result, expected)1361 result = store.select("df_dc", ["B > 0", "C > 0", 'string == "foo"'])1362 expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]1363 tm.assert_frame_equal(result, expected)1364 def test_create_table_index(self, setup_path):1365 with ensure_clean_store(setup_path) as store:1366 with catch_warnings(record=True):1367 def col(t, column):1368 return getattr(store.get_storer(t).table.cols, column)1369 # data columns1370 df = tm.makeTimeDataFrame()1371 df["string"] = "foo"1372 df["string2"] = "bar"1373 store.append("f", df, data_columns=["string", "string2"])1374 assert col("f", "index").is_indexed is True1375 assert col("f", "string").is_indexed is True1376 assert col("f", "string2").is_indexed is True1377 # specify index=columns1378 store.append(1379 "f2", df, index=["string"], data_columns=["string", "string2"]1380 )1381 assert col("f2", "index").is_indexed is False1382 assert col("f2", "string").is_indexed is True1383 assert col("f2", "string2").is_indexed is False1384 # try to index a non-table1385 _maybe_remove(store, "f2")1386 store.put("f2", df)1387 with pytest.raises(TypeError):1388 store.create_table_index("f2")1389 def test_create_table_index_data_columns_argument(self, setup_path):1390 # GH 281561391 with ensure_clean_store(setup_path) as store:1392 with catch_warnings(record=True):1393 def col(t, column):1394 return getattr(store.get_storer(t).table.cols, column)1395 # data columns1396 df = tm.makeTimeDataFrame()1397 df["string"] = "foo"1398 df["string2"] = "bar"1399 store.append("f", df, data_columns=["string"])1400 assert col("f", "index").is_indexed is True1401 assert col("f", "string").is_indexed is True1402 msg = "'Cols' object has no attribute 'string2'"1403 with pytest.raises(AttributeError, match=msg):1404 col("f", "string2").is_indexed1405 # try to index a col which isn't a data_column1406 msg = (1407 f"column string2 is not a data_column.\n"1408 f"In order to read column string2 you must reload the dataframe \n"1409 f"into HDFStore and include string2 with the data_columns argument."1410 )1411 with pytest.raises(AttributeError, match=msg):1412 store.create_table_index("f", columns=["string2"])1413 def test_append_hierarchical(self, setup_path):1414 index = MultiIndex(1415 levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],1416 codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],1417 names=["foo", "bar"],1418 )1419 df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])1420 with ensure_clean_store(setup_path) as store:1421 store.append("mi", df)1422 result = store.select("mi")1423 tm.assert_frame_equal(result, df)1424 # GH 37481425 result = store.select("mi", columns=["A", "B"])1426 expected = df.reindex(columns=["A", "B"])1427 tm.assert_frame_equal(result, expected)1428 with ensure_clean_path("test.hdf") as path:1429 df.to_hdf(path, "df", format="table")1430 result = read_hdf(path, "df", columns=["A", "B"])1431 expected = df.reindex(columns=["A", "B"])1432 tm.assert_frame_equal(result, expected)1433 def test_column_multiindex(self, setup_path):1434 # GH 47101435 # recreate multi-indexes properly1436 index = MultiIndex.from_tuples(1437 [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"]1438 )1439 df = 
DataFrame(np.arange(12).reshape(3, 4), columns=index)1440 expected = df.copy()1441 if isinstance(expected.index, RangeIndex):1442 expected.index = Int64Index(expected.index)1443 with ensure_clean_store(setup_path) as store:1444 store.put("df", df)1445 tm.assert_frame_equal(1446 store["df"], expected, check_index_type=True, check_column_type=True1447 )1448 store.put("df1", df, format="table")1449 tm.assert_frame_equal(1450 store["df1"], expected, check_index_type=True, check_column_type=True1451 )1452 with pytest.raises(ValueError):1453 store.put("df2", df, format="table", data_columns=["A"])1454 with pytest.raises(ValueError):1455 store.put("df3", df, format="table", data_columns=True)1456 # appending multi-column on existing table (see GH 6167)1457 with ensure_clean_store(setup_path) as store:1458 store.append("df2", df)1459 store.append("df2", df)1460 tm.assert_frame_equal(store["df2"], concat((df, df)))1461 # non_index_axes name1462 df = DataFrame(1463 np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo")1464 )1465 expected = df.copy()1466 if isinstance(expected.index, RangeIndex):1467 expected.index = Int64Index(expected.index)1468 with ensure_clean_store(setup_path) as store:1469 store.put("df1", df, format="table")1470 tm.assert_frame_equal(1471 store["df1"], expected, check_index_type=True, check_column_type=True1472 )1473 def test_store_multiindex(self, setup_path):1474 # validate multi-index names1475 # GH 55271476 with ensure_clean_store(setup_path) as store:1477 def make_index(names=None):1478 return MultiIndex.from_tuples(1479 [1480 (datetime.datetime(2013, 12, d), s, t)1481 for d in range(1, 3)1482 for s in range(2)1483 for t in range(3)1484 ],1485 names=names,1486 )1487 # no names1488 _maybe_remove(store, "df")1489 df = DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index())1490 store.append("df", df)1491 tm.assert_frame_equal(store.select("df"), df)1492 # partial names1493 _maybe_remove(store, "df")1494 df = DataFrame(1495 np.zeros((12, 2)),1496 columns=["a", "b"],1497 index=make_index(["date", None, None]),1498 )1499 store.append("df", df)1500 tm.assert_frame_equal(store.select("df"), df)1501 # series1502 _maybe_remove(store, "s")1503 s = Series(np.zeros(12), index=make_index(["date", None, None]))1504 store.append("s", s)1505 xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"]))1506 tm.assert_series_equal(store.select("s"), xp)1507 # dup with column1508 _maybe_remove(store, "df")1509 df = DataFrame(1510 np.zeros((12, 2)),1511 columns=["a", "b"],1512 index=make_index(["date", "a", "t"]),1513 )1514 with pytest.raises(ValueError):1515 store.append("df", df)1516 # dup within level1517 _maybe_remove(store, "df")1518 df = DataFrame(1519 np.zeros((12, 2)),1520 columns=["a", "b"],1521 index=make_index(["date", "date", "date"]),1522 )1523 with pytest.raises(ValueError):1524 store.append("df", df)1525 # fully names1526 _maybe_remove(store, "df")1527 df = DataFrame(1528 np.zeros((12, 2)),1529 columns=["a", "b"],1530 index=make_index(["date", "s", "t"]),1531 )1532 store.append("df", df)1533 tm.assert_frame_equal(store.select("df"), df)1534 def test_select_columns_in_where(self, setup_path):1535 # GH 61691536 # recreate multi-indexes when columns is passed1537 # in the `where` argument1538 index = MultiIndex(1539 levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],1540 codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],1541 names=["foo_name", "bar_name"],1542 )1543 # With a DataFrame1544 df = 
    def test_select_columns_in_where(self, setup_path):
        # GH 6169
        # recreate multi-indexes when columns is passed
        # in the `where` argument
        index = MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["foo_name", "bar_name"],
        )

        # With a DataFrame
        df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])
        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table")
            expected = df[["A"]]
            tm.assert_frame_equal(store.select("df", columns=["A"]), expected)
            tm.assert_frame_equal(store.select("df", where="columns=['A']"), expected)

        # With a Series
        s = Series(np.random.randn(10), index=index, name="A")
        with ensure_clean_store(setup_path) as store:
            store.put("s", s, format="table")
            tm.assert_series_equal(store.select("s", where="columns=['A']"), s)

    def test_mi_data_columns(self, setup_path):
        # GH 14435
        idx = pd.MultiIndex.from_arrays(
            [date_range("2000-01-01", periods=5), range(5)], names=["date", "id"]
        )
        df = pd.DataFrame({"a": [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx)

        with ensure_clean_store(setup_path) as store:
            store.append("df", df, data_columns=True)
            actual = store.select("df", where="id == 1")
            expected = df.iloc[[1], :]
            tm.assert_frame_equal(actual, expected)

    def test_pass_spec_to_storer(self, setup_path):
        df = tm.makeDataFrame()

        with ensure_clean_store(setup_path) as store:
            store.put("df", df)
            with pytest.raises(TypeError):
                store.select("df", columns=["A"])
            with pytest.raises(TypeError):
                store.select("df", where=[("columns=A")])

    @td.xfail_non_writeable
    def test_append_misc(self, setup_path):
        with ensure_clean_store(setup_path) as store:
            df = tm.makeDataFrame()
            store.append("df", df, chunksize=1)
            result = store.select("df")
            tm.assert_frame_equal(result, df)

            store.append("df1", df, expectedrows=10)
            result = store.select("df1")
            tm.assert_frame_equal(result, df)

        # more chunksize in append tests
        def check(obj, comparator):
            for c in [10, 200, 1000]:
                with ensure_clean_store(setup_path, mode="w") as store:
                    store.append("obj", obj, chunksize=c)
                    result = store.select("obj")
                    comparator(result, obj)

        df = tm.makeDataFrame()
        df["string"] = "foo"
        df["float322"] = 1.0
        df["float322"] = df["float322"].astype("float32")
        df["bool"] = df["float322"] > 0
        df["time1"] = Timestamp("20130101")
        df["time2"] = Timestamp("20130102")
        check(df, tm.assert_frame_equal)

        # empty frame, GH4273
        with ensure_clean_store(setup_path) as store:

            # 0 len
            df_empty = DataFrame(columns=list("ABC"))
            store.append("df", df_empty)
            with pytest.raises(KeyError, match="'No object named df in the file'"):
                store.select("df")

            # repeated append of 0/non-zero frames
            df = DataFrame(np.random.rand(10, 3), columns=list("ABC"))
            store.append("df", df)
            tm.assert_frame_equal(store.select("df"), df)
            store.append("df", df_empty)
            tm.assert_frame_equal(store.select("df"), df)

            # store
            df = DataFrame(columns=list("ABC"))
            store.put("df2", df)
            tm.assert_frame_equal(store.select("df2"), df)
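    # --- Illustrative sketch, not part of the original test file ---
    # chunksize controls how many rows are written per batch, and expectedrows
    # lets PyTables preallocate; both only affect write performance, never the
    # stored result, which is what the chunked-append checks above rely on.
    # The helper name is an assumption added for illustration.
    def _example_chunked_append(self, setup_path):
        df = tm.makeDataFrame()
        with ensure_clean_store(setup_path) as store:
            store.append("df", df, chunksize=100, expectedrows=len(df))
            tm.assert_frame_equal(store.select("df"), df)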
    def test_append_raise(self, setup_path):
        with ensure_clean_store(setup_path) as store:

            # test append with invalid input to get good error messages

            # list in column
            df = tm.makeDataFrame()
            df["invalid"] = [["a"]] * len(df)
            assert df.dtypes["invalid"] == np.object_
            with pytest.raises(TypeError):
                store.append("df", df)

            # multiple invalid columns
            df["invalid2"] = [["a"]] * len(df)
            df["invalid3"] = [["a"]] * len(df)
            with pytest.raises(TypeError):
                store.append("df", df)

            # datetime with embedded nans as object
            df = tm.makeDataFrame()
            s = Series(datetime.datetime(2001, 1, 2), index=df.index)
            s = s.astype(object)
            s[0:5] = np.nan
            df["invalid"] = s
            assert df.dtypes["invalid"] == np.object_
            with pytest.raises(TypeError):
                store.append("df", df)

            # directly ndarray
            with pytest.raises(TypeError):
                store.append("df", np.arange(10))

            # series directly
            with pytest.raises(TypeError):
                store.append("df", Series(np.arange(10)))

            # appending an incompatible table
            df = tm.makeDataFrame()
            store.append("df", df)

            df["foo"] = "foo"
            with pytest.raises(ValueError):
                store.append("df", df)

    def test_table_index_incompatible_dtypes(self, setup_path):
        df1 = DataFrame({"a": [1, 2, 3]})
        df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3))

        with ensure_clean_store(setup_path) as store:
            store.put("frame", df1, format="table")
            with pytest.raises(TypeError):
                store.put("frame", df2, format="table", append=True)

    def test_table_values_dtypes_roundtrip(self, setup_path):
        with ensure_clean_store(setup_path) as store:
            df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8")
            store.append("df_f8", df1)
            tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes)

            df2 = DataFrame({"a": [1, 2, 3]}, dtype="i8")
            store.append("df_i8", df2)
            tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes)

            # incompatible dtype
            with pytest.raises(ValueError):
                store.append("df_i8", df1)

            # check creation/storage/retrieval of float32 (a bit hacky to
            # actually create them though)
            df1 = DataFrame(np.array([[1], [2], [3]], dtype="f4"), columns=["A"])
            store.append("df_f4", df1)
            tm.assert_series_equal(df1.dtypes, store["df_f4"].dtypes)
            assert df1.dtypes[0] == "float32"

            # check with mixed dtypes
            df1 = DataFrame(
                {
                    c: Series(np.random.randint(5), dtype=c)
                    for c in ["float32", "float64", "int32", "int64", "int16", "int8"]
                }
            )
            df1["string"] = "foo"
            df1["float322"] = 1.0
            df1["float322"] = df1["float322"].astype("float32")
            df1["bool"] = df1["float32"] > 0
            df1["time1"] = Timestamp("20130101")
            df1["time2"] = Timestamp("20130102")

            store.append("df_mixed_dtypes1", df1)
            result = store.select("df_mixed_dtypes1").dtypes.value_counts()
            result.index = [str(i) for i in result.index]
            expected = Series(
                {
                    "float32": 2,
                    "float64": 1,
                    "int32": 1,
                    "bool": 1,
                    "int16": 1,
                    "int8": 1,
                    "int64": 1,
                    "object": 1,
                    "datetime64[ns]": 2,
                }
            )
            result = result.sort_index()
            expected = expected.sort_index()
            tm.assert_series_equal(result, expected)

    def test_table_mixed_dtypes(self, setup_path):

        # frame
        df = tm.makeDataFrame()
        df["obj1"] = "foo"
        df["obj2"] = "bar"
        df["bool1"] = df["A"] > 0
        df["bool2"] = df["B"] > 0
        df["bool3"] = True
        df["int1"] = 1
        df["int2"] = 2
        df["timestamp1"] = Timestamp("20010102")
        df["timestamp2"] = Timestamp("20010103")
        df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
        df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
        df.loc[df.index[3:6], ["obj1"]] = np.nan
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(setup_path) as store:
            store.append("df1_mixed", df)
            tm.assert_frame_equal(store.select("df1_mixed"), df)
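    # --- Illustrative sketch, not part of the original test file ---
    # Table format preserves numeric dtypes exactly, so narrow types such as
    # float32 and int16 come back unchanged rather than being upcast. The
    # helper name is an assumption added for illustration.
    def _example_dtype_roundtrip(self, setup_path):
        df = DataFrame(
            {"f4": np.ones(3, dtype="float32"), "i2": np.ones(3, dtype="int16")}
        )
        with ensure_clean_store(setup_path) as store:
            store.append("typed", df)
            tm.assert_series_equal(store["typed"].dtypes, df.dtypes)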
    def test_unimplemented_dtypes_table_columns(self, setup_path):
        with ensure_clean_store(setup_path) as store:

            dtypes = [("date", datetime.date(2001, 1, 2))]

            # currently not supported dtypes ####
            for n, f in dtypes:
                df = tm.makeDataFrame()
                df[n] = f
                with pytest.raises(TypeError):
                    store.append(f"df1_{n}", df)

        # frame
        df = tm.makeDataFrame()
        df["obj1"] = "foo"
        df["obj2"] = "bar"
        df["datetime1"] = datetime.date(2001, 1, 2)
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(setup_path) as store:
            # this fails because we have a date in the object block......
            with pytest.raises(TypeError):
                store.append("df_unimplemented", df)

    @td.xfail_non_writeable
    @pytest.mark.skipif(
        LooseVersion(np.__version__) == LooseVersion("1.15.0"),
        reason=(
            "Skipping pytables test when numpy version is "
            "exactly equal to 1.15.0: gh-22098"
        ),
    )
    def test_calendar_roundtrip_issue(self, setup_path):
        # 8591
        # doc example from tseries holiday section
        weekmask_egypt = "Sun Mon Tue Wed Thu"
        holidays = [
            "2012-05-01",
            datetime.datetime(2013, 5, 1),
            np.datetime64("2014-05-01"),
        ]
        bday_egypt = pd.offsets.CustomBusinessDay(
            holidays=holidays, weekmask=weekmask_egypt
        )
        dt = datetime.datetime(2013, 4, 30)
        dts = date_range(dt, periods=5, freq=bday_egypt)

        s = Series(dts.weekday, dts).map(Series("Mon Tue Wed Thu Fri Sat Sun".split()))

        with ensure_clean_store(setup_path) as store:
            store.put("fixed", s)
            result = store.select("fixed")
            tm.assert_series_equal(result, s)

            store.append("table", s)
            result = store.select("table")
            tm.assert_series_equal(result, s)

    def test_roundtrip_tz_aware_index(self, setup_path):
        # GH 17618
        time = pd.Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
        df = pd.DataFrame(data=[0], index=[time])

        with ensure_clean_store(setup_path) as store:
            store.put("frame", df, format="fixed")
            recons = store["frame"]
            tm.assert_frame_equal(recons, df)
            assert recons.index[0].value == 946706400000000000

    def test_append_with_timedelta(self, setup_path):
        # GH 3577
        # append timedelta
        df = DataFrame(
            dict(
                A=Timestamp("20130101"),
                B=[
                    Timestamp("20130101") + timedelta(days=i, seconds=10)
                    for i in range(10)
                ],
            )
        )
        df["C"] = df["A"] - df["B"]
        df.loc[3:5, "C"] = np.nan

        with ensure_clean_store(setup_path) as store:

            # table
            _maybe_remove(store, "df")
            store.append("df", df, data_columns=True)
            result = store.select("df")
            tm.assert_frame_equal(result, df)

            result = store.select("df", where="C<100000")
            tm.assert_frame_equal(result, df)

            result = store.select("df", where="C<pd.Timedelta('-3D')")
            tm.assert_frame_equal(result, df.iloc[3:])

            result = store.select("df", "C<'-3D'")
            tm.assert_frame_equal(result, df.iloc[3:])

            # a bit hacky here as we don't really deal with the NaT properly
            result = store.select("df", "C<'-500000s'")
            result = result.dropna(subset=["C"])
            tm.assert_frame_equal(result, df.iloc[6:])

            result = store.select("df", "C<'-3.5D'")
            result = result.iloc[1:]
            tm.assert_frame_equal(result, df.iloc[4:])

            # fixed
            _maybe_remove(store, "df2")
            store.put("df2", df)
            result = store.select("df2")
            tm.assert_frame_equal(result, df)
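    # --- Illustrative sketch, not part of the original test file ---
    # timedelta64 data columns can be compared in a where clause either against
    # pd.Timedelta or against a shorthand string such as '-3D', as the test
    # above shows. The helper name is an assumption added for illustration.
    def _example_timedelta_where(self, setup_path):
        df = DataFrame({"C": pd.to_timedelta(["-1D", "-2D", "-4D"])})
        with ensure_clean_store(setup_path) as store:
            store.append("df", df, data_columns=True)
            result = store.select("df", "C<'-3D'")  # only the -4D row matches
            tm.assert_frame_equal(result, df.iloc[2:])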
    def test_remove(self, setup_path):
        with ensure_clean_store(setup_path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeDataFrame()
            store["a"] = ts
            store["b"] = df
            _maybe_remove(store, "a")
            assert len(store) == 1
            tm.assert_frame_equal(df, store["b"])

            _maybe_remove(store, "b")
            assert len(store) == 0

            # nonexistence
            with pytest.raises(
                KeyError, match="'No object named a_nonexistent_store in the file'"
            ):
                store.remove("a_nonexistent_store")

            # pathing
            store["a"] = ts
            store["b/foo"] = df
            _maybe_remove(store, "foo")
            _maybe_remove(store, "b/foo")
            assert len(store) == 1

            store["a"] = ts
            store["b/foo"] = df
            _maybe_remove(store, "b")
            assert len(store) == 1

            # __delitem__
            store["a"] = ts
            store["b"] = df
            del store["a"]
            del store["b"]
            assert len(store) == 0

    def test_invalid_terms(self, setup_path):
        with ensure_clean_store(setup_path) as store:

            with catch_warnings(record=True):

                df = tm.makeTimeDataFrame()
                df["string"] = "foo"
                df.loc[df.index[0:4], "string"] = "bar"

                store.put("df", df, format="table")

                # some invalid terms
                with pytest.raises(TypeError):
                    Term()

                # more invalid
                with pytest.raises(ValueError):
                    store.select("df", "df.index[3]")

                with pytest.raises(SyntaxError):
                    store.select("df", "index>")

        # from the docs
        with ensure_clean_path(setup_path) as path:
            dfq = DataFrame(
                np.random.randn(10, 4),
                columns=list("ABCD"),
                index=date_range("20130101", periods=10),
            )
            dfq.to_hdf(path, "dfq", format="table", data_columns=True)

            # check ok
            read_hdf(
                path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']"
            )
            read_hdf(path, "dfq", where="A>0 or C>0")

        # catch the invalid reference
        with ensure_clean_path(setup_path) as path:
            dfq = DataFrame(
                np.random.randn(10, 4),
                columns=list("ABCD"),
                index=date_range("20130101", periods=10),
            )
            dfq.to_hdf(path, "dfq", format="table")

            with pytest.raises(ValueError):
                read_hdf(path, "dfq", where="A>0 or C>0")

    def test_same_name_scoping(self, setup_path):
        with ensure_clean_store(setup_path) as store:

            import pandas as pd

            df = DataFrame(
                np.random.randn(20, 2), index=pd.date_range("20130101", periods=20)
            )
            store.put("df", df, format="table")
            expected = df[df.index > pd.Timestamp("20130105")]

            import datetime  # noqa

            result = store.select("df", "index>datetime.datetime(2013,1,5)")
            tm.assert_frame_equal(result, expected)

            from datetime import datetime  # noqa

            # technically an error, but allow it
            result = store.select("df", "index>datetime.datetime(2013,1,5)")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", "index>datetime(2013,1,5)")
            tm.assert_frame_equal(result, expected)
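    # --- Illustrative sketch, not part of the original test file ---
    # A where clause is evaluated with access to the caller's scope, so plain
    # Python variables can be referenced by name inside the query string, which
    # is the behavior the scoping test above relies on. The helper name is an
    # assumption added for illustration.
    def _example_where_scoping(self, setup_path):
        df = DataFrame({"A": range(10)}, index=pd.date_range("20130101", periods=10))
        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table")
            cutoff = pd.Timestamp("20130105")  # looked up from the local scope
            result = store.select("df", "index>cutoff")
            tm.assert_frame_equal(result, df[df.index > cutoff])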
    def test_series(self, setup_path):
        s = tm.makeStringSeries()
        self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)

        ts = tm.makeTimeSeries()
        self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path)

        ts2 = Series(ts.index, Index(ts.index, dtype=object))
        self._check_roundtrip(ts2, tm.assert_series_equal, path=setup_path)

        ts3 = Series(
            ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)
        )
        self._check_roundtrip(
            ts3, tm.assert_series_equal, path=setup_path, check_index_type=False
        )

    def test_float_index(self, setup_path):
        # GH #454
        index = np.random.randn(10)
        s = Series(np.random.randn(10), index=index)
        self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)

    @td.xfail_non_writeable
    def test_tuple_index(self, setup_path):
        # GH #492
        col = np.arange(10)
        idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)]
        data = np.random.randn(30).reshape((3, 10))
        DF = DataFrame(data, index=idx, columns=col)

        with catch_warnings(record=True):
            simplefilter("ignore", pd.errors.PerformanceWarning)
            self._check_roundtrip(DF, tm.assert_frame_equal, path=setup_path)

    @td.xfail_non_writeable
    @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
    def test_index_types(self, setup_path):
        with catch_warnings(record=True):
            values = np.random.randn(2)
            func = lambda l, r: tm.assert_series_equal(l, r, check_index_type=True)

        with catch_warnings(record=True):
            ser = Series(values, [0, "y"])
            self._check_roundtrip(ser, func, path=setup_path)

        with catch_warnings(record=True):
            ser = Series(values, [datetime.datetime.today(), 0])
            self._check_roundtrip(ser, func, path=setup_path)

        with catch_warnings(record=True):
            ser = Series(values, ["y", 0])
            self._check_roundtrip(ser, func, path=setup_path)

        with catch_warnings(record=True):
            ser = Series(values, [datetime.date.today(), "a"])
            self._check_roundtrip(ser, func, path=setup_path)

        with catch_warnings(record=True):
            ser = Series(values, [0, "y"])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, [datetime.datetime.today(), 0])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, ["y", 0])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, [datetime.date.today(), "a"])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, [1.23, "b"])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, [1, 1.53])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, [1, 5])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(
                values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)]
            )
            self._check_roundtrip(ser, func, path=setup_path)

    def test_timeseries_preepoch(self, setup_path):
        dr = bdate_range("1/1/1940", "1/1/1960")
        ts = Series(np.random.randn(len(dr)), index=dr)
        try:
            self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
        except OverflowError:
            pytest.skip("known failure on some windows platforms")

    @td.xfail_non_writeable
    @pytest.mark.parametrize(
        "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)]
    )
    def test_frame(self, compression, setup_path):

        df = tm.makeDataFrame()

        # put in some random NAs
        df.values[0, 0] = np.nan
        df.values[5, 3] = np.nan

        self._check_roundtrip_table(
            df, tm.assert_frame_equal, path=setup_path, compression=compression
        )
        self._check_roundtrip(
            df, tm.assert_frame_equal, path=setup_path, compression=compression
        )

        tdf = tm.makeTimeDataFrame()
        self._check_roundtrip(
            tdf, tm.assert_frame_equal, path=setup_path, compression=compression
        )

        with ensure_clean_store(setup_path) as store:
            # not consolidated
            df["foo"] = np.random.randn(len(df))
            store["df"] = df
            recons = store["df"]
            assert recons._mgr.is_consolidated()

        # empty
        self._check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path)
    @td.xfail_non_writeable
    def test_empty_series_frame(self, setup_path):
        s0 = Series(dtype=object)
        s1 = Series(name="myseries", dtype=object)
        df0 = DataFrame()
        df1 = DataFrame(index=["a", "b", "c"])
        df2 = DataFrame(columns=["d", "e", "f"])

        self._check_roundtrip(s0, tm.assert_series_equal, path=setup_path)
        self._check_roundtrip(s1, tm.assert_series_equal, path=setup_path)
        self._check_roundtrip(df0, tm.assert_frame_equal, path=setup_path)
        self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
        self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)

    @td.xfail_non_writeable
    @pytest.mark.parametrize(
        "dtype", [np.int64, np.float64, object, "m8[ns]", "M8[ns]"]
    )
    def test_empty_series(self, dtype, setup_path):
        s = Series(dtype=dtype)
        self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)

    def test_can_serialize_dates(self, setup_path):
        rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")]
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
        self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)

    def test_store_hierarchical(self, setup_path):
        index = MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["foo", "bar"],
        )
        frame = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])

        self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)
        self._check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path)
        self._check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path)

        # check that the names are stored
        with ensure_clean_store(setup_path) as store:
            store["frame"] = frame
            recons = store["frame"]
            tm.assert_frame_equal(recons, frame)

    def test_store_index_name(self, setup_path):
        df = tm.makeDataFrame()
        df.index.name = "foo"

        with ensure_clean_store(setup_path) as store:
            store["frame"] = df
            recons = store["frame"]
            tm.assert_frame_equal(recons, df)

    def test_store_index_name_with_tz(self, setup_path):
        # GH 13884
        df = pd.DataFrame({"A": [1, 2]})
        df.index = pd.DatetimeIndex([1234567890123456787, 1234567890123456788])
        df.index = df.index.tz_localize("UTC")
        df.index.name = "foo"

        with ensure_clean_store(setup_path) as store:
            store.put("frame", df, format="table")
            recons = store["frame"]
            tm.assert_frame_equal(recons, df)

    @pytest.mark.parametrize("table_format", ["table", "fixed"])
    def test_store_index_name_numpy_str(self, table_format, setup_path):
        # GH #13492
        idx = pd.Index(
            pd.to_datetime([datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]),
            name="cols\u05d2",
        )
        idx1 = pd.Index(
            pd.to_datetime([datetime.date(2010, 1, 1), datetime.date(2010, 1, 2)]),
            name="rows\u05d0",
        )
        df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)

        # This used to fail, returning numpy strings instead of python strings.
        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", format=table_format)
            df2 = read_hdf(path, "df")

            tm.assert_frame_equal(df, df2, check_names=True)

            assert type(df2.index.name) == str
            assert type(df2.columns.name) == str

    def test_store_series_name(self, setup_path):
        df = tm.makeDataFrame()
        series = df["A"]

        with ensure_clean_store(setup_path) as store:
            store["series"] = series
            recons = store["series"]
            tm.assert_series_equal(recons, series)
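    # --- Illustrative sketch, not part of the original test file ---
    # Index and column names (including non-ASCII ones, per the test above)
    # survive a round trip in both fixed and table formats. The helper name is
    # an assumption added for illustration.
    def _example_index_name_roundtrip(self, setup_path):
        df = tm.makeDataFrame()
        df.index.name = "rows"
        df.columns.name = "cols"
        with ensure_clean_store(setup_path) as store:
            store.put("named", df, format="table")
            recons = store["named"]
            assert recons.index.name == "rows"
            assert recons.columns.name == "cols"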
"bar"2096 df["bool1"] = df["A"] > 02097 df["bool2"] = df["B"] > 02098 df["int1"] = 12099 df["int2"] = 22100 return df._consolidate()2101 df1 = _make_one()2102 df2 = _make_one()2103 self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)2104 self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)2105 with ensure_clean_store(setup_path) as store:2106 store["obj"] = df12107 tm.assert_frame_equal(store["obj"], df1)2108 store["obj"] = df22109 tm.assert_frame_equal(store["obj"], df2)2110 # check that can store Series of all of these types2111 self._check_roundtrip(2112 df1["obj1"],2113 tm.assert_series_equal,2114 path=setup_path,2115 compression=compression,2116 )2117 self._check_roundtrip(2118 df1["bool1"],2119 tm.assert_series_equal,2120 path=setup_path,2121 compression=compression,2122 )2123 self._check_roundtrip(2124 df1["int1"],2125 tm.assert_series_equal,2126 path=setup_path,2127 compression=compression,2128 )2129 @pytest.mark.filterwarnings(2130 "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning"2131 )2132 def test_select_with_dups(self, setup_path):2133 # single dtypes2134 df = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"])2135 df.index = date_range("20130101 9:30", periods=10, freq="T")2136 with ensure_clean_store(setup_path) as store:2137 store.append("df", df)2138 result = store.select("df")2139 expected = df2140 tm.assert_frame_equal(result, expected, by_blocks=True)2141 result = store.select("df", columns=df.columns)2142 expected = df2143 tm.assert_frame_equal(result, expected, by_blocks=True)2144 result = store.select("df", columns=["A"])2145 expected = df.loc[:, ["A"]]2146 tm.assert_frame_equal(result, expected)2147 # dups across dtypes2148 df = concat(2149 [2150 DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]),2151 DataFrame(2152 np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]2153 ),2154 ],2155 axis=1,2156 )2157 df.index = date_range("20130101 9:30", periods=10, freq="T")2158 with ensure_clean_store(setup_path) as store:2159 store.append("df", df)2160 result = store.select("df")2161 expected = df2162 tm.assert_frame_equal(result, expected, by_blocks=True)2163 result = store.select("df", columns=df.columns)2164 expected = df2165 tm.assert_frame_equal(result, expected, by_blocks=True)2166 expected = df.loc[:, ["A"]]2167 result = store.select("df", columns=["A"])2168 tm.assert_frame_equal(result, expected, by_blocks=True)2169 expected = df.loc[:, ["B", "A"]]2170 result = store.select("df", columns=["B", "A"])2171 tm.assert_frame_equal(result, expected, by_blocks=True)2172 # duplicates on both index and columns2173 with ensure_clean_store(setup_path) as store:2174 store.append("df", df)2175 store.append("df", df)2176 expected = df.loc[:, ["B", "A"]]2177 expected = concat([expected, expected])2178 result = store.select("df", columns=["B", "A"])2179 tm.assert_frame_equal(result, expected, by_blocks=True)2180 def test_overwrite_node(self, setup_path):2181 with ensure_clean_store(setup_path) as store:2182 store["a"] = tm.makeTimeDataFrame()2183 ts = tm.makeTimeSeries()2184 store["a"] = ts2185 tm.assert_series_equal(store["a"], ts)2186 def test_select(self, setup_path):2187 with ensure_clean_store(setup_path) as store:2188 with catch_warnings(record=True):2189 # select with columns=2190 df = tm.makeTimeDataFrame()2191 _maybe_remove(store, "df")2192 store.append("df", df)2193 result = store.select("df", columns=["A", "B"])2194 expected = df.reindex(columns=["A", "B"])2195 tm.assert_frame_equal(expected, 
    def test_select(self, setup_path):
        with ensure_clean_store(setup_path) as store:
            with catch_warnings(record=True):

                # select with columns=
                df = tm.makeTimeDataFrame()
                _maybe_remove(store, "df")
                store.append("df", df)
                result = store.select("df", columns=["A", "B"])
                expected = df.reindex(columns=["A", "B"])
                tm.assert_frame_equal(expected, result)

                # equivalently
                result = store.select("df", [("columns=['A', 'B']")])
                expected = df.reindex(columns=["A", "B"])
                tm.assert_frame_equal(expected, result)

                # with a data column
                _maybe_remove(store, "df")
                store.append("df", df, data_columns=["A"])
                result = store.select("df", ["A > 0"], columns=["A", "B"])
                expected = df[df.A > 0].reindex(columns=["A", "B"])
                tm.assert_frame_equal(expected, result)

                # all a data columns
                _maybe_remove(store, "df")
                store.append("df", df, data_columns=True)
                result = store.select("df", ["A > 0"], columns=["A", "B"])
                expected = df[df.A > 0].reindex(columns=["A", "B"])
                tm.assert_frame_equal(expected, result)

                # with a data column, but different columns
                _maybe_remove(store, "df")
                store.append("df", df, data_columns=["A"])
                result = store.select("df", ["A > 0"], columns=["C", "D"])
                expected = df[df.A > 0].reindex(columns=["C", "D"])
                tm.assert_frame_equal(expected, result)
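    # --- Illustrative sketch, not part of the original test file ---
    # Row filtering (where) and column projection (columns=) compose: only "A"
    # needs to be a data_column because only it appears in the where clause.
    # The helper name is an assumption added for illustration.
    def _example_where_plus_columns(self, setup_path):
        df = tm.makeTimeDataFrame()
        with ensure_clean_store(setup_path) as store:
            store.append("df", df, data_columns=["A"])
            result = store.select("df", where="A>0", columns=["A", "B"])
            tm.assert_frame_equal(result, df[df.A > 0].reindex(columns=["A", "B"]))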
    def test_select_dtypes(self, setup_path):
        with ensure_clean_store(setup_path) as store:
            # with a Timestamp data column (GH #2637)
            df = DataFrame(
                dict(ts=bdate_range("2012-01-01", periods=300), A=np.random.randn(300))
            )
            _maybe_remove(store, "df")
            store.append("df", df, data_columns=["ts", "A"])

            result = store.select("df", "ts>=Timestamp('2012-02-01')")
            expected = df[df.ts >= Timestamp("2012-02-01")]
            tm.assert_frame_equal(expected, result)

            # bool columns (GH #2849)
            df = DataFrame(np.random.randn(5, 2), columns=["A", "B"])
            df["object"] = "foo"
            df.loc[4:5, "object"] = "bar"
            df["boolv"] = df["A"] > 0
            _maybe_remove(store, "df")
            store.append("df", df, data_columns=True)

            expected = df[df.boolv == True].reindex(columns=["A", "boolv"])  # noqa
            for v in [True, "true", 1]:
                result = store.select("df", f"boolv == {v}", columns=["A", "boolv"])
                tm.assert_frame_equal(expected, result)

            expected = df[df.boolv == False].reindex(columns=["A", "boolv"])  # noqa
            for v in [False, "false", 0]:
                result = store.select("df", f"boolv == {v}", columns=["A", "boolv"])
                tm.assert_frame_equal(expected, result)

            # integer index
            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
            _maybe_remove(store, "df_int")
            store.append("df_int", df)
            result = store.select("df_int", "index<10 and columns=['A']")
            expected = df.reindex(index=list(df.index)[0:10], columns=["A"])
            tm.assert_frame_equal(expected, result)

            # float index
            df = DataFrame(
                dict(
                    A=np.random.rand(20),
                    B=np.random.rand(20),
                    index=np.arange(20, dtype="f8"),
                )
            )
            _maybe_remove(store, "df_float")
            store.append("df_float", df)
            result = store.select("df_float", "index<10.0 and columns=['A']")
            expected = df.reindex(index=list(df.index)[0:10], columns=["A"])
            tm.assert_frame_equal(expected, result)

        with ensure_clean_store(setup_path) as store:

            # floats w/o NaN
            df = DataFrame(dict(cols=range(11), values=range(11)), dtype="float64")
            df["cols"] = (df["cols"] + 10).apply(str)

            store.append("df1", df, data_columns=True)
            result = store.select("df1", where="values>2.0")
            expected = df[df["values"] > 2.0]
            tm.assert_frame_equal(expected, result)

            # floats with NaN
            df.iloc[0] = np.nan
            expected = df[df["values"] > 2.0]

            store.append("df2", df, data_columns=True, index=False)
            result = store.select("df2", where="values>2.0")
            tm.assert_frame_equal(expected, result)

            # https://github.com/PyTables/PyTables/issues/282
            # bug in selection when 0th row has a np.nan and an index
            # store.append('df3', df, data_columns=True)
            # result = store.select('df3', where='values>2.0')
            # tm.assert_frame_equal(expected, result)

            # not in first position float with NaN ok too
            df = DataFrame(dict(cols=range(11), values=range(11)), dtype="float64")
            df["cols"] = (df["cols"] + 10).apply(str)

            df.iloc[1] = np.nan
            expected = df[df["values"] > 2.0]

            store.append("df4", df, data_columns=True)
            result = store.select("df4", where="values>2.0")
            tm.assert_frame_equal(expected, result)

        # test selection with comparison against numpy scalar
        # GH 11283
        with ensure_clean_store(setup_path) as store:
            df = tm.makeDataFrame()

            expected = df[df["A"] > 0]

            store.append("df", df, data_columns=True)
            np_zero = np.float64(0)  # noqa
            result = store.select("df", where=["A>np_zero"])
            tm.assert_frame_equal(expected, result)

    def test_select_with_many_inputs(self, setup_path):
        with ensure_clean_store(setup_path) as store:

            df = DataFrame(
                dict(
                    ts=bdate_range("2012-01-01", periods=300),
                    A=np.random.randn(300),
                    B=range(300),
                    users=["a"] * 50
                    + ["b"] * 50
                    + ["c"] * 100
                    + [f"a{i:03d}" for i in range(100)],
                )
            )
            _maybe_remove(store, "df")
            store.append("df", df, data_columns=["ts", "A", "B", "users"])

            # regular select
            result = store.select("df", "ts>=Timestamp('2012-02-01')")
            expected = df[df.ts >= Timestamp("2012-02-01")]
            tm.assert_frame_equal(expected, result)

            # small selector
            result = store.select(
                "df", "ts>=Timestamp('2012-02-01') & users=['a','b','c']"
            )
            expected = df[
                (df.ts >= Timestamp("2012-02-01")) & df.users.isin(["a", "b", "c"])
            ]
            tm.assert_frame_equal(expected, result)

            # big selector along the columns
            selector = ["a", "b", "c"] + [f"a{i:03d}" for i in range(60)]
            result = store.select(
                "df", "ts>=Timestamp('2012-02-01') and users=selector"
            )
            expected = df[(df.ts >= Timestamp("2012-02-01")) & df.users.isin(selector)]
            tm.assert_frame_equal(expected, result)

            selector = range(100, 200)
            result = store.select("df", "B=selector")
            expected = df[df.B.isin(selector)]
            tm.assert_frame_equal(expected, result)
            assert len(result) == 100

            # big selector along the index
            selector = Index(df.ts[0:100].values)
            result = store.select("df", "ts=selector")
            expected = df[df.ts.isin(selector.values)]
            tm.assert_frame_equal(expected, result)
            assert len(result) == 100
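    # --- Illustrative sketch, not part of the original test file ---
    # A list-valued comparison in a where clause behaves like isin(), and the
    # list can be a named local variable, as in the "users=selector" test
    # above. The helper name is an assumption added for illustration.
    def _example_list_selector(self, setup_path):
        df = DataFrame({"users": ["a", "b", "c", "d"], "B": range(4)})
        with ensure_clean_store(setup_path) as store:
            store.append("df", df, data_columns=["users"])
            wanted = ["a", "c"]
            result = store.select("df", "users=wanted")
            tm.assert_frame_equal(result, df[df.users.isin(wanted)])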
"df_non_table", iterator=True)2370 with ensure_clean_path(setup_path) as path:2371 df = tm.makeTimeDataFrame(500)2372 df.to_hdf(path, "df", format="table")2373 results = list(read_hdf(path, "df", chunksize=100))2374 result = concat(results)2375 assert len(results) == 52376 tm.assert_frame_equal(result, df)2377 tm.assert_frame_equal(result, read_hdf(path, "df"))2378 # multiple2379 with ensure_clean_store(setup_path) as store:2380 df1 = tm.makeTimeDataFrame(500)2381 store.append("df1", df1, data_columns=True)2382 df2 = tm.makeTimeDataFrame(500).rename(columns="{}_2".format)2383 df2["foo"] = "bar"2384 store.append("df2", df2)2385 df = concat([df1, df2], axis=1)2386 # full selection2387 expected = store.select_as_multiple(["df1", "df2"], selector="df1")2388 results = list(2389 store.select_as_multiple(["df1", "df2"], selector="df1", chunksize=150)2390 )2391 result = concat(results)2392 tm.assert_frame_equal(expected, result)2393 def test_select_iterator_complete_8014(self, setup_path):2394 # GH 80142395 # using iterator and where clause2396 chunksize = 1e42397 # no iterator2398 with ensure_clean_store(setup_path) as store:2399 expected = tm.makeTimeDataFrame(100064, "S")2400 _maybe_remove(store, "df")2401 store.append("df", expected)2402 beg_dt = expected.index[0]2403 end_dt = expected.index[-1]2404 # select w/o iteration and no where clause works2405 result = store.select("df")2406 tm.assert_frame_equal(expected, result)2407 # select w/o iterator and where clause, single term, begin2408 # of range, works2409 where = f"index >= '{beg_dt}'"2410 result = store.select("df", where=where)2411 tm.assert_frame_equal(expected, result)2412 # select w/o iterator and where clause, single term, end2413 # of range, works2414 where = f"index <= '{end_dt}'"2415 result = store.select("df", where=where)2416 tm.assert_frame_equal(expected, result)2417 # select w/o iterator and where clause, inclusive range,2418 # works2419 where = f"index >= '{beg_dt}' & index <= '{end_dt}'"2420 result = store.select("df", where=where)2421 tm.assert_frame_equal(expected, result)2422 # with iterator, full range2423 with ensure_clean_store(setup_path) as store:2424 expected = tm.makeTimeDataFrame(100064, "S")2425 _maybe_remove(store, "df")2426 store.append("df", expected)2427 beg_dt = expected.index[0]2428 end_dt = expected.index[-1]2429 # select w/iterator and no where clause works2430 results = list(store.select("df", chunksize=chunksize))2431 result = concat(results)2432 tm.assert_frame_equal(expected, result)2433 # select w/iterator and where clause, single term, begin of range2434 where = f"index >= '{beg_dt}'"2435 results = list(store.select("df", where=where, chunksize=chunksize))2436 result = concat(results)2437 tm.assert_frame_equal(expected, result)2438 # select w/iterator and where clause, single term, end of range2439 where = f"index <= '{end_dt}'"2440 results = list(store.select("df", where=where, chunksize=chunksize))2441 result = concat(results)2442 tm.assert_frame_equal(expected, result)2443 # select w/iterator and where clause, inclusive range2444 where = f"index >= '{beg_dt}' & index <= '{end_dt}'"2445 results = list(store.select("df", where=where, chunksize=chunksize))2446 result = concat(results)2447 tm.assert_frame_equal(expected, result)2448 def test_select_iterator_non_complete_8014(self, setup_path):2449 # GH 80142450 # using iterator and where clause2451 chunksize = 1e42452 # with iterator, non complete range2453 with ensure_clean_store(setup_path) as store:2454 expected = tm.makeTimeDataFrame(100064, 
"S")2455 _maybe_remove(store, "df")2456 store.append("df", expected)2457 beg_dt = expected.index[1]2458 end_dt = expected.index[-2]2459 # select w/iterator and where clause, single term, begin of range2460 where = f"index >= '{beg_dt}'"2461 results = list(store.select("df", where=where, chunksize=chunksize))2462 result = concat(results)2463 rexpected = expected[expected.index >= beg_dt]2464 tm.assert_frame_equal(rexpected, result)2465 # select w/iterator and where clause, single term, end of range2466 where = f"index <= '{end_dt}'"2467 results = list(store.select("df", where=where, chunksize=chunksize))2468 result = concat(results)2469 rexpected = expected[expected.index <= end_dt]2470 tm.assert_frame_equal(rexpected, result)2471 # select w/iterator and where clause, inclusive range2472 where = f"index >= '{beg_dt}' & index <= '{end_dt}'"2473 results = list(store.select("df", where=where, chunksize=chunksize))2474 result = concat(results)2475 rexpected = expected[2476 (expected.index >= beg_dt) & (expected.index <= end_dt)2477 ]2478 tm.assert_frame_equal(rexpected, result)2479 # with iterator, empty where2480 with ensure_clean_store(setup_path) as store:2481 expected = tm.makeTimeDataFrame(100064, "S")2482 _maybe_remove(store, "df")2483 store.append("df", expected)2484 end_dt = expected.index[-1]2485 # select w/iterator and where clause, single term, begin of range2486 where = f"index > '{end_dt}'"2487 results = list(store.select("df", where=where, chunksize=chunksize))2488 assert 0 == len(results)2489 def test_select_iterator_many_empty_frames(self, setup_path):2490 # GH 80142491 # using iterator and where clause can return many empty2492 # frames.2493 chunksize = int(1e4)2494 # with iterator, range limited to the first chunk2495 with ensure_clean_store(setup_path) as store:2496 expected = tm.makeTimeDataFrame(100000, "S")2497 _maybe_remove(store, "df")2498 store.append("df", expected)2499 beg_dt = expected.index[0]2500 end_dt = expected.index[chunksize - 1]2501 # select w/iterator and where clause, single term, begin of range2502 where = f"index >= '{beg_dt}'"2503 results = list(store.select("df", where=where, chunksize=chunksize))2504 result = concat(results)2505 rexpected = expected[expected.index >= beg_dt]2506 tm.assert_frame_equal(rexpected, result)2507 # select w/iterator and where clause, single term, end of range2508 where = f"index <= '{end_dt}'"2509 results = list(store.select("df", where=where, chunksize=chunksize))2510 assert len(results) == 12511 result = concat(results)2512 rexpected = expected[expected.index <= end_dt]2513 tm.assert_frame_equal(rexpected, result)2514 # select w/iterator and where clause, inclusive range2515 where = f"index >= '{beg_dt}' & index <= '{end_dt}'"2516 results = list(store.select("df", where=where, chunksize=chunksize))2517 # should be 1, is 102518 assert len(results) == 12519 result = concat(results)2520 rexpected = expected[2521 (expected.index >= beg_dt) & (expected.index <= end_dt)2522 ]2523 tm.assert_frame_equal(rexpected, result)2524 # select w/iterator and where clause which selects2525 # *nothing*.2526 #2527 # To be consistent with Python idiom I suggest this should2528 # return [] e.g. 
            # select w/iterator and where clause which selects
            # *nothing*.
            #
            # To be consistent with Python idiom I suggest this should
            # return [] e.g. `for e in []: print True` never prints
            # True.
            where = f"index <= '{beg_dt}' & index >= '{end_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))

            # should be []
            assert len(results) == 0

    @pytest.mark.filterwarnings(
        "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
    )
    def test_retain_index_attributes(self, setup_path):

        # GH 3499, losing frequency info on index recreation
        df = DataFrame(
            dict(A=Series(range(3), index=date_range("2000-1-1", periods=3, freq="H")))
        )

        with ensure_clean_store(setup_path) as store:
            _maybe_remove(store, "data")
            store.put("data", df, format="table")

            result = store.get("data")
            tm.assert_frame_equal(df, result)

            for attr in ["freq", "tz", "name"]:
                for idx in ["index", "columns"]:
                    assert getattr(getattr(df, idx), attr, None) == getattr(
                        getattr(result, idx), attr, None
                    )

            # try to append a table with a different frequency
            with catch_warnings(record=True):
                df2 = DataFrame(
                    dict(
                        A=Series(
                            range(3), index=date_range("2002-1-1", periods=3, freq="D")
                        )
                    )
                )
                store.append("data", df2)

            assert store.get_storer("data").info["index"]["freq"] is None

            # this is ok
            _maybe_remove(store, "df2")
            df2 = DataFrame(
                dict(
                    A=Series(
                        range(3),
                        index=[
                            Timestamp("20010101"),
                            Timestamp("20010102"),
                            Timestamp("20020101"),
                        ],
                    )
                )
            )
            store.append("df2", df2)
            df3 = DataFrame(
                dict(
                    A=Series(
                        range(3), index=date_range("2002-1-1", periods=3, freq="D")
                    )
                )
            )
            store.append("df2", df3)

    @pytest.mark.filterwarnings(
        "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
    )
    def test_retain_index_attributes2(self, setup_path):
        with ensure_clean_path(setup_path) as path:

            with catch_warnings(record=True):

                df = DataFrame(
                    dict(
                        A=Series(
                            range(3), index=date_range("2000-1-1", periods=3, freq="H")
                        )
                    )
                )
                df.to_hdf(path, "data", mode="w", append=True)
                df2 = DataFrame(
                    dict(
                        A=Series(
                            range(3), index=date_range("2002-1-1", periods=3, freq="D")
                        )
                    )
                )
                df2.to_hdf(path, "data", append=True)

                idx = date_range("2000-1-1", periods=3, freq="H")
                idx.name = "foo"
                df = DataFrame(dict(A=Series(range(3), index=idx)))
                df.to_hdf(path, "data", mode="w", append=True)

            assert read_hdf(path, "data").index.name == "foo"

            with catch_warnings(record=True):

                idx2 = date_range("2001-1-1", periods=3, freq="H")
                idx2.name = "bar"
                df2 = DataFrame(dict(A=Series(range(3), index=idx2)))
                df2.to_hdf(path, "data", append=True)

            assert read_hdf(path, "data").index.name is None

    def test_frame_select(self, setup_path):

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:
            store.put("frame", df, format="table")
            date = df.index[len(df) // 2]

            crit1 = Term("index>=date")
            assert crit1.env.scope["date"] == date

            crit2 = "columns=['A', 'D']"
            crit3 = "columns=A"

            result = store.select("frame", [crit1, crit2])
            expected = df.loc[date:, ["A", "D"]]
            tm.assert_frame_equal(result, expected)

            result = store.select("frame", [crit3])
            expected = df.loc[:, ["A"]]
            tm.assert_frame_equal(result, expected)

            # invalid terms
            df = tm.makeTimeDataFrame()
            store.append("df_time", df)
            with pytest.raises(ValueError):
                store.select("df_time", "index>0")

            # can't select if not written as table
            # store['frame'] = df
            # with pytest.raises(ValueError):
            #     store.select('frame', [crit1, crit2])
    def test_frame_select_complex(self, setup_path):
        # select via complex criteria

        df = tm.makeTimeDataFrame()
        df["string"] = "foo"
        df.loc[df.index[0:4], "string"] = "bar"

        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table", data_columns=["string"])

            # empty
            result = store.select("df", 'index>df.index[3] & string="bar"')
            expected = df.loc[(df.index > df.index[3]) & (df.string == "bar")]
            tm.assert_frame_equal(result, expected)

            result = store.select("df", 'index>df.index[3] & string="foo"')
            expected = df.loc[(df.index > df.index[3]) & (df.string == "foo")]
            tm.assert_frame_equal(result, expected)

            # or
            result = store.select("df", 'index>df.index[3] | string="bar"')
            expected = df.loc[(df.index > df.index[3]) | (df.string == "bar")]
            tm.assert_frame_equal(result, expected)

            result = store.select(
                "df", '(index>df.index[3] & index<=df.index[6]) | string="bar"'
            )
            expected = df.loc[
                ((df.index > df.index[3]) & (df.index <= df.index[6]))
                | (df.string == "bar")
            ]
            tm.assert_frame_equal(result, expected)

            # invert
            result = store.select("df", 'string!="bar"')
            expected = df.loc[df.string != "bar"]
            tm.assert_frame_equal(result, expected)

            # invert not implemented in numexpr :(
            with pytest.raises(NotImplementedError):
                store.select("df", '~(string="bar")')

            # invert ok for filters
            result = store.select("df", "~(columns=['A','B'])")
            expected = df.loc[:, df.columns.difference(["A", "B"])]
            tm.assert_frame_equal(result, expected)

            # in
            result = store.select("df", "index>df.index[3] & columns in ['A','B']")
            expected = df.loc[df.index > df.index[3]].reindex(columns=["A", "B"])
            tm.assert_frame_equal(result, expected)
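    # --- Illustrative sketch, not part of the original test file ---
    # Terms combine with &, | and parentheses; note that ~ only works for
    # column filters, not for value comparisons, per the test above. The
    # helper name is an assumption added for illustration.
    def _example_complex_where(self, setup_path):
        df = tm.makeTimeDataFrame()
        df["string"] = "foo"
        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table", data_columns=["string"])
            result = store.select("df", 'index>df.index[5] & string="foo"')
            expected = df[(df.index > df.index[5]) & (df.string == "foo")]
            tm.assert_frame_equal(result, expected)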
    def test_frame_select_complex2(self, setup_path):
        with ensure_clean_path(["parms.hdf", "hist.hdf"]) as paths:

            pp, hh = paths

            # use non-trivial selection criteria
            parms = DataFrame({"A": [1, 1, 2, 2, 3]})
            parms.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"])

            selection = read_hdf(pp, "df", where="A=[2,3]")
            hist = DataFrame(
                np.random.randn(25, 1),
                columns=["data"],
                index=MultiIndex.from_tuples(
                    [(i, j) for i in range(5) for j in range(5)], names=["l1", "l2"]
                ),
            )

            hist.to_hdf(hh, "df", mode="w", format="table")

            expected = read_hdf(hh, "df", where="l1=[2, 3, 4]")

            # scope with list like
            l = selection.index.tolist()  # noqa
            store = HDFStore(hh)
            result = store.select("df", where="l1=l")
            tm.assert_frame_equal(result, expected)
            store.close()

            result = read_hdf(hh, "df", where="l1=l")
            tm.assert_frame_equal(result, expected)

            # index
            index = selection.index  # noqa
            result = read_hdf(hh, "df", where="l1=index")
            tm.assert_frame_equal(result, expected)

            result = read_hdf(hh, "df", where="l1=selection.index")
            tm.assert_frame_equal(result, expected)

            result = read_hdf(hh, "df", where="l1=selection.index.tolist()")
            tm.assert_frame_equal(result, expected)

            result = read_hdf(hh, "df", where="l1=list(selection.index)")
            tm.assert_frame_equal(result, expected)

            # scope with index
            store = HDFStore(hh)

            result = store.select("df", where="l1=index")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", where="l1=selection.index")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", where="l1=selection.index.tolist()")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", where="l1=list(selection.index)")
            tm.assert_frame_equal(result, expected)

            store.close()

    def test_invalid_filtering(self, setup_path):

        # can't use more than one filter (atm)

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table")

            # not implemented
            with pytest.raises(NotImplementedError):
                store.select("df", "columns=['A'] | columns=['B']")

            # in theory we could deal with this
            with pytest.raises(NotImplementedError):
                store.select("df", "columns=['A','B'] & columns=['C']")

    def test_string_select(self, setup_path):
        # GH 2973
        with ensure_clean_store(setup_path) as store:

            df = tm.makeTimeDataFrame()

            # test string ==/!=
            df["x"] = "none"
            df.loc[df.index[2:7], "x"] = ""

            store.append("df", df, data_columns=["x"])

            result = store.select("df", "x=none")
            expected = df[df.x == "none"]
            tm.assert_frame_equal(result, expected)

            result = store.select("df", "x!=none")
            expected = df[df.x != "none"]
            tm.assert_frame_equal(result, expected)

            df2 = df.copy()
            df2.loc[df2.x == "", "x"] = np.nan

            store.append("df2", df2, data_columns=["x"])
            result = store.select("df2", "x!=none")
            expected = df2[isna(df2.x)]
            tm.assert_frame_equal(result, expected)

            # int ==/!=
            df["int"] = 1
            df.loc[df.index[2:7], "int"] = 2

            store.append("df3", df, data_columns=["int"])

            result = store.select("df3", "int=2")
            expected = df[df.int == 2]
            tm.assert_frame_equal(result, expected)

            result = store.select("df3", "int!=2")
            expected = df[df.int != 2]
            tm.assert_frame_equal(result, expected)
    def test_read_column(self, setup_path):

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:
            _maybe_remove(store, "df")

            # GH 17912
            # HDFStore.select_column should raise a KeyError
            # exception if the key is not a valid store
            with pytest.raises(KeyError, match="No object named df in the file"):
                store.select_column("df", "index")

            store.append("df", df)
            # error
            with pytest.raises(
                KeyError, match=re.escape("'column [foo] not found in the table'")
            ):
                store.select_column("df", "foo")

            with pytest.raises(Exception):
                store.select_column("df", "index", where=["index>5"])

            # valid
            result = store.select_column("df", "index")
            tm.assert_almost_equal(result.values, Series(df.index).values)
            assert isinstance(result, Series)

            # not a data indexable column
            with pytest.raises(ValueError):
                store.select_column("df", "values_block_0")

            # a data column
            df2 = df.copy()
            df2["string"] = "foo"
            store.append("df2", df2, data_columns=["string"])
            result = store.select_column("df2", "string")
            tm.assert_almost_equal(result.values, df2["string"].values)

            # a data column with NaNs, result excludes the NaNs
            df3 = df.copy()
            df3["string"] = "foo"
            df3.loc[df3.index[4:6], "string"] = np.nan
            store.append("df3", df3, data_columns=["string"])

            result = store.select_column("df3", "string")
            tm.assert_almost_equal(result.values, df3["string"].values)

            # start/stop
            result = store.select_column("df3", "string", start=2)
            tm.assert_almost_equal(result.values, df3["string"].values[2:])

            result = store.select_column("df3", "string", start=-2)
            tm.assert_almost_equal(result.values, df3["string"].values[-2:])

            result = store.select_column("df3", "string", stop=2)
            tm.assert_almost_equal(result.values, df3["string"].values[:2])

            result = store.select_column("df3", "string", stop=-2)
            tm.assert_almost_equal(result.values, df3["string"].values[:-2])

            result = store.select_column("df3", "string", start=2, stop=-2)
            tm.assert_almost_equal(result.values, df3["string"].values[2:-2])

            result = store.select_column("df3", "string", start=-2, stop=2)
            tm.assert_almost_equal(result.values, df3["string"].values[-2:2])

            # GH 10392 - make sure column name is preserved
            df4 = DataFrame({"A": np.random.randn(10), "B": "foo"})
            store.append("df4", df4, data_columns=True)
            expected = df4["B"]
            result = store.select_column("df4", "B")
            tm.assert_series_equal(result, expected)
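    # --- Illustrative sketch, not part of the original test file ---
    # select_column reads a single indexable or data column back as a Series
    # without materializing the rest of the table. The helper name is an
    # assumption added for illustration.
    def _example_select_column(self, setup_path):
        df = tm.makeTimeDataFrame()
        df["string"] = "foo"
        with ensure_clean_store(setup_path) as store:
            store.append("df", df, data_columns=["string"])
            col = store.select_column("df", "string")
            tm.assert_almost_equal(col.values, df["string"].values)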
    def test_coordinates(self, setup_path):
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:

            _maybe_remove(store, "df")
            store.append("df", df)

            # all
            c = store.select_as_coordinates("df")
            assert (c.values == np.arange(len(df.index))).all()

            # get coordinates back & test vs frame
            _maybe_remove(store, "df")

            df = DataFrame(dict(A=range(5), B=range(5)))
            store.append("df", df)
            c = store.select_as_coordinates("df", ["index<3"])
            assert (c.values == np.arange(3)).all()
            result = store.select("df", where=c)
            expected = df.loc[0:2, :]
            tm.assert_frame_equal(result, expected)

            c = store.select_as_coordinates("df", ["index>=3", "index<=4"])
            assert (c.values == np.arange(2) + 3).all()
            result = store.select("df", where=c)
            expected = df.loc[3:4, :]
            tm.assert_frame_equal(result, expected)
            assert isinstance(c, Index)

            # multiple tables
            _maybe_remove(store, "df1")
            _maybe_remove(store, "df2")

            df1 = tm.makeTimeDataFrame()
            df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
            store.append("df1", df1, data_columns=["A", "B"])
            store.append("df2", df2)

            c = store.select_as_coordinates("df1", ["A>0", "B>0"])
            df1_result = store.select("df1", c)
            df2_result = store.select("df2", c)
            result = concat([df1_result, df2_result], axis=1)

            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

        # pass array/mask as the coordinates
        with ensure_clean_store(setup_path) as store:

            df = DataFrame(
                np.random.randn(1000, 2), index=date_range("20000101", periods=1000)
            )
            store.append("df", df)
            c = store.select_column("df", "index")
            where = c[DatetimeIndex(c).month == 5].index
            expected = df.iloc[where]

            # locations
            result = store.select("df", where=where)
            tm.assert_frame_equal(result, expected)

            # boolean
            result = store.select("df", where=where)
            tm.assert_frame_equal(result, expected)

            # invalid
            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df), dtype="float64"))

            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df) + 1))

            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df)), start=5)

            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df)), start=5, stop=10)

            # selection with filter
            selection = date_range("20000101", periods=500)
            result = store.select("df", where="index in selection")
            expected = df[df.index.isin(selection)]
            tm.assert_frame_equal(result, expected)

            # list
            df = DataFrame(np.random.randn(10, 2))
            store.append("df2", df)
            result = store.select("df2", where=[0, 3, 5])
            expected = df.iloc[[0, 3, 5]]
            tm.assert_frame_equal(result, expected)

            # boolean
            where = [True] * 10
            where[-2] = False
            result = store.select("df2", where=where)
            expected = df.loc[where]
            tm.assert_frame_equal(result, expected)

            # start/stop
            result = store.select("df2", start=5, stop=10)
            expected = df[5:10]
            tm.assert_frame_equal(result, expected)
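    # --- Illustrative sketch, not part of the original test file ---
    # select_as_coordinates returns row positions for a query; those positions
    # can be post-processed and fed back into select via where=, as the test
    # above does. The helper name is an assumption added for illustration.
    def _example_coordinates(self, setup_path):
        df = DataFrame({"A": range(10)})
        with ensure_clean_store(setup_path) as store:
            store.append("df", df)
            coords = store.select_as_coordinates("df", "index<5")
            result = store.select("df", where=coords[::2])  # keep every other row
            tm.assert_frame_equal(result, df.iloc[[0, 2, 4]])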
    def test_append_to_multiple(self, setup_path):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
        df2["foo"] = "bar"
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(setup_path) as store:

            # exceptions
            with pytest.raises(ValueError):
                store.append_to_multiple(
                    {"df1": ["A", "B"], "df2": None}, df, selector="df3"
                )

            with pytest.raises(ValueError):
                store.append_to_multiple({"df1": None, "df2": None}, df, selector="df3")

            with pytest.raises(ValueError):
                store.append_to_multiple("df1", df, "df1")

            # regular operation
            store.append_to_multiple(
                {"df1": ["A", "B"], "df2": None}, df, selector="df1"
            )
            result = store.select_as_multiple(
                ["df1", "df2"], where=["A>0", "B>0"], selector="df1"
            )
            expected = df[(df.A > 0) & (df.B > 0)]
            tm.assert_frame_equal(result, expected)

    def test_append_to_multiple_dropna(self, setup_path):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
        df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(setup_path) as store:

            # dropna=True should guarantee rows are synchronized
            store.append_to_multiple(
                {"df1": ["A", "B"], "df2": None}, df, selector="df1", dropna=True
            )
            result = store.select_as_multiple(["df1", "df2"])
            expected = df.dropna()
            tm.assert_frame_equal(result, expected)
            tm.assert_index_equal(store.select("df1").index, store.select("df2").index)

    @pytest.mark.xfail(
        run=False, reason="append_to_multiple_dropna_false is not raising as failed"
    )
    def test_append_to_multiple_dropna_false(self, setup_path):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
        df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(setup_path) as store:

            # dropna=False shouldn't synchronize row indexes
            store.append_to_multiple(
                {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False
            )

            with pytest.raises(ValueError):
                store.select_as_multiple(["df1a", "df2a"])

            assert not store.select("df1a").index.equals(store.select("df2a").index)

    def test_append_to_multiple_min_itemsize(self, setup_path):
        # GH 11238
        df = pd.DataFrame(
            {
                "IX": np.arange(1, 21),
                "Num": np.arange(1, 21),
                "BigNum": np.arange(1, 21) * 88,
                "Str": ["a" for _ in range(20)],
                "LongStr": ["abcde" for _ in range(20)],
            }
        )
        expected = df.iloc[[0]]

        with ensure_clean_store(setup_path) as store:
            store.append_to_multiple(
                {
                    "index": ["IX"],
                    "nums": ["Num", "BigNum"],
                    "strs": ["Str", "LongStr"],
                },
                df.iloc[[0]],
                "index",
                min_itemsize={"Str": 10, "LongStr": 100, "Num": 2},
            )
            result = store.select_as_multiple(["index", "nums", "strs"])
            tm.assert_frame_equal(result, expected)
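    # --- Illustrative sketch, not part of the original test file ---
    # append_to_multiple splits one wide frame across several tables; the dict
    # maps each table to the columns it owns (None = all remaining columns),
    # and the selector's columns are queryable, as in the tests above. The
    # table names "left"/"right" and the helper name are assumptions.
    def _example_append_to_multiple(self, setup_path):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
        df = concat([df1, df2], axis=1)
        with ensure_clean_store(setup_path) as store:
            store.append_to_multiple(
                {"left": ["A", "B"], "right": None}, df, selector="left"
            )
            result = store.select_as_multiple(
                ["left", "right"], where="A>0", selector="left"
            )
            tm.assert_frame_equal(result, df[df.A > 0])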
tm.assert_frame_equal(result, expected)3073 def test_start_stop_multiple(self, setup_path):3074 # GH 162093075 with ensure_clean_store(setup_path) as store:3076 df = DataFrame({"foo": [1, 2], "bar": [1, 2]})3077 store.append_to_multiple(3078 {"selector": ["foo"], "data": None}, df, selector="selector"3079 )3080 result = store.select_as_multiple(3081 ["selector", "data"], selector="selector", start=0, stop=13082 )3083 expected = df.loc[[0], ["foo", "bar"]]3084 tm.assert_frame_equal(result, expected)3085 def test_start_stop_fixed(self, setup_path):3086 with ensure_clean_store(setup_path) as store:3087 # fixed, GH 82873088 df = DataFrame(3089 dict(A=np.random.rand(20), B=np.random.rand(20)),3090 index=pd.date_range("20130101", periods=20),3091 )3092 store.put("df", df)3093 result = store.select("df", start=0, stop=5)3094 expected = df.iloc[0:5, :]3095 tm.assert_frame_equal(result, expected)3096 result = store.select("df", start=5, stop=10)3097 expected = df.iloc[5:10, :]3098 tm.assert_frame_equal(result, expected)3099 # out of range3100 result = store.select("df", start=30, stop=40)3101 expected = df.iloc[30:40, :]3102 tm.assert_frame_equal(result, expected)3103 # series3104 s = df.A3105 store.put("s", s)3106 result = store.select("s", start=0, stop=5)3107 expected = s.iloc[0:5]3108 tm.assert_series_equal(result, expected)3109 result = store.select("s", start=5, stop=10)3110 expected = s.iloc[5:10]3111 tm.assert_series_equal(result, expected)3112 # sparse; not implemented3113 df = tm.makeDataFrame()3114 df.iloc[3:5, 1:3] = np.nan3115 df.iloc[8:10, -2] = np.nan3116 def test_select_filter_corner(self, setup_path):3117 df = DataFrame(np.random.randn(50, 100))3118 df.index = [f"{c:3d}" for c in df.index]3119 df.columns = [f"{c:3d}" for c in df.columns]3120 with ensure_clean_store(setup_path) as store:3121 store.put("frame", df, format="table")3122 crit = "columns=df.columns[:75]"3123 result = store.select("frame", [crit])3124 tm.assert_frame_equal(result, df.loc[:, df.columns[:75]])3125 crit = "columns=df.columns[:75:2]"3126 result = store.select("frame", [crit])3127 tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]])3128 def test_path_pathlib(self, setup_path):3129 df = tm.makeDataFrame()3130 result = tm.round_trip_pathlib(3131 lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df")3132 )3133 tm.assert_frame_equal(df, result)3134 @pytest.mark.parametrize("start, stop", [(0, 2), (1, 2), (None, None)])3135 def test_contiguous_mixed_data_table(self, start, stop, setup_path):3136 # GH 170213137 # ValueError when reading a contiguous mixed-data table ft. 
VLArray3138 df = DataFrame(3139 {3140 "a": Series([20111010, 20111011, 20111012]),3141 "b": Series(["ab", "cd", "ab"]),3142 }3143 )3144 with ensure_clean_store(setup_path) as store:3145 store.append("test_dataset", df)3146 result = store.select("test_dataset", start=start, stop=stop)3147 tm.assert_frame_equal(df[start:stop], result)3148 def test_path_pathlib_hdfstore(self, setup_path):3149 df = tm.makeDataFrame()3150 def writer(path):3151 with pd.HDFStore(path) as store:3152 df.to_hdf(store, "df")3153 def reader(path):3154 with pd.HDFStore(path) as store:3155 return pd.read_hdf(store, "df")3156 result = tm.round_trip_pathlib(writer, reader)3157 tm.assert_frame_equal(df, result)3158 def test_pickle_path_localpath(self, setup_path):3159 df = tm.makeDataFrame()3160 result = tm.round_trip_pathlib(3161 lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df")3162 )3163 tm.assert_frame_equal(df, result)3164 def test_path_localpath_hdfstore(self, setup_path):3165 df = tm.makeDataFrame()3166 def writer(path):3167 with pd.HDFStore(path) as store:3168 df.to_hdf(store, "df")3169 def reader(path):3170 with pd.HDFStore(path) as store:3171 return pd.read_hdf(store, "df")3172 result = tm.round_trip_localpath(writer, reader)3173 tm.assert_frame_equal(df, result)3174 def _check_roundtrip(self, obj, comparator, path, compression=False, **kwargs):3175 options = {}3176 if compression:3177 options["complib"] = _default_compressor3178 with ensure_clean_store(path, "w", **options) as store:3179 store["obj"] = obj3180 retrieved = store["obj"]3181 comparator(retrieved, obj, **kwargs)3182 def _check_double_roundtrip(3183 self, obj, comparator, path, compression=False, **kwargs3184 ):3185 options = {}3186 if compression:3187 options["complib"] = compression or _default_compressor3188 with ensure_clean_store(path, "w", **options) as store:3189 store["obj"] = obj3190 retrieved = store["obj"]3191 comparator(retrieved, obj, **kwargs)3192 store["obj"] = retrieved3193 again = store["obj"]3194 comparator(again, obj, **kwargs)3195 def _check_roundtrip_table(self, obj, comparator, path, compression=False):3196 options = {}3197 if compression:3198 options["complib"] = _default_compressor3199 with ensure_clean_store(path, "w", **options) as store:3200 store.put("obj", obj, format="table")3201 retrieved = store["obj"]3202 comparator(retrieved, obj)3203 def test_multiple_open_close(self, setup_path):3204 # gh-4409: open & close multiple times3205 with ensure_clean_path(setup_path) as path:3206 df = tm.makeDataFrame()3207 df.to_hdf(path, "df", mode="w", format="table")3208 # single3209 store = HDFStore(path)3210 assert "CLOSED" not in store.info()3211 assert store.is_open3212 store.close()3213 assert "CLOSED" in store.info()3214 assert not store.is_open3215 with ensure_clean_path(setup_path) as path:3216 if pytables._table_file_open_policy_is_strict:3217 # multiples3218 store1 = HDFStore(path)3219 with pytest.raises(ValueError):3220 HDFStore(path)3221 store1.close()3222 else:3223 # multiples3224 store1 = HDFStore(path)3225 store2 = HDFStore(path)3226 assert "CLOSED" not in store1.info()3227 assert "CLOSED" not in store2.info()3228 assert store1.is_open3229 assert store2.is_open3230 store1.close()3231 assert "CLOSED" in store1.info()3232 assert not store1.is_open3233 assert "CLOSED" not in store2.info()3234 assert store2.is_open3235 store2.close()3236 assert "CLOSED" in store1.info()3237 assert "CLOSED" in store2.info()3238 assert not store1.is_open3239 assert not store2.is_open3240 # nested close3241 store = HDFStore(path, 
mode="w")3242 store.append("df", df)3243 store2 = HDFStore(path)3244 store2.append("df2", df)3245 store2.close()3246 assert "CLOSED" in store2.info()3247 assert not store2.is_open3248 store.close()3249 assert "CLOSED" in store.info()3250 assert not store.is_open3251 # double closing3252 store = HDFStore(path, mode="w")3253 store.append("df", df)3254 store2 = HDFStore(path)3255 store.close()3256 assert "CLOSED" in store.info()3257 assert not store.is_open3258 store2.close()3259 assert "CLOSED" in store2.info()3260 assert not store2.is_open3261 # ops on a closed store3262 with ensure_clean_path(setup_path) as path:3263 df = tm.makeDataFrame()3264 df.to_hdf(path, "df", mode="w", format="table")3265 store = HDFStore(path)3266 store.close()3267 with pytest.raises(ClosedFileError):3268 store.keys()3269 with pytest.raises(ClosedFileError):3270 "df" in store3271 with pytest.raises(ClosedFileError):3272 len(store)3273 with pytest.raises(ClosedFileError):3274 store["df"]3275 with pytest.raises(AttributeError):3276 store.df3277 with pytest.raises(ClosedFileError):3278 store.select("df")3279 with pytest.raises(ClosedFileError):3280 store.get("df")3281 with pytest.raises(ClosedFileError):3282 store.append("df2", df)3283 with pytest.raises(ClosedFileError):3284 store.put("df3", df)3285 with pytest.raises(ClosedFileError):3286 store.get_storer("df2")3287 with pytest.raises(ClosedFileError):3288 store.remove("df2")3289 with pytest.raises(ClosedFileError, match="file is not open"):3290 store.select("df")3291 def test_pytables_native_read(self, datapath, setup_path):3292 with ensure_clean_store(3293 datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r"3294 ) as store:3295 d2 = store["detector/readout"]3296 assert isinstance(d2, DataFrame)3297 @pytest.mark.skipif(3298 is_platform_windows(), reason="native2 read fails oddly on windows"3299 )3300 def test_pytables_native2_read(self, datapath, setup_path):3301 with ensure_clean_store(3302 datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r"3303 ) as store:3304 str(store)3305 d1 = store["detector"]3306 assert isinstance(d1, DataFrame)3307 @td.xfail_non_writeable3308 def test_legacy_table_fixed_format_read_py2(self, datapath, setup_path):3309 # GH 245103310 # legacy table with fixed format written in Python 23311 with ensure_clean_store(3312 datapath("io", "data", "legacy_hdf", "legacy_table_fixed_py2.h5"), mode="r"3313 ) as store:3314 result = store.select("df")3315 expected = pd.DataFrame(3316 [[1, 2, 3, "D"]],3317 columns=["A", "B", "C", "D"],3318 index=pd.Index(["ABC"], name="INDEX_NAME"),3319 )3320 tm.assert_frame_equal(expected, result)3321 def test_legacy_table_fixed_format_read_datetime_py2(self, datapath, setup_path):3322 # GH 317503323 # legacy table with fixed format and datetime64 column written in Python 23324 with ensure_clean_store(3325 datapath("io", "data", "legacy_hdf", "legacy_table_fixed_datetime_py2.h5"),3326 mode="r",3327 ) as store:3328 result = store.select("df")3329 expected = pd.DataFrame(3330 [[pd.Timestamp("2020-02-06T18:00")]],3331 columns=["A"],3332 index=pd.Index(["date"]),3333 )3334 tm.assert_frame_equal(expected, result)3335 def test_legacy_table_read_py2(self, datapath, setup_path):3336 # issue: 249253337 # legacy table written in Python 23338 with ensure_clean_store(3339 datapath("io", "data", "legacy_hdf", "legacy_table_py2.h5"), mode="r"3340 ) as store:3341 result = store.select("table")3342 expected = pd.DataFrame({"a": ["a", "b"], "b": [2, 3]})3343 tm.assert_frame_equal(expected, result)3344 def 
test_copy(self, setup_path):3345 with catch_warnings(record=True):3346 def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs):3347 try:3348 store = HDFStore(f, "r")3349 if new_f is None:3350 import tempfile3351 fd, new_f = tempfile.mkstemp()3352 tstore = store.copy(3353 new_f, keys=keys, propindexes=propindexes, **kwargs3354 )3355 # check keys3356 if keys is None:3357 keys = store.keys()3358 assert set(keys) == set(tstore.keys())3359 # check indices & nrows3360 for k in tstore.keys():3361 if tstore.get_storer(k).is_table:3362 new_t = tstore.get_storer(k)3363 orig_t = store.get_storer(k)3364 assert orig_t.nrows == new_t.nrows3365 # check propindixes3366 if propindexes:3367 for a in orig_t.axes:3368 if a.is_indexed:3369 assert new_t[a.name].is_indexed3370 finally:3371 safe_close(store)3372 safe_close(tstore)3373 try:3374 os.close(fd)3375 except (OSError, ValueError):3376 pass3377 safe_remove(new_f)3378 # new table3379 df = tm.makeDataFrame()3380 try:3381 path = create_tempfile(setup_path)3382 st = HDFStore(path)3383 st.append("df", df, data_columns=["A"])3384 st.close()3385 do_copy(f=path)3386 do_copy(f=path, propindexes=False)3387 finally:3388 safe_remove(path)3389 def test_store_datetime_fractional_secs(self, setup_path):3390 with ensure_clean_store(setup_path) as store:3391 dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)3392 series = Series([0], [dt])3393 store["a"] = series3394 assert store["a"].index[0] == dt3395 def test_tseries_indices_series(self, setup_path):3396 with ensure_clean_store(setup_path) as store:3397 idx = tm.makeDateIndex(10)3398 ser = Series(np.random.randn(len(idx)), idx)3399 store["a"] = ser3400 result = store["a"]3401 tm.assert_series_equal(result, ser)3402 assert result.index.freq == ser.index.freq3403 tm.assert_class_equal(result.index, ser.index, obj="series index")3404 idx = tm.makePeriodIndex(10)3405 ser = Series(np.random.randn(len(idx)), idx)3406 store["a"] = ser3407 result = store["a"]3408 tm.assert_series_equal(result, ser)3409 assert result.index.freq == ser.index.freq3410 tm.assert_class_equal(result.index, ser.index, obj="series index")3411 def test_tseries_indices_frame(self, setup_path):3412 with ensure_clean_store(setup_path) as store:3413 idx = tm.makeDateIndex(10)3414 df = DataFrame(np.random.randn(len(idx), 3), index=idx)3415 store["a"] = df3416 result = store["a"]3417 tm.assert_frame_equal(result, df)3418 assert result.index.freq == df.index.freq3419 tm.assert_class_equal(result.index, df.index, obj="dataframe index")3420 idx = tm.makePeriodIndex(10)3421 df = DataFrame(np.random.randn(len(idx), 3), idx)3422 store["a"] = df3423 result = store["a"]3424 tm.assert_frame_equal(result, df)3425 assert result.index.freq == df.index.freq3426 tm.assert_class_equal(result.index, df.index, obj="dataframe index")3427 def test_unicode_index(self, setup_path):3428 unicode_values = ["\u03c3", "\u03c3\u03c3"]3429 # PerformanceWarning3430 with catch_warnings(record=True):3431 simplefilter("ignore", pd.errors.PerformanceWarning)3432 s = Series(np.random.randn(len(unicode_values)), unicode_values)3433 self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)3434 def test_unicode_longer_encoded(self, setup_path):3435 # GH 112343436 char = "\u0394"3437 df = pd.DataFrame({"A": [char]})3438 with ensure_clean_store(setup_path) as store:3439 store.put("df", df, format="table", encoding="utf-8")3440 result = store.get("df")3441 tm.assert_frame_equal(result, df)3442 df = pd.DataFrame({"A": ["a", char], "B": ["b", "b"]})3443 with 
ensure_clean_store(setup_path) as store:3444 store.put("df", df, format="table", encoding="utf-8")3445 result = store.get("df")3446 tm.assert_frame_equal(result, df)3447 @td.xfail_non_writeable3448 def test_store_datetime_mixed(self, setup_path):3449 df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]})3450 ts = tm.makeTimeSeries()3451 df["d"] = ts.index[:3]3452 self._check_roundtrip(df, tm.assert_frame_equal, path=setup_path)3453 # FIXME: don't leave commented-out code3454 # def test_cant_write_multiindex_table(self):3455 # # for now, #18483456 # df = DataFrame(np.random.randn(10, 4),3457 # index=[np.arange(5).repeat(2),3458 # np.tile(np.arange(2), 5)])3459 #3460 # with pytest.raises(Exception):3461 # store.put('foo', df, format='table')3462 def test_append_with_diff_col_name_types_raises_value_error(self, setup_path):3463 df = DataFrame(np.random.randn(10, 1))3464 df2 = DataFrame({"a": np.random.randn(10)})3465 df3 = DataFrame({(1, 2): np.random.randn(10)})3466 df4 = DataFrame({("1", 2): np.random.randn(10)})3467 df5 = DataFrame({("1", 2, object): np.random.randn(10)})3468 with ensure_clean_store(setup_path) as store:3469 name = f"df_{tm.rands(10)}"3470 store.append(name, df)3471 for d in (df2, df3, df4, df5):3472 with pytest.raises(ValueError):3473 store.append(name, d)3474 def test_query_with_nested_special_character(self, setup_path):3475 df = DataFrame(3476 {3477 "a": ["a", "a", "c", "b", "test & test", "c", "b", "e"],3478 "b": [1, 2, 3, 4, 5, 6, 7, 8],3479 }3480 )3481 expected = df[df.a == "test & test"]3482 with ensure_clean_store(setup_path) as store:3483 store.append("test", df, format="table", data_columns=True)3484 result = store.select("test", 'a = "test & test"')3485 tm.assert_frame_equal(expected, result)3486 def test_categorical(self, setup_path):3487 with ensure_clean_store(setup_path) as store:3488 # Basic3489 _maybe_remove(store, "s")3490 s = Series(3491 Categorical(3492 ["a", "b", "b", "a", "a", "c"],3493 categories=["a", "b", "c", "d"],3494 ordered=False,3495 )3496 )3497 store.append("s", s, format="table")3498 result = store.select("s")3499 tm.assert_series_equal(s, result)3500 _maybe_remove(store, "s_ordered")3501 s = Series(3502 Categorical(3503 ["a", "b", "b", "a", "a", "c"],3504 categories=["a", "b", "c", "d"],3505 ordered=True,3506 )3507 )3508 store.append("s_ordered", s, format="table")3509 result = store.select("s_ordered")3510 tm.assert_series_equal(s, result)3511 _maybe_remove(store, "df")3512 df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})3513 store.append("df", df, format="table")3514 result = store.select("df")3515 tm.assert_frame_equal(result, df)3516 # Dtypes3517 _maybe_remove(store, "si")3518 s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category")3519 store.append("si", s)3520 result = store.select("si")3521 tm.assert_series_equal(result, s)3522 _maybe_remove(store, "si2")3523 s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category")3524 store.append("si2", s)3525 result = store.select("si2")3526 tm.assert_series_equal(result, s)3527 # Multiple3528 _maybe_remove(store, "df2")3529 df2 = df.copy()3530 df2["s2"] = Series(list("abcdefg")).astype("category")3531 store.append("df2", df2)3532 result = store.select("df2")3533 tm.assert_frame_equal(result, df2)3534 # Make sure the metadata is OK3535 info = store.info()3536 assert "/df2 " in info3537 # assert '/df2/meta/values_block_0/meta' in info3538 assert "/df2/meta/values_block_1/meta" in info3539 # unordered3540 _maybe_remove(store, "s2")3541 s = Series(3542 Categorical(3543 ["a", 
"b", "b", "a", "a", "c"],3544 categories=["a", "b", "c", "d"],3545 ordered=False,3546 )3547 )3548 store.append("s2", s, format="table")3549 result = store.select("s2")3550 tm.assert_series_equal(result, s)3551 # Query3552 _maybe_remove(store, "df3")3553 store.append("df3", df, data_columns=["s"])3554 expected = df[df.s.isin(["b", "c"])]3555 result = store.select("df3", where=['s in ["b","c"]'])3556 tm.assert_frame_equal(result, expected)3557 expected = df[df.s.isin(["b", "c"])]3558 result = store.select("df3", where=['s = ["b","c"]'])3559 tm.assert_frame_equal(result, expected)3560 expected = df[df.s.isin(["d"])]3561 result = store.select("df3", where=['s in ["d"]'])3562 tm.assert_frame_equal(result, expected)3563 expected = df[df.s.isin(["f"])]3564 result = store.select("df3", where=['s in ["f"]'])3565 tm.assert_frame_equal(result, expected)3566 # Appending with same categories is ok3567 store.append("df3", df)3568 df = concat([df, df])3569 expected = df[df.s.isin(["b", "c"])]3570 result = store.select("df3", where=['s in ["b","c"]'])3571 tm.assert_frame_equal(result, expected)3572 # Appending must have the same categories3573 df3 = df.copy()3574 df3["s"].cat.remove_unused_categories(inplace=True)3575 with pytest.raises(ValueError):3576 store.append("df3", df3)3577 # Remove, and make sure meta data is removed (its a recursive3578 # removal so should be).3579 result = store.select("df3/meta/s/meta")3580 assert result is not None3581 store.remove("df3")3582 with pytest.raises(3583 KeyError, match="'No object named df3/meta/s/meta in the file'"3584 ):3585 store.select("df3/meta/s/meta")3586 def test_categorical_conversion(self, setup_path):3587 # GH133223588 # Check that read_hdf with categorical columns doesn't return rows if3589 # where criteria isn't met.3590 obsids = ["ESP_012345_6789", "ESP_987654_3210"]3591 imgids = ["APF00006np", "APF0001imm"]3592 data = [4.3, 9.8]3593 # Test without categories3594 df = DataFrame(dict(obsids=obsids, imgids=imgids, data=data))3595 # We are expecting an empty DataFrame matching types of df3596 expected = df.iloc[[], :]3597 with ensure_clean_path(setup_path) as path:3598 df.to_hdf(path, "df", format="table", data_columns=True)3599 result = read_hdf(path, "df", where="obsids=B")3600 tm.assert_frame_equal(result, expected)3601 # Test with categories3602 df.obsids = df.obsids.astype("category")3603 df.imgids = df.imgids.astype("category")3604 # We are expecting an empty DataFrame matching types of df3605 expected = df.iloc[[], :]3606 with ensure_clean_path(setup_path) as path:3607 df.to_hdf(path, "df", format="table", data_columns=True)3608 result = read_hdf(path, "df", where="obsids=B")3609 tm.assert_frame_equal(result, expected)3610 def test_categorical_nan_only_columns(self, setup_path):3611 # GH184133612 # Check that read_hdf with categorical columns with NaN-only values can3613 # be read back.3614 df = pd.DataFrame(3615 {3616 "a": ["a", "b", "c", np.nan],3617 "b": [np.nan, np.nan, np.nan, np.nan],3618 "c": [1, 2, 3, 4],3619 "d": pd.Series([None] * 4, dtype=object),3620 }3621 )3622 df["a"] = df.a.astype("category")3623 df["b"] = df.b.astype("category")3624 df["d"] = df.b.astype("category")3625 expected = df3626 with ensure_clean_path(setup_path) as path:3627 df.to_hdf(path, "df", format="table", data_columns=True)3628 result = read_hdf(path, "df")3629 tm.assert_frame_equal(result, expected)3630 def test_duplicate_column_name(self, setup_path):3631 df = DataFrame(columns=["a", "a"], data=[[0, 0]])3632 with ensure_clean_path(setup_path) as path:3633 with 
pytest.raises(ValueError):3634 df.to_hdf(path, "df", format="fixed")3635 df.to_hdf(path, "df", format="table")3636 other = read_hdf(path, "df")3637 tm.assert_frame_equal(df, other)3638 assert df.equals(other)3639 assert other.equals(df)3640 def test_round_trip_equals(self, setup_path):3641 # GH 93303642 df = DataFrame({"B": [1, 2], "A": ["x", "y"]})3643 with ensure_clean_path(setup_path) as path:3644 df.to_hdf(path, "df", format="table")3645 other = read_hdf(path, "df")3646 tm.assert_frame_equal(df, other)3647 assert df.equals(other)3648 assert other.equals(df)3649 def test_preserve_timedeltaindex_type(self, setup_path):3650 # GH96353651 # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve3652 # the type of the index.3653 df = DataFrame(np.random.normal(size=(10, 5)))3654 df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example")3655 with ensure_clean_store(setup_path) as store:3656 store["df"] = df3657 tm.assert_frame_equal(store["df"], df)3658 def test_columns_multiindex_modified(self, setup_path):3659 # BUG: 72123660 # read_hdf store.select modified the passed columns parameters3661 # when multi-indexed.3662 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3663 df.index.name = "letters"3664 df = df.set_index(keys="E", append=True)3665 data_columns = df.index.names + df.columns.tolist()3666 with ensure_clean_path(setup_path) as path:3667 df.to_hdf(3668 path,3669 "df",3670 mode="a",3671 append=True,3672 data_columns=data_columns,3673 index=False,3674 )3675 cols2load = list("BCD")3676 cols2load_original = list(cols2load)3677 df_loaded = read_hdf(path, "df", columns=cols2load) # noqa3678 assert cols2load_original == cols2load3679 @ignore_natural_naming_warning3680 def test_to_hdf_with_object_column_names(self, setup_path):3681 # GH90573682 # Writing HDF5 table format should only work for string-like3683 # column types3684 types_should_fail = [3685 tm.makeIntIndex,3686 tm.makeFloatIndex,3687 tm.makeDateIndex,3688 tm.makeTimedeltaIndex,3689 tm.makePeriodIndex,3690 ]3691 types_should_run = [3692 tm.makeStringIndex,3693 tm.makeCategoricalIndex,3694 tm.makeUnicodeIndex,3695 ]3696 for index in types_should_fail:3697 df = DataFrame(np.random.randn(10, 2), columns=index(2))3698 with ensure_clean_path(setup_path) as path:3699 with catch_warnings(record=True):3700 msg = "cannot have non-object label DataIndexableCol"3701 with pytest.raises(ValueError, match=msg):3702 df.to_hdf(path, "df", format="table", data_columns=True)3703 for index in types_should_run:3704 df = DataFrame(np.random.randn(10, 2), columns=index(2))3705 with ensure_clean_path(setup_path) as path:3706 with catch_warnings(record=True):3707 df.to_hdf(path, "df", format="table", data_columns=True)3708 result = pd.read_hdf(path, "df", where=f"index = [{df.index[0]}]")3709 assert len(result)3710 def test_read_hdf_open_store(self, setup_path):3711 # GH103303712 # No check for non-string path_or-buf, and no test of open store3713 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3714 df.index.name = "letters"3715 df = df.set_index(keys="E", append=True)3716 with ensure_clean_path(setup_path) as path:3717 df.to_hdf(path, "df", mode="w")3718 direct = read_hdf(path, "df")3719 store = HDFStore(path, mode="r")3720 indirect = read_hdf(store, "df")3721 tm.assert_frame_equal(direct, indirect)3722 assert store.is_open3723 store.close()3724 def test_read_hdf_iterator(self, setup_path):3725 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), 
columns=list("ABCDE"))3726 df.index.name = "letters"3727 df = df.set_index(keys="E", append=True)3728 with ensure_clean_path(setup_path) as path:3729 df.to_hdf(path, "df", mode="w", format="t")3730 direct = read_hdf(path, "df")3731 iterator = read_hdf(path, "df", iterator=True)3732 assert isinstance(iterator, TableIterator)3733 indirect = next(iterator.__iter__())3734 tm.assert_frame_equal(direct, indirect)3735 iterator.store.close()3736 def test_read_hdf_errors(self, setup_path):3737 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3738 with ensure_clean_path(setup_path) as path:3739 with pytest.raises(IOError):3740 read_hdf(path, "key")3741 df.to_hdf(path, "df")3742 store = HDFStore(path, mode="r")3743 store.close()3744 with pytest.raises(IOError):3745 read_hdf(store, "df")3746 def test_read_hdf_generic_buffer_errors(self):3747 with pytest.raises(NotImplementedError):3748 read_hdf(BytesIO(b""), "df")3749 def test_invalid_complib(self, setup_path):3750 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3751 with ensure_clean_path(setup_path) as path:3752 with pytest.raises(ValueError):3753 df.to_hdf(path, "df", complib="foolib")3754 # GH104433755 def test_read_nokey(self, setup_path):3756 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3757 # Categorical dtype not supported for "fixed" format. So no need3758 # to test with that dtype in the dataframe here.3759 with ensure_clean_path(setup_path) as path:3760 df.to_hdf(path, "df", mode="a")3761 reread = read_hdf(path)3762 tm.assert_frame_equal(df, reread)3763 df.to_hdf(path, "df2", mode="a")3764 with pytest.raises(ValueError):3765 read_hdf(path)3766 def test_read_nokey_table(self, setup_path):3767 # GH132313768 df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")})3769 with ensure_clean_path(setup_path) as path:3770 df.to_hdf(path, "df", mode="a", format="table")3771 reread = read_hdf(path)3772 tm.assert_frame_equal(df, reread)3773 df.to_hdf(path, "df2", mode="a", format="table")3774 with pytest.raises(ValueError):3775 read_hdf(path)3776 def test_read_nokey_empty(self, setup_path):3777 with ensure_clean_path(setup_path) as path:3778 store = HDFStore(path)3779 store.close()3780 with pytest.raises(ValueError):3781 read_hdf(path)3782 def test_read_from_pathlib_path(self, setup_path):3783 # GH117733784 expected = DataFrame(3785 np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")3786 )3787 with ensure_clean_path(setup_path) as filename:3788 path_obj = Path(filename)3789 expected.to_hdf(path_obj, "df", mode="a")3790 actual = read_hdf(path_obj, "df")3791 tm.assert_frame_equal(expected, actual)3792 @td.skip_if_no("py.path")3793 def test_read_from_py_localpath(self, setup_path):3794 # GH117733795 from py.path import local as LocalPath3796 expected = DataFrame(3797 np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")3798 )3799 with ensure_clean_path(setup_path) as filename:3800 path_obj = LocalPath(filename)3801 expected.to_hdf(path_obj, "df", mode="a")3802 actual = read_hdf(path_obj, "df")3803 tm.assert_frame_equal(expected, actual)3804 def test_query_long_float_literal(self, setup_path):3805 # GH 142413806 df = pd.DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]})3807 with ensure_clean_store(setup_path) as store:3808 store.append("test", df, format="table", data_columns=True)3809 cutoff = 1000000000.00063810 result = store.select("test", f"A < {cutoff:.4f}")3811 assert result.empty3812 cutoff = 
1000000000.00103813 result = store.select("test", f"A > {cutoff:.4f}")3814 expected = df.loc[[1, 2], :]3815 tm.assert_frame_equal(expected, result)3816 exact = 1000000000.00113817 result = store.select("test", f"A == {exact:.4f}")3818 expected = df.loc[[1], :]3819 tm.assert_frame_equal(expected, result)3820 def test_query_compare_column_type(self, setup_path):3821 # GH 154923822 df = pd.DataFrame(3823 {3824 "date": ["2014-01-01", "2014-01-02"],3825 "real_date": date_range("2014-01-01", periods=2),3826 "float": [1.1, 1.2],3827 "int": [1, 2],3828 },3829 columns=["date", "real_date", "float", "int"],3830 )3831 with ensure_clean_store(setup_path) as store:3832 store.append("test", df, format="table", data_columns=True)3833 ts = pd.Timestamp("2014-01-01") # noqa3834 result = store.select("test", where="real_date > ts")3835 expected = df.loc[[1], :]3836 tm.assert_frame_equal(expected, result)3837 for op in ["<", ">", "=="]:3838 # non strings to string column always fail3839 for v in [2.1, True, pd.Timestamp("2014-01-01"), pd.Timedelta(1, "s")]:3840 query = f"date {op} v"3841 with pytest.raises(TypeError):3842 store.select("test", where=query)3843 # strings to other columns must be convertible to type3844 v = "a"3845 for col in ["int", "float", "real_date"]:3846 query = f"{col} {op} v"3847 with pytest.raises(ValueError):3848 store.select("test", where=query)3849 for v, col in zip(3850 ["1", "1.1", "2014-01-01"], ["int", "float", "real_date"]3851 ):3852 query = f"{col} {op} v"3853 result = store.select("test", where=query)3854 if op == "==":3855 expected = df.loc[[0], :]3856 elif op == ">":3857 expected = df.loc[[1], :]3858 else:3859 expected = df.loc[[], :]3860 tm.assert_frame_equal(expected, result)3861 @pytest.mark.parametrize("format", ["fixed", "table"])3862 def test_read_hdf_series_mode_r(self, format, setup_path):3863 # GH 165833864 # Tests that reading a Series saved to an HDF file3865 # still works if a mode='r' argument is supplied3866 series = tm.makeFloatSeries()3867 with ensure_clean_path(setup_path) as path:3868 series.to_hdf(path, key="data", format=format)3869 result = pd.read_hdf(path, key="data", mode="r")3870 tm.assert_series_equal(result, series)3871 def test_fspath(self):3872 with tm.ensure_clean("foo.h5") as path:3873 with pd.HDFStore(path) as store:3874 assert os.fspath(store) == str(path)3875 def test_read_py2_hdf_file_in_py3(self, datapath):3876 # GH 167813877 # tests reading a PeriodIndex DataFrame written in Python2 in Python33878 # the file was generated in Python 2.7 like so:3879 #3880 # df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex(3881 # ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))3882 # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')3883 expected = pd.DataFrame(3884 [1.0, 2, 3],3885 index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"),3886 )3887 with ensure_clean_store(3888 datapath(3889 "io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5"3890 ),3891 mode="r",3892 ) as store:3893 result = store["p"]3894 tm.assert_frame_equal(result, expected)3895 @pytest.mark.parametrize("where", ["", (), (None,), [], [None]])3896 def test_select_empty_where(self, where):3897 # GH266103898 # Using keyword `where` as '' or (), or [None], etc3899 # while reading from HDF store raises3900 # "SyntaxError: only a single expression is allowed"3901 df = pd.DataFrame([1, 2, 3])3902 with ensure_clean_path("empty_where.h5") as path:3903 with pd.HDFStore(path) as store:3904 store.put("df", df, "t")3905 result = 
pd.read_hdf(store, "df", where=where)3906 tm.assert_frame_equal(result, df)3907 @pytest.mark.parametrize(3908 "idx",3909 [3910 date_range("2019", freq="D", periods=3, tz="UTC"),3911 CategoricalIndex(list("abc")),3912 ],3913 )3914 def test_to_hdf_multiindex_extension_dtype(self, idx, setup_path):3915 # GH 77753916 mi = MultiIndex.from_arrays([idx, idx])3917 df = pd.DataFrame(0, index=mi, columns=["a"])3918 with ensure_clean_path(setup_path) as path:3919 with pytest.raises(NotImplementedError, match="Saving a MultiIndex"):3920 df.to_hdf(path, "df")3921 def test_unsuppored_hdf_file_error(self, datapath):3922 # GH 95393923 data_path = datapath("io", "data", "legacy_hdf/incompatible_dataset.h5")3924 message = (3925 r"Dataset\(s\) incompatible with Pandas data types, "3926 "not table, or no datasets found in HDF5 file."3927 )3928 with pytest.raises(ValueError, match=message):...


test_round_trip.py

Source: test_round_trip.py (GitHub)


import datetime
import re
from warnings import (
    catch_warnings,
    simplefilter,
)
import numpy as np
import pytest
from pandas._libs.tslibs import Timestamp
from pandas.compat import is_platform_windows
import pandas as pd
from pandas import (
    DataFrame,
    Index,
    Series,
    _testing as tm,
    bdate_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_path,
    ensure_clean_store,
)
from pandas.util import _test_decorators as td

_default_compressor = "blosc"
pytestmark = pytest.mark.single

def test_conv_read_write(setup_path):
    with tm.ensure_clean() as path:

        def roundtrip(key, obj, **kwargs):
            obj.to_hdf(path, key, **kwargs)
            return read_hdf(path, key)

        o = tm.makeTimeSeries()
        tm.assert_series_equal(o, roundtrip("series", o))
        o = tm.makeStringSeries()
        tm.assert_series_equal(o, roundtrip("string_series", o))
        o = tm.makeDataFrame()
        tm.assert_frame_equal(o, roundtrip("frame", o))
        # table
        df = DataFrame({"A": range(5), "B": range(5)})
        df.to_hdf(path, "table", append=True)
        result = read_hdf(path, "table", where=["index>2"])
        tm.assert_frame_equal(df[df.index > 2], result)

def test_long_strings(setup_path):
    # GH6166
    df = DataFrame(
        {"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)
    )
    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["a"])
        result = store.select("df")
        tm.assert_frame_equal(df, result)

def test_api(setup_path):
    # GH4584
    # API issue when to_hdf doesn't accept append AND format args
    with ensure_clean_path(setup_path) as path:
        df = tm.makeDataFrame()
        df.iloc[:10].to_hdf(path, "df", append=True, format="table")
        df.iloc[10:].to_hdf(path, "df", append=True, format="table")
        tm.assert_frame_equal(read_hdf(path, "df"), df)
        # append to False
        df.iloc[:10].to_hdf(path, "df", append=False, format="table")
        df.iloc[10:].to_hdf(path, "df", append=True, format="table")
        tm.assert_frame_equal(read_hdf(path, "df"), df)
    with ensure_clean_path(setup_path) as path:
        df = tm.makeDataFrame()
        df.iloc[:10].to_hdf(path, "df", append=True)
        df.iloc[10:].to_hdf(path, "df", append=True, format="table")
        tm.assert_frame_equal(read_hdf(path, "df"), df)
        # append to False
        df.iloc[:10].to_hdf(path, "df", append=False, format="table")
        df.iloc[10:].to_hdf(path, "df", append=True)
        tm.assert_frame_equal(read_hdf(path, "df"), df)
    with ensure_clean_path(setup_path) as path:
        df = tm.makeDataFrame()
        df.to_hdf(path, "df", append=False, format="fixed")
        tm.assert_frame_equal(read_hdf(path, "df"), df)
        df.to_hdf(path, "df", append=False, format="f")
        tm.assert_frame_equal(read_hdf(path, "df"), df)
        df.to_hdf(path, "df", append=False)
        tm.assert_frame_equal(read_hdf(path, "df"), df)
        df.to_hdf(path, "df")
        tm.assert_frame_equal(read_hdf(path, "df"), df)
    with ensure_clean_store(setup_path) as store:
        df = tm.makeDataFrame()
        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=True, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)
        # append to False
        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)
        # formats
        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)
        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format=None)
        tm.assert_frame_equal(store.select("df"), df)
    with ensure_clean_path(setup_path) as path:
        # Invalid.
        df = tm.makeDataFrame()
        msg = "Can only append to Tables"
        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, "df", append=True, format="f")
        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, "df", append=True, format="fixed")
        msg = r"invalid HDFStore format specified \[foo\]"
        with pytest.raises(TypeError, match=msg):
            df.to_hdf(path, "df", append=True, format="foo")
        with pytest.raises(TypeError, match=msg):
            df.to_hdf(path, "df", append=False, format="foo")
    # File path doesn't exist
    path = ""
    msg = f"File {path} does not exist"
    with pytest.raises(FileNotFoundError, match=msg):
        read_hdf(path, "df")

def test_get(setup_path):
    with ensure_clean_store(setup_path) as store:
        store["a"] = tm.makeTimeSeries()
        left = store.get("a")
        right = store["a"]
        tm.assert_series_equal(left, right)
        left = store.get("/a")
        right = store["/a"]
        tm.assert_series_equal(left, right)
        with pytest.raises(KeyError, match="'No object named b in the file'"):
            store.get("b")

def test_put_integer(setup_path):
    # non-date, non-string index
    df = DataFrame(np.random.randn(50, 100))
    _check_roundtrip(df, tm.assert_frame_equal, setup_path)

def test_table_values_dtypes_roundtrip(setup_path):
    with ensure_clean_store(setup_path) as store:
        df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8")
        store.append("df_f8", df1)
        tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes)
        df2 = DataFrame({"a": [1, 2, 3]}, dtype="i8")
        store.append("df_i8", df2)
        tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes)
        # incompatible dtype
        msg = re.escape(
            "invalid combination of [values_axes] on appending data "
            "[name->values_block_0,cname->values_block_0,"
            "dtype->float64,kind->float,shape->(1, 3)] vs "
            "current table [name->values_block_0,"
            "cname->values_block_0,dtype->int64,kind->integer,"
            "shape->None]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_i8", df1)
        # check creation/storage/retrieval of float32 (a bit hacky to
        # actually create them though)
        df1 = DataFrame(np.array([[1], [2], [3]], dtype="f4"), columns=["A"])
        store.append("df_f4", df1)
        tm.assert_series_equal(df1.dtypes, store["df_f4"].dtypes)
        assert df1.dtypes[0] == "float32"
        # check with mixed dtypes
        df1 = DataFrame(
            {
                c: Series(np.random.randint(5), dtype=c)
                for c in ["float32", "float64", "int32", "int64", "int16", "int8"]
            }
        )
        df1["string"] = "foo"
        df1["float322"] = 1.0
        df1["float322"] = df1["float322"].astype("float32")
        df1["bool"] = df1["float32"] > 0
        df1["time1"] = Timestamp("20130101")
        df1["time2"] = Timestamp("20130102")
        store.append("df_mixed_dtypes1", df1)
        result = store.select("df_mixed_dtypes1").dtypes.value_counts()
        result.index = [str(i) for i in result.index]
        expected = Series(
            {
                "float32": 2,
                "float64": 1,
                "int32": 1,
                "bool": 1,
                "int16": 1,
                "int8": 1,
                "int64": 1,
                "object": 1,
                "datetime64[ns]": 2,
            }
        )
        result = result.sort_index()
        expected = expected.sort_index()
        tm.assert_series_equal(result, expected)

def test_series(setup_path):
    s = tm.makeStringSeries()
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)
    ts = tm.makeTimeSeries()
    _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
    ts2 = Series(ts.index, Index(ts.index, dtype=object))
    _check_roundtrip(ts2, tm.assert_series_equal, path=setup_path)
    ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object))
    _check_roundtrip(
        ts3, tm.assert_series_equal, path=setup_path, check_index_type=False
    )

def test_float_index(setup_path):
    # GH #454
    index = np.random.randn(10)
    s = Series(np.random.randn(10), index=index)
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)

def test_tuple_index(setup_path):
    # GH #492
    col = np.arange(10)
    idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)]
    data = np.random.randn(30).reshape((3, 10))
    DF = DataFrame(data, index=idx, columns=col)
    with catch_warnings(record=True):
        simplefilter("ignore", pd.errors.PerformanceWarning)
        _check_roundtrip(DF, tm.assert_frame_equal, path=setup_path)

@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_index_types(setup_path):
    with catch_warnings(record=True):
        values = np.random.randn(2)
        func = lambda l, r: tm.assert_series_equal(l, r, check_index_type=True)
    with catch_warnings(record=True):
        ser = Series(values, [0, "y"])
        _check_roundtrip(ser, func, path=setup_path)
    with catch_warnings(record=True):
        ser = Series(values, [datetime.datetime.today(), 0])
        _check_roundtrip(ser, func, path=setup_path)
    with catch_warnings(record=True):
        ser = Series(values, ["y", 0])
        _check_roundtrip(ser, func, path=setup_path)
    with catch_warnings(record=True):
        ser = Series(values, [datetime.date.today(), "a"])
        _check_roundtrip(ser, func, path=setup_path)
    with catch_warnings(record=True):
        ser = Series(values, [0, "y"])
        _check_roundtrip(ser, func, path=setup_path)
        ser = Series(values, [datetime.datetime.today(), 0])
        _check_roundtrip(ser, func, path=setup_path)
        ser = Series(values, ["y", 0])
        _check_roundtrip(ser, func, path=setup_path)
        ser = Series(values, [datetime.date.today(), "a"])
        _check_roundtrip(ser, func, path=setup_path)
        ser = Series(values, [1.23, "b"])
        _check_roundtrip(ser, func, path=setup_path)
        ser = Series(values, [1, 1.53])
        _check_roundtrip(ser, func, path=setup_path)
        ser = Series(values, [1, 5])
        _check_roundtrip(ser, func, path=setup_path)
        ser = Series(
            values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)]
        )
        _check_roundtrip(ser, func, path=setup_path)

def test_timeseries_preepoch(setup_path):
    dr = bdate_range("1/1/1940", "1/1/1960")
    ts = Series(np.random.randn(len(dr)), index=dr)
    try:
        _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
    except OverflowError:
        if is_platform_windows():
            pytest.xfail("known failure on some windows platforms")
        else:
            raise

@pytest.mark.parametrize(
    "compression", [False, pytest.param(True, marks=td.skip_if_windows)]
)
def test_frame(compression, setup_path):
    df = tm.makeDataFrame()
    # put in some random NAs
    df.values[0, 0] = np.nan
    df.values[5, 3] = np.nan
    _check_roundtrip_table(
        df, tm.assert_frame_equal, path=setup_path, compression=compression
    )
    _check_roundtrip(
        df, tm.assert_frame_equal, path=setup_path, compression=compression
    )
    tdf = tm.makeTimeDataFrame()
    _check_roundtrip(
        tdf, tm.assert_frame_equal, path=setup_path, compression=compression
    )
    with ensure_clean_store(setup_path) as store:
        # not consolidated
        df["foo"] = np.random.randn(len(df))
        store["df"] = df
        recons = store["df"]
        assert recons._mgr.is_consolidated()
    # empty
    _check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path)

def test_empty_series_frame(setup_path):
    s0 = Series(dtype=object)
    s1 = Series(name="myseries", dtype=object)
    df0 = DataFrame()
    df1 = DataFrame(index=["a", "b", "c"])
    df2 = DataFrame(columns=["d", "e", "f"])
    _check_roundtrip(s0, tm.assert_series_equal, path=setup_path)
    _check_roundtrip(s1, tm.assert_series_equal, path=setup_path)
    _check_roundtrip(df0, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)

@pytest.mark.parametrize("dtype", [np.int64, np.float64, object, "m8[ns]", "M8[ns]"])
def test_empty_series(dtype, setup_path):
    s = Series(dtype=dtype)
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)

def test_can_serialize_dates(setup_path):
    rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")]
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
    _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)

def test_store_hierarchical(setup_path, multiindex_dataframe_random_data):
    frame = multiindex_dataframe_random_data
    _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path)
    # check that the names are stored
    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        recons = store["frame"]
        tm.assert_frame_equal(recons, frame)

@pytest.mark.parametrize(
    "compression", [False, pytest.param(True, marks=td.skip_if_windows)]
)
def test_store_mixed(compression, setup_path):
    def _make_one():
        df = tm.makeDataFrame()
        df["obj1"] = "foo"
        df["obj2"] = "bar"
        df["bool1"] = df["A"] > 0
        df["bool2"] = df["B"] > 0
        df["int1"] = 1
        df["int2"] = 2
        return df._consolidate()

    df1 = _make_one()
    df2 = _make_one()
    _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)
    with ensure_clean_store(setup_path) as store:
        store["obj"] = df1
        tm.assert_frame_equal(store["obj"], df1)
        store["obj"] = df2
        tm.assert_frame_equal(store["obj"], df2)
    # check that can store Series of all of these types
    _check_roundtrip(
        df1["obj1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )
    _check_roundtrip(
        df1["bool1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )
    _check_roundtrip(
        df1["int1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )

def _check_roundtrip(obj, comparator, path, compression=False, **kwargs):
    options = {}
    if compression:
        options["complib"] = _default_compressor
    with ensure_clean_store(path, "w", **options) as store:
        store["obj"] = obj
        retrieved = store["obj"]
        comparator(retrieved, obj, **kwargs)

def _check_double_roundtrip(obj, comparator, path, compression=False, **kwargs):
    options = {}
    if compression:
        options["complib"] = compression or _default_compressor
    with ensure_clean_store(path, "w", **options) as store:
        store["obj"] = obj
        retrieved = store["obj"]
        comparator(retrieved, obj, **kwargs)
        store["obj"] = retrieved
        again = store["obj"]
        comparator(again, obj, **kwargs)

def _check_roundtrip_table(obj, comparator, path, compression=False):
    options = {}
    if compression:
        options["complib"] = _default_compressor
    with ensure_clean_store(path, "w", **options) as store:
        store.put("obj", obj, format="table")
        retrieved = store["obj"]
        comparator(retrieved, obj)

def test_unicode_index(setup_path):
    unicode_values = ["\u03c3", "\u03c3\u03c3"]
    # PerformanceWarning
    with catch_warnings(record=True):
        simplefilter("ignore", pd.errors.PerformanceWarning)
        s = Series(np.random.randn(len(unicode_values)), unicode_values)
        _check_roundtrip(s, tm.assert_series_equal, path=setup_path)

def test_unicode_longer_encoded(setup_path):
    # GH 11234
    char = "\u0394"
    df = DataFrame({"A": [char]})
    with ensure_clean_store(setup_path) as store:
        store.put("df", df, format="table", encoding="utf-8")
        result = store.get("df")
        tm.assert_frame_equal(result, df)
    df = DataFrame({"A": ["a", char], "B": ["b", "b"]})
    with ensure_clean_store(setup_path) as store:
        store.put("df", df, format="table", encoding="utf-8")
        result = store.get("df")
        tm.assert_frame_equal(result, df)

def test_store_datetime_mixed(setup_path):
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]})
    ts = tm.makeTimeSeries()
    df["d"] = ts.index[:3]
    _check_roundtrip(df, tm.assert_frame_equal, path=setup_path)

def test_round_trip_equals(setup_path):
    # GH 9330
    df = DataFrame({"B": [1, 2], "A": ["x", "y"]})
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table")
        other = read_hdf(path, "df")
        tm.assert_frame_equal(df, other)
        assert df.equals(other)
        ...


test_timezones.py

Source: test_timezones.py (GitHub)


import datetime

import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import DataFrame, DatetimeIndex, Series, Timestamp, date_range
import pandas._testing as tm
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_path,
    ensure_clean_store,
)


def _compare_with_tz(a, b):
    tm.assert_frame_equal(a, b)

    # compare the zones on each element
    for c in a.columns:
        for i in a.index:
            a_e = a.loc[i, c]
            b_e = b.loc[i, c]
            if not (a_e == b_e and a_e.tz == b_e.tz):
                raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]")


def test_append_with_timezones_dateutil(setup_path):

    from datetime import timedelta

    # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
    # filename issues.
    from pandas._libs.tslibs.timezones import maybe_get_tz

    gettz = lambda x: maybe_get_tz("dateutil/" + x)

    # as columns
    with ensure_clean_store(setup_path) as store:

        _maybe_remove(store, "df_tz")
        df = DataFrame(
            {
                "A": [
                    Timestamp("20130102 2:00:00", tz=gettz("US/Eastern"))
                    + timedelta(hours=1) * i
                    for i in range(5)
                ]
            }
        )

        store.append("df_tz", df, data_columns=["A"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # select with tz aware
        expected = df[df.A >= df.A[3]]
        result = store.select("df_tz", where="A>=df.A[3]")
        _compare_with_tz(result, expected)

        # ensure we include dates in DST and STD time here.
        _maybe_remove(store, "df_tz")
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz=gettz("US/Eastern")),
                "B": Timestamp("20130603", tz=gettz("US/Eastern")),
            },
            index=range(5),
        )
        store.append("df_tz", df)
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        df = DataFrame(
            {
                "A": Timestamp("20130102", tz=gettz("US/Eastern")),
                "B": Timestamp("20130102", tz=gettz("EET")),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[dateutil/.*US/Eastern\] "
            r"conflicts with new value \[dateutil/.*EET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df, data_columns=["A", "B"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # can't append with diff timezone
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz=gettz("US/Eastern")),
                "B": Timestamp("20130102", tz=gettz("CET")),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[dateutil/.*EET\] "
            r"conflicts with new value \[dateutil/.*CET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

    # as index
    with ensure_clean_store(setup_path) as store:

        dti = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern"))
        dti = dti._with_freq(None)  # freq doesn't round-trip

        # GH 4098 example
        df = DataFrame({"A": Series(range(3), index=dti)})

        _maybe_remove(store, "df")
        store.put("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        _maybe_remove(store, "df")
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)


def test_append_with_timezones_pytz(setup_path):

    from datetime import timedelta

    # as columns
    with ensure_clean_store(setup_path) as store:

        _maybe_remove(store, "df_tz")
        df = DataFrame(
            {
                "A": [
                    Timestamp("20130102 2:00:00", tz="US/Eastern")
                    + timedelta(hours=1) * i
                    for i in range(5)
                ]
            }
        )
        store.append("df_tz", df, data_columns=["A"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # select with tz aware
        _compare_with_tz(store.select("df_tz", where="A>=df.A[3]"), df[df.A >= df.A[3]])

        _maybe_remove(store, "df_tz")
        # ensure we include dates in DST and STD time here.
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="US/Eastern"),
            },
            index=range(5),
        )
        store.append("df_tz", df)
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        df = DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130102", tz="EET"),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[US/Eastern\] conflicts with new value \[EET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df, data_columns=["A", "B"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # can't append with diff timezone
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130102", tz="CET"),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[EET\] conflicts with new value \[CET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

    # as index
    with ensure_clean_store(setup_path) as store:

        dti = date_range("2000-1-1", periods=3, freq="H", tz="US/Eastern")
        dti = dti._with_freq(None)  # freq doesn't round-trip

        # GH 4098 example
        df = DataFrame({"A": Series(range(3), index=dti)})

        _maybe_remove(store, "df")
        store.put("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        _maybe_remove(store, "df")
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)


def test_roundtrip_tz_aware_index(setup_path):
    # GH 17618
    time = Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
    df = DataFrame(data=[0], index=[time])

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="fixed")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)
        assert recons.index[0].value == 946706400000000000


def test_store_index_name_with_tz(setup_path):
    # GH 13884
    df = DataFrame({"A": [1, 2]})
    df.index = DatetimeIndex([1234567890123456787, 1234567890123456788])
    df.index = df.index.tz_localize("UTC")
    df.index.name = "foo"

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="table")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)


def test_tseries_select_index_column(setup_path):
    # GH7777
    # selecting a UTC datetimeindex column did
    # not preserve UTC tzinfo set before storing

    # check that no tz still works
    rng = date_range("1/1/2000", "1/30/2000")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == DatetimeIndex(result.values).tz

    # check utc
    rng = date_range("1/1/2000", "1/30/2000", tz="UTC")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == result.dt.tz

    # double check non-utc
    rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == result.dt.tz


def test_timezones_fixed_format_frame_non_empty(setup_path):
    with ensure_clean_store(setup_path) as store:

        # index
        rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
        rng = rng._with_freq(None)  # freq doesn't round-trip
        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
        store["df"] = df
        result = store["df"]
        tm.assert_frame_equal(result, df)

        # as data
        # GH11411
        _maybe_remove(store, "df")
        df = DataFrame(
            {
                "A": rng,
                "B": rng.tz_convert("UTC").tz_localize(None),
                "C": rng.tz_convert("CET"),
                "D": range(len(rng)),
            },
            index=rng,
        )
        store["df"] = df
        result = store["df"]
        tm.assert_frame_equal(result, df)


def test_timezones_fixed_format_frame_empty(setup_path, tz_aware_fixture):
    # GH 20594

    dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        s = Series(dtype=dtype)
        df = DataFrame({"A": s})
        store["df"] = df
        result = store["df"]
        tm.assert_frame_equal(result, df)


def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):
    # GH 20594

    dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        s = Series([0], dtype=dtype)
        store["s"] = s
        result = store["s"]
        tm.assert_series_equal(result, s)


def test_timezones_fixed_format_series_empty(setup_path, tz_aware_fixture):
    # GH 20594

    dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        s = Series(dtype=dtype)
        store["s"] = s
        result = store["s"]
        tm.assert_series_equal(result, s)


def test_fixed_offset_tz(setup_path):
    rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        recons = store["frame"]
        tm.assert_index_equal(recons.index, rng)
        assert rng.tz == recons.index.tz


@td.skip_if_windows
def test_store_timezone(setup_path):
    # GH2852
    # issue storing datetime.date with a timezone as it resets when read
    # back in a new timezone

    # original method
    with ensure_clean_store(setup_path) as store:

        today = datetime.date(2013, 9, 10)
        df = DataFrame([1, 2, 3], index=[today, today, today])
        store["obj1"] = df
        result = store["obj1"]
        tm.assert_frame_equal(result, df)

    # with tz setting
    with ensure_clean_store(setup_path) as store:

        with tm.set_timezone("EST5EDT"):
            today = datetime.date(2013, 9, 10)
            df = DataFrame([1, 2, 3], index=[today, today, today])
            store["obj1"] = df

        with tm.set_timezone("CST6CDT"):
            result = store["obj1"]

        tm.assert_frame_equal(result, df)


def test_legacy_datetimetz_object(datapath, setup_path):
    # legacy from < 0.17.0
    # 8260
    expected = DataFrame(
        {
            "A": Timestamp("20130102", tz="US/Eastern"),
            "B": Timestamp("20130603", tz="CET"),
        },
        index=range(5),
    )
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r"
    ) as store:
        result = store["df"]
        tm.assert_frame_equal(result, expected)


def test_dst_transitions(setup_path):
    # make sure we are not failing on transitions
    with ensure_clean_store(setup_path) as store:
        times = pd.date_range(
            "2013-10-26 23:00",
            "2013-10-27 01:00",
            tz="Europe/London",
            freq="H",
            ambiguous="infer",
        )
        times = times._with_freq(None)  # freq doesn't round-trip

        for i in [times, times + pd.Timedelta("10min")]:
            _maybe_remove(store, "df")
            df = DataFrame({"A": range(len(i)), "B": i}, index=i)
            store.append("df", df)
            result = store.select("df")
            tm.assert_frame_equal(result, df)


def test_read_with_where_tz_aware_index(setup_path):
    # GH 11926
    periods = 10
    dts = pd.date_range("20151201", periods=periods, freq="D", tz="UTC")
    mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"])
    expected = DataFrame({"MYCOL": 0}, index=mi)

    key = "mykey"
    with ensure_clean_path(setup_path) as path:
        with pd.HDFStore(path) as store:
            store.append(key, expected, format="table", append=True)
        result = pd.read_hdf(path, key, where="DATE > 20151130")
        tm.assert_frame_equal(result, expected)


def test_py2_created_with_datetimez(datapath, setup_path):
    # The test HDF5 file was created in Python 2, but could not be read in
    # Python 3.
    #
    # GH26443
    index = [Timestamp("2019-01-01T18:00").tz_localize("America/New_York")]
    expected = DataFrame({"data": 123}, index=index)
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"
    ) as store:
        result = store["key"]
        ...


test_complex.py

Source:test_complex.py Github


from warnings import catch_warnings

import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import DataFrame, Series
import pandas._testing as tm
from pandas.tests.io.pytables.common import ensure_clean_path, ensure_clean_store

from pandas.io.pytables import read_hdf

# GH10447


def test_complex_fixed(setup_path):
    df = DataFrame(
        np.random.rand(4, 5).astype(np.complex64),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)

    df = DataFrame(
        np.random.rand(4, 5).astype(np.complex128),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)


def test_complex_table(setup_path):
    df = DataFrame(
        np.random.rand(4, 5).astype(np.complex64),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)

    df = DataFrame(
        np.random.rand(4, 5).astype(np.complex128),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table", mode="w")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)


@td.xfail_non_writeable
def test_complex_mixed_fixed(setup_path):
    complex64 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
    )
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "d"],
            "C": complex64,
            "D": complex128,
            "E": [1.0, 2.0, 3.0, 4.0],
        },
        index=list("abcd"),
    )
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)


def test_complex_mixed_table(setup_path):
    complex64 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
    )
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "d"],
            "C": complex64,
            "D": complex128,
            "E": [1.0, 2.0, 3.0, 4.0],
        },
        index=list("abcd"),
    )

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["A", "B"])
        result = store.select("df", where="A>2")
        tm.assert_frame_equal(df.loc[df.A > 2], result)

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)


def test_complex_across_dimensions_fixed(setup_path):
    with catch_warnings(record=True):
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(complex128, index=list("abcd"))
        df = DataFrame({"A": s, "B": s})

        objs = [s, df]
        comps = [tm.assert_series_equal, tm.assert_frame_equal]
        for obj, comp in zip(objs, comps):
            with ensure_clean_path(setup_path) as path:
                obj.to_hdf(path, "obj", format="fixed")
                reread = read_hdf(path, "obj")
                comp(obj, reread)


def test_complex_across_dimensions(setup_path):
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))
    df = DataFrame({"A": s, "B": s})

    with catch_warnings(record=True):
        objs = [df]
        comps = [tm.assert_frame_equal]
        for obj, comp in zip(objs, comps):
            with ensure_clean_path(setup_path) as path:
                obj.to_hdf(path, "obj", format="table")
                reread = read_hdf(path, "obj")
                comp(obj, reread)


def test_complex_indexing_error(setup_path):
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128},
        index=list("abcd"),
    )
    with ensure_clean_store(setup_path) as store:
        with pytest.raises(TypeError):
            store.append("df", df, data_columns=["C"])


def test_complex_series_error(setup_path):
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))

    with ensure_clean_path(setup_path) as path:
        with pytest.raises(TypeError):
            s.to_hdf(path, "obj", format="t")

    with ensure_clean_path(setup_path) as path:
        s.to_hdf(path, "obj", format="t", index=False)
        reread = read_hdf(path, "obj")
        tm.assert_series_equal(s, reread)


def test_complex_append(setup_path):
    df = DataFrame(
        {"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)}
    )

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["b"])
        store.append("df", df)
        result = store.select("df")
        ...
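To see how the pieces fit together outside the pandas repository, here is a small, self-contained round-trip test in the same style. The clean_hdf_path context manager is a hypothetical stand-in for ensure_clean_path (the real helper lives in pandas.tests.io.pytables.common and also handles lists of paths); only to_hdf, read_hdf, and the fixture pattern are taken from the snippets above:

import os
import tempfile
from contextlib import contextmanager

import numpy as np
import pandas as pd
import pandas._testing as tm
import pytest


@pytest.fixture
def setup_path():
    # Unique scratch filename, as sketched after the previous listing.
    return f"tmp.__{np.random.randint(0, 2 ** 31)}__.h5"


@contextmanager
def clean_hdf_path(path):
    # Hypothetical stand-in for ensure_clean_path: resolve the name into
    # the system temp directory and delete the file afterwards.
    full_path = os.path.join(tempfile.gettempdir(), path)
    try:
        yield full_path
    finally:
        if os.path.exists(full_path):
            os.remove(full_path)


def test_complex_roundtrip(setup_path):
    # Complex values survive a table-format round trip, mirroring
    # test_complex_table above.
    df = pd.DataFrame(
        np.random.rand(4, 5).astype(np.complex128),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    with clean_hdf_path(setup_path) as path:
        df.to_hdf(path, "df", format="table")
        tm.assert_frame_equal(df, pd.read_hdf(path, "df"))

One limitation worth noting from the listing: complex columns round-trip in both fixed and table formats, but they cannot be used as data_columns for on-disk querying, which is why test_complex_indexing_error expects a TypeError.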


