How to use from_records method in pandera

Best Python code snippet using pandera_python

test_from_records.py

Source: test_from_records.py (GitHub)

copy

Full Screen

...24 expected = DataFrame({"EXPIRY": [datetime(2005, 3, 1, 0, 0), None]})25 arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]26 dtypes = [("EXPIRY", "<M8[ns]")]27 recarray = np.core.records.fromarrays(arrdata, dtype=dtypes)28 result = DataFrame.from_records(recarray)29 tm.assert_frame_equal(result, expected)30 # coercion should work too31 arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]32 dtypes = [("EXPIRY", "<M8[m]")]33 recarray = np.core.records.fromarrays(arrdata, dtype=dtypes)34 result = DataFrame.from_records(recarray)35 tm.assert_frame_equal(result, expected)36 def test_from_records_sequencelike(self):37 df = DataFrame(38 {39 "A": np.array(np.random.randn(6), dtype=np.float64),40 "A1": np.array(np.random.randn(6), dtype=np.float64),41 "B": np.array(np.arange(6), dtype=np.int64),42 "C": ["foo"] * 6,43 "D": np.array([True, False] * 3, dtype=bool),44 "E": np.array(np.random.randn(6), dtype=np.float32),45 "E1": np.array(np.random.randn(6), dtype=np.float32),46 "F": np.array(np.arange(6), dtype=np.int32),47 }48 )49 # this is actually tricky to create the recordlike arrays and50 # have the dtypes be intact51 blocks = df._to_dict_of_blocks()52 tuples = []53 columns = []54 dtypes = []55 for dtype, b in blocks.items():56 columns.extend(b.columns)57 dtypes.extend([(c, np.dtype(dtype).descr[0][1]) for c in b.columns])58 for i in range(len(df.index)):59 tup = []60 for _, b in blocks.items():61 tup.extend(b.iloc[i].values)62 tuples.append(tuple(tup))63 recarray = np.array(tuples, dtype=dtypes).view(np.recarray)64 recarray2 = df.to_records()65 lists = [list(x) for x in tuples]66 # tuples (lose the dtype info)67 result = DataFrame.from_records(tuples, columns=columns).reindex(68 columns=df.columns69 )70 # created recarray and with to_records recarray (have dtype info)71 result2 = DataFrame.from_records(recarray, columns=columns).reindex(72 columns=df.columns73 )74 result3 = DataFrame.from_records(recarray2, columns=columns).reindex(75 columns=df.columns76 
)77 # list of tupels (no dtype info)78 result4 = DataFrame.from_records(lists, columns=columns).reindex(79 columns=df.columns80 )81 tm.assert_frame_equal(result, df, check_dtype=False)82 tm.assert_frame_equal(result2, df)83 tm.assert_frame_equal(result3, df)84 tm.assert_frame_equal(result4, df, check_dtype=False)85 # tuples is in the order of the columns86 result = DataFrame.from_records(tuples)87 tm.assert_index_equal(result.columns, RangeIndex(8))88 # test exclude parameter & we are casting the results here (as we don't89 # have dtype info to recover)90 columns_to_test = [columns.index("C"), columns.index("E1")]91 exclude = list(set(range(8)) - set(columns_to_test))92 result = DataFrame.from_records(tuples, exclude=exclude)93 result.columns = [columns[i] for i in sorted(columns_to_test)]94 tm.assert_series_equal(result["C"], df["C"])95 tm.assert_series_equal(result["E1"], df["E1"])96 def test_from_records_sequencelike_empty(self):97 # empty case98 result = DataFrame.from_records([], columns=["foo", "bar", "baz"])99 assert len(result) == 0100 tm.assert_index_equal(result.columns, Index(["foo", "bar", "baz"]))101 result = DataFrame.from_records([])102 assert len(result) == 0103 assert len(result.columns) == 0104 def test_from_records_dictlike(self):105 # test the dict methods106 df = DataFrame(107 {108 "A": np.array(np.random.randn(6), dtype=np.float64),109 "A1": np.array(np.random.randn(6), dtype=np.float64),110 "B": np.array(np.arange(6), dtype=np.int64),111 "C": ["foo"] * 6,112 "D": np.array([True, False] * 3, dtype=bool),113 "E": np.array(np.random.randn(6), dtype=np.float32),114 "E1": np.array(np.random.randn(6), dtype=np.float32),115 "F": np.array(np.arange(6), dtype=np.int32),116 }117 )118 # columns is in a different order here than the actual items iterated119 # from the dict120 blocks = df._to_dict_of_blocks()121 columns = []122 for b in blocks.values():123 columns.extend(b.columns)124 asdict = {x: y for x, y in df.items()}125 asdict2 = {x: y.values for x, 
y in df.items()}126 # dict of series & dict of ndarrays (have dtype info)127 results = []128 results.append(DataFrame.from_records(asdict).reindex(columns=df.columns))129 results.append(130 DataFrame.from_records(asdict, columns=columns).reindex(columns=df.columns)131 )132 results.append(133 DataFrame.from_records(asdict2, columns=columns).reindex(columns=df.columns)134 )135 for r in results:136 tm.assert_frame_equal(r, df)137 def test_from_records_with_index_data(self):138 df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])139 data = np.random.randn(10)140 df1 = DataFrame.from_records(df, index=data)141 tm.assert_index_equal(df1.index, Index(data))142 def test_from_records_bad_index_column(self):143 df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])144 # should pass145 df1 = DataFrame.from_records(df, index=["C"])146 tm.assert_index_equal(df1.index, Index(df.C))147 df1 = DataFrame.from_records(df, index="C")148 tm.assert_index_equal(df1.index, Index(df.C))149 # should fail150 msg = "|".join(151 [152 r"Length of values \(10\) does not match length of index \(1\)",153 ]154 )155 with pytest.raises(ValueError, match=msg):156 DataFrame.from_records(df, index=[2])157 with pytest.raises(KeyError, match=r"^2$"):158 DataFrame.from_records(df, index=2)159 def test_from_records_non_tuple(self):160 class Record:161 def __init__(self, *args):162 self.args = args163 def __getitem__(self, i):164 return self.args[i]165 def __iter__(self):166 return iter(self.args)167 recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)]168 tups = [tuple(rec) for rec in recs]169 result = DataFrame.from_records(recs)170 expected = DataFrame.from_records(tups)171 tm.assert_frame_equal(result, expected)172 def test_from_records_len0_with_columns(self):173 # GH#2633174 result = DataFrame.from_records([], index="foo", columns=["foo", "bar"])175 expected = Index(["bar"])176 assert len(result) == 0177 assert result.index.name == "foo"178 tm.assert_index_equal(result.columns, 
expected)179 def test_from_records_series_list_dict(self):180 # GH#27358181 expected = DataFrame([[{"a": 1, "b": 2}, {"a": 3, "b": 4}]]).T182 data = Series([[{"a": 1, "b": 2}], [{"a": 3, "b": 4}]])183 result = DataFrame.from_records(data)184 tm.assert_frame_equal(result, expected)185 def test_from_records_series_categorical_index(self):186 # GH#32805187 index = CategoricalIndex(188 [Interval(-20, -10), Interval(-10, 0), Interval(0, 10)]189 )190 series_of_dicts = Series([{"a": 1}, {"a": 2}, {"b": 3}], index=index)191 frame = DataFrame.from_records(series_of_dicts, index=index)192 expected = DataFrame(193 {"a": [1, 2, np.NaN], "b": [np.NaN, np.NaN, 3]}, index=index194 )195 tm.assert_frame_equal(frame, expected)196 def test_frame_from_records_utc(self):197 rec = {"datum": 1.5, "begin_time": datetime(2006, 4, 27, tzinfo=pytz.utc)}198 # it works199 DataFrame.from_records([rec], index="begin_time")200 def test_from_records_to_records(self):201 # from numpy documentation202 arr = np.zeros((2,), dtype=("i4,f4,a10"))203 arr[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]204 # TODO(wesm): unused205 frame = DataFrame.from_records(arr) # noqa206 index = Index(np.arange(len(arr))[::-1])207 indexed_frame = DataFrame.from_records(arr, index=index)208 tm.assert_index_equal(indexed_frame.index, index)209 # without names, it should go to last ditch210 arr2 = np.zeros((2, 3))211 tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2))212 # wrong length213 msg = "|".join(214 [215 r"Length of values \(2\) does not match length of index \(1\)",216 ]217 )218 with pytest.raises(ValueError, match=msg):219 DataFrame.from_records(arr, index=index[:-1])220 indexed_frame = DataFrame.from_records(arr, index="f1")221 # what to do?222 records = indexed_frame.to_records()223 assert len(records.dtype.names) == 3224 records = indexed_frame.to_records(index=False)225 assert len(records.dtype.names) == 2226 assert "index" not in records.dtype.names227 def test_from_records_nones(self):228 
tuples = [(1, 2, None, 3), (1, 2, None, 3), (None, 2, 5, 3)]229 df = DataFrame.from_records(tuples, columns=["a", "b", "c", "d"])230 assert np.isnan(df["c"][0])231 def test_from_records_iterator(self):232 arr = np.array(233 [(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5.0, 5.0, 6, 6), (7.0, 7.0, 8, 8)],234 dtype=[235 ("x", np.float64),236 ("u", np.float32),237 ("y", np.int64),238 ("z", np.int32),239 ],240 )241 df = DataFrame.from_records(iter(arr), nrows=2)242 xp = DataFrame(243 {244 "x": np.array([1.0, 3.0], dtype=np.float64),245 "u": np.array([1.0, 3.0], dtype=np.float32),246 "y": np.array([2, 4], dtype=np.int64),247 "z": np.array([2, 4], dtype=np.int32),248 }249 )250 tm.assert_frame_equal(df.reindex_like(xp), xp)251 # no dtypes specified here, so just compare with the default252 arr = [(1.0, 2), (3.0, 4), (5.0, 6), (7.0, 8)]253 df = DataFrame.from_records(iter(arr), columns=["x", "y"], nrows=2)254 tm.assert_frame_equal(df, xp.reindex(columns=["x", "y"]), check_dtype=False)255 def test_from_records_tuples_generator(self):256 def tuple_generator(length):257 for i in range(length):258 letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"259 yield (i, letters[i % len(letters)], i / length)260 columns_names = ["Integer", "String", "Float"]261 columns = [262 [i[j] for i in tuple_generator(10)] for j in range(len(columns_names))263 ]264 data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]}265 expected = DataFrame(data, columns=columns_names)266 generator = tuple_generator(10)267 result = DataFrame.from_records(generator, columns=columns_names)268 tm.assert_frame_equal(result, expected)269 def test_from_records_lists_generator(self):270 def list_generator(length):271 for i in range(length):272 letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"273 yield [i, letters[i % len(letters)], i / length]274 columns_names = ["Integer", "String", "Float"]275 columns = [276 [i[j] for i in list_generator(10)] for j in range(len(columns_names))277 ]278 data = {"Integer": columns[0], "String": 
columns[1], "Float": columns[2]}279 expected = DataFrame(data, columns=columns_names)280 generator = list_generator(10)281 result = DataFrame.from_records(generator, columns=columns_names)282 tm.assert_frame_equal(result, expected)283 def test_from_records_columns_not_modified(self):284 tuples = [(1, 2, 3), (1, 2, 3), (2, 5, 3)]285 columns = ["a", "b", "c"]286 original_columns = list(columns)287 df = DataFrame.from_records(tuples, columns=columns, index="a") # noqa288 assert columns == original_columns289 def test_from_records_decimal(self):290 tuples = [(Decimal("1.5"),), (Decimal("2.5"),), (None,)]291 df = DataFrame.from_records(tuples, columns=["a"])292 assert df["a"].dtype == object293 df = DataFrame.from_records(tuples, columns=["a"], coerce_float=True)294 assert df["a"].dtype == np.float64295 assert np.isnan(df["a"].values[-1])296 def test_from_records_duplicates(self):297 result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"])298 expected = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"])299 tm.assert_frame_equal(result, expected)300 def test_from_records_set_index_name(self):301 def create_dict(order_id):302 return {303 "order_id": order_id,304 "quantity": np.random.randint(1, 10),305 "price": np.random.randint(1, 10),306 }307 documents = [create_dict(i) for i in range(10)]308 # demo missing data309 documents.append({"order_id": 10, "quantity": 5})310 result = DataFrame.from_records(documents, index="order_id")311 assert result.index.name == "order_id"312 # MultiIndex313 result = DataFrame.from_records(documents, index=["order_id", "quantity"])314 assert result.index.names == ("order_id", "quantity")315 def test_from_records_misc_brokenness(self):316 # GH#2179317 data = {1: ["foo"], 2: ["bar"]}318 result = DataFrame.from_records(data, columns=["a", "b"])319 exp = DataFrame(data, columns=["a", "b"])320 tm.assert_frame_equal(result, exp)321 # overlap in index/index_names322 data = {"a": [1, 2, 3], "b": [4, 5, 6]}323 result = 
DataFrame.from_records(data, index=["a", "b", "c"])324 exp = DataFrame(data, index=["a", "b", "c"])325 tm.assert_frame_equal(result, exp)326 # GH#2623327 rows = []328 rows.append([datetime(2010, 1, 1), 1])329 rows.append([datetime(2010, 1, 2), "hi"]) # test col upconverts to obj330 df2_obj = DataFrame.from_records(rows, columns=["date", "test"])331 result = df2_obj.dtypes332 expected = Series(333 [np.dtype("datetime64[ns]"), np.dtype("object")], index=["date", "test"]334 )335 tm.assert_series_equal(result, expected)336 rows = []337 rows.append([datetime(2010, 1, 1), 1])338 rows.append([datetime(2010, 1, 2), 1])339 df2_obj = DataFrame.from_records(rows, columns=["date", "test"])340 result = df2_obj.dtypes341 expected = Series(342 [np.dtype("datetime64[ns]"), np.dtype("int64")], index=["date", "test"]343 )344 tm.assert_series_equal(result, expected)345 def test_from_records_empty(self):346 # GH#3562347 result = DataFrame.from_records([], columns=["a", "b", "c"])348 expected = DataFrame(columns=["a", "b", "c"])349 tm.assert_frame_equal(result, expected)350 result = DataFrame.from_records([], columns=["a", "b", "b"])351 expected = DataFrame(columns=["a", "b", "b"])352 tm.assert_frame_equal(result, expected)353 def test_from_records_empty_with_nonempty_fields_gh3682(self):354 a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)])355 df = DataFrame.from_records(a, index="id")356 ex_index = Index([1], name="id")357 expected = DataFrame({"value": [2]}, index=ex_index, columns=["value"])358 tm.assert_frame_equal(df, expected)359 b = a[:0]360 df2 = DataFrame.from_records(b, index="id")361 tm.assert_frame_equal(df2, df.iloc[:0])362 def test_from_records_empty2(self):363 # GH#42456364 dtype = [("prop", int)]365 shape = (0, len(dtype))366 arr = np.empty(shape, dtype=dtype)367 result = DataFrame.from_records(arr)368 expected = DataFrame({"prop": np.array([], dtype=int)})369 tm.assert_frame_equal(result, expected)370 alt = DataFrame(arr)...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now!

Get 100 automation test minutes FREE!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful