How to use _test_statistics method in pandera

Best Python code snippet using pandera_python

test_schema_statistics.py

Source:test_schema_statistics.py Github

copy

Full Screen

...137 checks = schema_statistics.parse_check_statistics(check_stats)138 if checks is None:139 checks = []140 assert set(checks) == set(expectation)141def _test_statistics(statistics, expectations):142 if not isinstance(statistics, list):143 statistics = [statistics]144 if not isinstance(expectations, list):145 expectations = [expectations]146 for stats, expectation in zip(statistics, expectations):147 stat_dtype = stats.pop("dtype")148 expectation_dtype = expectation.pop("dtype")149 assert stats == expectation150 assert expectation_dtype.check(stat_dtype)151@pytest.mark.parametrize(152 "series, expectation",153 [154 *[155 [156 pd.Series(157 [1, 2, 3], dtype=str(pandas_engine.Engine.dtype(data_type))158 ),159 {160 "dtype": pandas_engine.Engine.dtype(data_type),161 "nullable": False,162 "checks": {163 "greater_than_or_equal_to": 1,164 "less_than_or_equal_to": 3,165 },166 "name": None,167 },168 ]169 for data_type in NUMERIC_TYPES170 ],171 [172 pd.Series(["a", "b", "c", "a"], dtype="category"),173 {174 "dtype": pandas_engine.Engine.dtype(pa.Category),175 "nullable": False,176 "checks": {"isin": ["a", "b", "c"]},177 "name": None,178 },179 ],180 [181 pd.Series(["a", "b", "c", "a"], dtype="string", name="str_series"),182 {183 "dtype": pandas_engine.Engine.dtype("string"),184 "nullable": False,185 "checks": None,186 "name": "str_series",187 },188 ],189 [190 pd.Series(pd.to_datetime(["20180101", "20180102", "20180103"])),191 {192 "dtype": pandas_engine.Engine.dtype(pa.DateTime),193 "nullable": False,194 "checks": {195 "greater_than_or_equal_to": pd.Timestamp("20180101"),196 "less_than_or_equal_to": pd.Timestamp("20180103"),197 },198 "name": None,199 },200 ],201 ],202)203def test_infer_series_schema_statistics(series, expectation) -> None:204 """Test series statistics are correctly inferred."""205 statistics = schema_statistics.infer_series_statistics(series)206 _test_statistics(statistics, expectation)207@pytest.mark.parametrize(208 "null_index, series, expectation",209 
[210 *[211 [212 0,213 pd.Series([1, 2, 3], dtype=str(data_type)),214 {215 # introducing nans to integer arrays upcasts to float216 "dtype": DEFAULT_FLOAT,217 "nullable": True,218 "checks": {219 "greater_than_or_equal_to": 2,220 "less_than_or_equal_to": 3,221 },222 "name": None,223 },224 ]225 for data_type in INTEGER_TYPES226 ],227 [228 # introducing nans to bool arrays upcasts to float except229 # for pandas >= 1.3.0230 0,231 pd.Series([True, False, True, False]),232 {233 "dtype": (234 pandas_engine.Engine.dtype(pa.BOOL)235 if pa.PANDAS_1_3_0_PLUS236 else DEFAULT_FLOAT237 ),238 "nullable": True,239 "checks": (240 None241 if pa.PANDAS_1_3_0_PLUS242 else {243 "greater_than_or_equal_to": 0,244 "less_than_or_equal_to": 1,245 }246 ),247 "name": None,248 },249 ],250 [251 0,252 pd.Series(["a", "b", "c", "a"], dtype="category"),253 {254 "dtype": pandas_engine.Engine.dtype(pa.Category),255 "nullable": True,256 "checks": {"isin": ["a", "b", "c"]},257 "name": None,258 },259 ],260 [261 0,262 pd.Series(["a", "b", "c", "a"], name="str_series"),263 {264 "dtype": pandas_engine.Engine.dtype(str),265 "nullable": True,266 "checks": None,267 "name": "str_series",268 },269 ],270 [271 2,272 pd.Series(pd.to_datetime(["20180101", "20180102", "20180103"])),273 {274 "dtype": pandas_engine.Engine.dtype(pa.DateTime),275 "nullable": True,276 "checks": {277 "greater_than_or_equal_to": pd.Timestamp("20180101"),278 "less_than_or_equal_to": pd.Timestamp("20180102"),279 },280 "name": None,281 },282 ],283 ],284)285def test_infer_nullable_series_schema_statistics(286 null_index, series, expectation287):288 """Test nullable series statistics are correctly inferred."""289 series.iloc[null_index] = None290 statistics = schema_statistics.infer_series_statistics(series)291 _test_statistics(statistics, expectation)292@pytest.mark.parametrize(293 "index, expectation",294 [295 [296 pd.RangeIndex(20),297 [298 {299 "name": None,300 "dtype": DEFAULT_INT,301 "nullable": False,302 "checks": {303 
"greater_than_or_equal_to": 0,304 "less_than_or_equal_to": 19,305 },306 }307 ],308 ],309 [310 pd.Index([1, 2, 3], name="int_index"),311 [312 {313 "name": "int_index",314 "dtype": DEFAULT_INT,315 "nullable": False,316 "checks": {317 "greater_than_or_equal_to": 1,318 "less_than_or_equal_to": 3,319 },320 }321 ],322 ],323 [324 pd.Index(["foo", "bar", "baz"], name="str_index"),325 [326 {327 "name": "str_index",328 "dtype": pandas_engine.Engine.dtype("object"),329 "nullable": False,330 "checks": None,331 },332 ],333 ],334 [335 pd.MultiIndex.from_arrays(336 [[10, 11, 12], pd.Series(["a", "b", "c"], dtype="category")],337 names=["int_index", "str_index"],338 ),339 [340 {341 "name": "int_index",342 "dtype": DEFAULT_INT,343 "nullable": False,344 "checks": {345 "greater_than_or_equal_to": 10,346 "less_than_or_equal_to": 12,347 },348 },349 {350 "name": "str_index",351 "dtype": pandas_engine.Engine.dtype(pa.Category),352 "nullable": False,353 "checks": {"isin": ["a", "b", "c"]},354 },355 ],356 ],357 # UserWarning cases358 [1, UserWarning],359 ["foo", UserWarning],360 [{"foo": "bar"}, UserWarning],361 [["foo", "bar"], UserWarning],362 [pd.Series(["foo", "bar"]), UserWarning],363 [pd.DataFrame({"column": ["foo", "bar"]}), UserWarning],364 ],365)366def test_infer_index_statistics(index, expectation):367 """Test that index statistics are correctly inferred."""368 if expectation is UserWarning:369 with pytest.warns(UserWarning, match="^index type .+ not recognized"):370 schema_statistics.infer_index_statistics(index)371 else:372 _test_statistics(373 schema_statistics.infer_index_statistics(index), expectation374 )375def test_get_dataframe_schema_statistics():376 """Test that dataframe schema statistics logic is correct."""377 schema = pa.DataFrameSchema(378 columns={379 "int": pa.Column(380 int,381 checks=[382 pa.Check.greater_than_or_equal_to(0),383 pa.Check.less_than_or_equal_to(100),384 ],385 nullable=True,386 ),387 "float": pa.Column(388 float,389 checks=[390 
pa.Check.greater_than_or_equal_to(50),391 pa.Check.less_than_or_equal_to(100),392 ],393 ),394 "str": pa.Column(395 str,396 checks=[pa.Check.isin(["foo", "bar", "baz"])],397 ),398 },399 index=pa.Index(400 int,401 checks=pa.Check.greater_than_or_equal_to(0),402 nullable=False,403 name="int_index",404 ),405 )406 expectation = {407 "checks": None,408 "columns": {409 "int": {410 "dtype": DEFAULT_INT,411 "checks": {412 "greater_than_or_equal_to": {"min_value": 0},413 "less_than_or_equal_to": {"max_value": 100},414 },415 "nullable": True,416 "unique": False,417 "coerce": False,418 "required": True,419 "regex": False,420 },421 "float": {422 "dtype": DEFAULT_FLOAT,423 "checks": {424 "greater_than_or_equal_to": {"min_value": 50},425 "less_than_or_equal_to": {"max_value": 100},426 },427 "nullable": False,428 "unique": False,429 "coerce": False,430 "required": True,431 "regex": False,432 },433 "str": {434 "dtype": pandas_engine.Engine.dtype(str),435 "checks": {"isin": {"allowed_values": ["foo", "bar", "baz"]}},436 "nullable": False,437 "unique": False,438 "coerce": False,439 "required": True,440 "regex": False,441 },442 },443 "index": [444 {445 "dtype": DEFAULT_INT,446 "checks": {"greater_than_or_equal_to": {"min_value": 0}},447 "nullable": False,448 "coerce": False,449 "name": "int_index",450 }451 ],452 "coerce": False,453 }454 statistics = schema_statistics.get_dataframe_schema_statistics(schema)455 assert statistics == expectation456def test_get_series_schema_statistics():457 """Test that series schema statistics logic is correct."""458 schema = pa.SeriesSchema(459 int,460 nullable=False,461 checks=[462 pa.Check.greater_than_or_equal_to(0),463 pa.Check.less_than_or_equal_to(100),464 ],465 )466 statistics = schema_statistics.get_series_schema_statistics(schema)467 assert statistics == {468 "dtype": pandas_engine.Engine.dtype(int),469 "nullable": False,470 "checks": {471 "greater_than_or_equal_to": {"min_value": 0},472 "less_than_or_equal_to": {"max_value": 100},473 },474 
"name": None,475 "coerce": False,476 }477@pytest.mark.parametrize(478 "index_schema_component, expectation",479 [480 [481 pa.Index(482 int,483 checks=[484 pa.Check.greater_than_or_equal_to(10),485 pa.Check.less_than_or_equal_to(20),486 ],487 nullable=False,488 name="int_index",489 ),490 [491 {492 "dtype": pandas_engine.Engine.dtype(int),493 "nullable": False,494 "checks": {495 "greater_than_or_equal_to": {"min_value": 10},496 "less_than_or_equal_to": {"max_value": 20},497 },498 "name": "int_index",499 "coerce": False,500 }501 ],502 ]503 ],504)505def test_get_index_schema_statistics(index_schema_component, expectation):506 """Test that index schema statistics logic is correct."""507 statistics = schema_statistics.get_index_schema_statistics(508 index_schema_component509 )510 _test_statistics(statistics, expectation)511@pytest.mark.parametrize(512 "checks, expectation",513 [514 *[515 [[check], {check.name: check.statistics}]516 for check in [517 pa.Check.greater_than(1),518 pa.Check.less_than(1),519 pa.Check.in_range(1, 3),520 pa.Check.equal_to(1),521 pa.Check.not_equal_to(1),522 pa.Check.notin([1, 2, 3]),523 pa.Check.str_matches("foobar"),524 pa.Check.str_contains("foobar"),...

Full Screen

Full Screen

test_histogram.py

Source:test_histogram.py Github

copy

Full Screen

...16 data = [15, 15, 20, 20, 20, 35, 35, 40, 40, 50, 50]17 histogram = traces.Histogram(data)18 normalized = histogram.normalized()19 assert sum(normalized.values()) == 1.020def _test_statistics(normalized):21 data_list = [22 [1, 2, 3, 5, 6, 7],23 [1, 2, 3, 5, 6],24 [1, 1],25 [1, 1, 1, 1, 1, 1, 1, 2],26 [i + 0.25 for i in [1, 1, 1, 1, 1, 1, 1, 2]],27 ]28 for data in data_list:29 histogram = traces.Histogram(data)30 if normalized:31 histogram = histogram.normalized()32 n = 133 else:34 n = len(data)35 nose.tools.assert_almost_equal(histogram.total(), n)36 nose.tools.assert_almost_equal(histogram.mean(), numpy.mean(data))37 nose.tools.assert_almost_equal(histogram.variance(), numpy.var(data))38 nose.tools.assert_almost_equal(39 histogram.standard_deviation(),40 numpy.std(data),41 )42 nose.tools.assert_almost_equal(histogram.max(), numpy.max(data))43 nose.tools.assert_almost_equal(histogram.min(), numpy.min(data))44 nose.tools.assert_almost_equal(45 histogram.quantile(0.5),46 numpy.median(data),47 )48 q_list = [0.001, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 0.999]49 # linear interpolation50 result = histogram.quantiles(q_list)51 reference = stats.mstats.mquantiles(52 data, prob=q_list, alphap=0.5, betap=0.5,53 )54 for i, j in zip(result, reference):55 nose.tools.assert_almost_equal(i, j)56 # make sure ot throw an error for bad quantile values57 try:58 histogram.quantile(-1)59 except ValueError:60 pass61def test_statistics():62 return _test_statistics(True)63def test_normalized_statistics():64 return _test_statistics(False)65def test_quantile_interpolation():66 data = [1, 1, 1, 2, 3, 5, 6, 7]67 histogram = traces.Histogram(data)68 normalized = histogram.normalized()69 q_list = [0.001, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 0.999]70 # just do the inverse of the emperical cdf71 result = histogram.quantiles(q_list, alpha=0, smallest_count=1)72 answer = [1.0, 1.0, 1.0, 1.0, 2.5, 5.5, 7.0, 7.0, 7.0]73 for i, j in zip(result, answer):74 nose.tools.assert_almost_equal(i, 
j)75 # same thing with normalized76 result = normalized.quantiles(77 q_list, alpha=0, smallest_count=1.0 / len(data))78 for i, j in zip(result, answer):...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful