How to use the extractall method in Playwright Python

Best Python code snippet using playwright-python

Run Playwright Python automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

test_extract.py

Source: test_extract.py Github

copy
1from datetime import datetime
2import re
3
4import numpy as np
5import pytest
6
7from pandas import (
8    DataFrame,
9    Index,
10    MultiIndex,
11    Series,
12    _testing as tm,
13)
14
15
16def test_extract_expand_kwarg_wrong_type_raises(any_string_dtype):
17    # TODO: should this raise TypeError
18    values = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype)
19    with pytest.raises(ValueError, match="expand must be True or False"):
20        values.str.extract(".*(BAD[_]+).*(BAD)", expand=None)
21
22
23def test_extract_expand_kwarg(any_string_dtype):
24    s = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype)
25    expected = DataFrame(["BAD__", np.nan, np.nan], dtype=any_string_dtype)
26
27    result = s.str.extract(".*(BAD[_]+).*")
28    tm.assert_frame_equal(result, expected)
29
30    result = s.str.extract(".*(BAD[_]+).*", expand=True)
31    tm.assert_frame_equal(result, expected)
32
33    expected = DataFrame(
34        [["BAD__", "BAD"], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype
35    )
36    result = s.str.extract(".*(BAD[_]+).*(BAD)", expand=False)
37    tm.assert_frame_equal(result, expected)
38
39
def test_extract_expand_False_mixed_object():
    # Object Series mixing strings with missing and non-string entries; only
    # positions 0 and 2 are expected to produce extracted values.
    mixed_values = [
        "aBAD_BAD",
        np.nan,
        "BAD_b_BAD",
        True,
        datetime.today(),
        "foo",
        None,
        1,
        2.0,
    ]
    ser = Series(mixed_values)
    matching = {0, 2}
    positions = range(len(mixed_values))

    # two groups
    frame_result = ser.str.extract(".*(BAD[_]+).*(BAD)", expand=False)
    frame_rows = [
        ["BAD_", "BAD"] if pos in matching else [np.nan, np.nan] for pos in positions
    ]
    tm.assert_frame_equal(frame_result, DataFrame(frame_rows))

    # single group
    series_result = ser.str.extract(".*(BAD[_]+).*BAD", expand=False)
    series_values = ["BAD_" if pos in matching else np.nan for pos in positions]
    tm.assert_series_equal(series_result, Series(series_values))
57
58
def test_extract_expand_index_raises():
    # GH9980
    # With expand=False an Index can only take a single regex group, because
    # several groups would have to expand into a frame.
    index = Index(["A1", "A2", "A3", "A4", "B5"])
    two_groups = "([AB])([123])"
    with pytest.raises(
        ValueError, match="only one regex group is supported with Index"
    ):
        index.str.extract(two_groups, expand=False)
67
68
69def test_extract_expand_no_capture_groups_raises(index_or_series, any_string_dtype):
70    s_or_idx = index_or_series(["A1", "B2", "C3"], dtype=any_string_dtype)
71    msg = "pattern contains no capture groups"
72
73    # no groups
74    with pytest.raises(ValueError, match=msg):
75        s_or_idx.str.extract("[ABC][123]", expand=False)
76
77    # only non-capturing groups
78    with pytest.raises(ValueError, match=msg):
79        s_or_idx.str.extract("(?:[AB]).*", expand=False)
80
81
82def test_extract_expand_single_capture_group(index_or_series, any_string_dtype):
83    # single group renames series/index properly
84    s_or_idx = index_or_series(["A1", "A2"], dtype=any_string_dtype)
85    result = s_or_idx.str.extract(r"(?P<uno>A)\d", expand=False)
86
87    expected = index_or_series(["A", "A"], name="uno", dtype=any_string_dtype)
88    if index_or_series == Series:
89        tm.assert_series_equal(result, expected)
90    else:
91        tm.assert_index_equal(result, expected)
92
93
94def test_extract_expand_capture_groups(any_string_dtype):
95    s = Series(["A1", "B2", "C3"], dtype=any_string_dtype)
96    # one group, no matches
97    result = s.str.extract("(_)", expand=False)
98    expected = Series([np.nan, np.nan, np.nan], dtype=any_string_dtype)
99    tm.assert_series_equal(result, expected)
100
101    # two groups, no matches
102    result = s.str.extract("(_)(_)", expand=False)
103    expected = DataFrame(
104        [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype
105    )
106    tm.assert_frame_equal(result, expected)
107
108    # one group, some matches
109    result = s.str.extract("([AB])[123]", expand=False)
110    expected = Series(["A", "B", np.nan], dtype=any_string_dtype)
111    tm.assert_series_equal(result, expected)
112
113    # two groups, some matches
114    result = s.str.extract("([AB])([123])", expand=False)
115    expected = DataFrame(
116        [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype
117    )
118    tm.assert_frame_equal(result, expected)
119
120    # one named group
121    result = s.str.extract("(?P<letter>[AB])", expand=False)
122    expected = Series(["A", "B", np.nan], name="letter", dtype=any_string_dtype)
123    tm.assert_series_equal(result, expected)
124
125    # two named groups
126    result = s.str.extract("(?P<letter>[AB])(?P<number>[123])", expand=False)
127    expected = DataFrame(
128        [["A", "1"], ["B", "2"], [np.nan, np.nan]],
129        columns=["letter", "number"],
130        dtype=any_string_dtype,
131    )
132    tm.assert_frame_equal(result, expected)
133
134    # mix named and unnamed groups
135    result = s.str.extract("([AB])(?P<number>[123])", expand=False)
136    expected = DataFrame(
137        [["A", "1"], ["B", "2"], [np.nan, np.nan]],
138        columns=[0, "number"],
139        dtype=any_string_dtype,
140    )
141    tm.assert_frame_equal(result, expected)
142
143    # one normal group, one non-capturing group
144    result = s.str.extract("([AB])(?:[123])", expand=False)
145    expected = Series(["A", "B", np.nan], dtype=any_string_dtype)
146    tm.assert_series_equal(result, expected)
147
148    # two normal groups, one non-capturing group
149    s = Series(["A11", "B22", "C33"], dtype=any_string_dtype)
150    result = s.str.extract("([AB])([123])(?:[123])", expand=False)
151    expected = DataFrame(
152        [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype
153    )
154    tm.assert_frame_equal(result, expected)
155
156    # one optional group followed by one normal group
157    s = Series(["A1", "B2", "3"], dtype=any_string_dtype)
158    result = s.str.extract("(?P<letter>[AB])?(?P<number>[123])", expand=False)
159    expected = DataFrame(
160        [["A", "1"], ["B", "2"], [np.nan, "3"]],
161        columns=["letter", "number"],
162        dtype=any_string_dtype,
163    )
164    tm.assert_frame_equal(result, expected)
165
166    # one normal group followed by one optional group
167    s = Series(["A1", "B2", "C"], dtype=any_string_dtype)
168    result = s.str.extract("(?P<letter>[ABC])(?P<number>[123])?", expand=False)
169    expected = DataFrame(
170        [["A", "1"], ["B", "2"], ["C", np.nan]],
171        columns=["letter", "number"],
172        dtype=any_string_dtype,
173    )
174    tm.assert_frame_equal(result, expected)
175
176
177def test_extract_expand_capture_groups_index(index, any_string_dtype):
178    # https://github.com/pandas-dev/pandas/issues/6348
179    # not passing index to the extractor
180    data = ["A1", "B2", "C"]
181
182    if len(index) < len(data):
183        pytest.skip("Index too short")
184
185    index = index[: len(data)]
186    s = Series(data, index=index, dtype=any_string_dtype)
187
188    result = s.str.extract(r"(\d)", expand=False)
189    expected = Series(["1", "2", np.nan], index=index, dtype=any_string_dtype)
190    tm.assert_series_equal(result, expected)
191
192    result = s.str.extract(r"(?P<letter>\D)(?P<number>\d)?", expand=False)
193    expected = DataFrame(
194        [["A", "1"], ["B", "2"], ["C", np.nan]],
195        columns=["letter", "number"],
196        index=index,
197        dtype=any_string_dtype,
198    )
199    tm.assert_frame_equal(result, expected)
200
201
202def test_extract_single_series_name_is_preserved(any_string_dtype):
203    s = Series(["a3", "b3", "c2"], name="bob", dtype=any_string_dtype)
204    result = s.str.extract(r"(?P<sue>[a-z])", expand=False)
205    expected = Series(["a", "b", "c"], name="sue", dtype=any_string_dtype)
206    tm.assert_series_equal(result, expected)
207
208
209def test_extract_expand_True(any_string_dtype):
210    # Contains tests like those in test_match and some others.
211    s = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype)
212
213    result = s.str.extract(".*(BAD[_]+).*(BAD)", expand=True)
214    expected = DataFrame(
215        [["BAD__", "BAD"], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype
216    )
217    tm.assert_frame_equal(result, expected)
218
219
def test_extract_expand_True_mixed_object():
    # Object Series mixing strings with missing and non-string entries; only
    # positions 0 and 2 are expected to produce extracted values.
    mixed_values = [
        "aBAD_BAD",
        np.nan,
        "BAD_b_BAD",
        True,
        datetime.today(),
        "foo",
        None,
        1,
        2.0,
    ]
    mixed = Series(mixed_values)
    matching = {0, 2}

    expected_rows = [
        ["BAD_", "BAD"] if pos in matching else [np.nan, np.nan]
        for pos in range(len(mixed_values))
    ]
    result = mixed.str.extract(".*(BAD[_]+).*(BAD)", expand=True)
    tm.assert_frame_equal(result, DataFrame(expected_rows))
239
240
241def test_extract_expand_True_single_capture_group_raises(
242    index_or_series, any_string_dtype
243):
244    # these should work for both Series and Index
245    # no groups
246    s_or_idx = index_or_series(["A1", "B2", "C3"], dtype=any_string_dtype)
247    msg = "pattern contains no capture groups"
248    with pytest.raises(ValueError, match=msg):
249        s_or_idx.str.extract("[ABC][123]", expand=True)
250
251    # only non-capturing groups
252    with pytest.raises(ValueError, match=msg):
253        s_or_idx.str.extract("(?:[AB]).*", expand=True)
254
255
256def test_extract_expand_True_single_capture_group(index_or_series, any_string_dtype):
257    # single group renames series/index properly
258    s_or_idx = index_or_series(["A1", "A2"], dtype=any_string_dtype)
259    result = s_or_idx.str.extract(r"(?P<uno>A)\d", expand=True)
260    expected = DataFrame({"uno": ["A", "A"]}, dtype=any_string_dtype)
261    tm.assert_frame_equal(result, expected)
262
263
264@pytest.mark.parametrize("name", [None, "series_name"])
265def test_extract_series(name, any_string_dtype):
266    # extract should give the same result whether or not the series has a name.
267    s = Series(["A1", "B2", "C3"], name=name, dtype=any_string_dtype)
268
269    # one group, no matches
270    result = s.str.extract("(_)", expand=True)
271    expected = DataFrame([np.nan, np.nan, np.nan], dtype=any_string_dtype)
272    tm.assert_frame_equal(result, expected)
273
274    # two groups, no matches
275    result = s.str.extract("(_)(_)", expand=True)
276    expected = DataFrame(
277        [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype
278    )
279    tm.assert_frame_equal(result, expected)
280
281    # one group, some matches
282    result = s.str.extract("([AB])[123]", expand=True)
283    expected = DataFrame(["A", "B", np.nan], dtype=any_string_dtype)
284    tm.assert_frame_equal(result, expected)
285
286    # two groups, some matches
287    result = s.str.extract("([AB])([123])", expand=True)
288    expected = DataFrame(
289        [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype
290    )
291    tm.assert_frame_equal(result, expected)
292
293    # one named group
294    result = s.str.extract("(?P<letter>[AB])", expand=True)
295    expected = DataFrame({"letter": ["A", "B", np.nan]}, dtype=any_string_dtype)
296    tm.assert_frame_equal(result, expected)
297
298    # two named groups
299    result = s.str.extract("(?P<letter>[AB])(?P<number>[123])", expand=True)
300    expected = DataFrame(
301        [["A", "1"], ["B", "2"], [np.nan, np.nan]],
302        columns=["letter", "number"],
303        dtype=any_string_dtype,
304    )
305    tm.assert_frame_equal(result, expected)
306
307    # mix named and unnamed groups
308    result = s.str.extract("([AB])(?P<number>[123])", expand=True)
309    expected = DataFrame(
310        [["A", "1"], ["B", "2"], [np.nan, np.nan]],
311        columns=[0, "number"],
312        dtype=any_string_dtype,
313    )
314    tm.assert_frame_equal(result, expected)
315
316    # one normal group, one non-capturing group
317    result = s.str.extract("([AB])(?:[123])", expand=True)
318    expected = DataFrame(["A", "B", np.nan], dtype=any_string_dtype)
319    tm.assert_frame_equal(result, expected)
320
321
322def test_extract_optional_groups(any_string_dtype):
323
324    # two normal groups, one non-capturing group
325    s = Series(["A11", "B22", "C33"], dtype=any_string_dtype)
326    result = s.str.extract("([AB])([123])(?:[123])", expand=True)
327    expected = DataFrame(
328        [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype
329    )
330    tm.assert_frame_equal(result, expected)
331
332    # one optional group followed by one normal group
333    s = Series(["A1", "B2", "3"], dtype=any_string_dtype)
334    result = s.str.extract("(?P<letter>[AB])?(?P<number>[123])", expand=True)
335    expected = DataFrame(
336        [["A", "1"], ["B", "2"], [np.nan, "3"]],
337        columns=["letter", "number"],
338        dtype=any_string_dtype,
339    )
340    tm.assert_frame_equal(result, expected)
341
342    # one normal group followed by one optional group
343    s = Series(["A1", "B2", "C"], dtype=any_string_dtype)
344    result = s.str.extract("(?P<letter>[ABC])(?P<number>[123])?", expand=True)
345    expected = DataFrame(
346        [["A", "1"], ["B", "2"], ["C", np.nan]],
347        columns=["letter", "number"],
348        dtype=any_string_dtype,
349    )
350    tm.assert_frame_equal(result, expected)
351
352
353def test_extract_dataframe_capture_groups_index(index, any_string_dtype):
354    # GH6348
355    # not passing index to the extractor
356
357    data = ["A1", "B2", "C"]
358
359    if len(index) < len(data):
360        pytest.skip("Index too short")
361
362    index = index[: len(data)]
363    s = Series(data, index=index, dtype=any_string_dtype)
364
365    result = s.str.extract(r"(\d)", expand=True)
366    expected = DataFrame(["1", "2", np.nan], index=index, dtype=any_string_dtype)
367    tm.assert_frame_equal(result, expected)
368
369    result = s.str.extract(r"(?P<letter>\D)(?P<number>\d)?", expand=True)
370    expected = DataFrame(
371        [["A", "1"], ["B", "2"], ["C", np.nan]],
372        columns=["letter", "number"],
373        index=index,
374        dtype=any_string_dtype,
375    )
376    tm.assert_frame_equal(result, expected)
377
378
379def test_extract_single_group_returns_frame(any_string_dtype):
380    # GH11386 extract should always return DataFrame, even when
381    # there is only one group. Prior to v0.18.0, extract returned
382    # Series when there was only one group in the regex.
383    s = Series(["a3", "b3", "c2"], name="series_name", dtype=any_string_dtype)
384    result = s.str.extract(r"(?P<letter>[a-z])", expand=True)
385    expected = DataFrame({"letter": ["a", "b", "c"]}, dtype=any_string_dtype)
386    tm.assert_frame_equal(result, expected)
387
388
389def test_extractall(any_string_dtype):
390    data = [
391        "[email protected]",
392        "[email protected]",
393        "[email protected]",
394        "[email protected] some text [email protected]",
395        "[email protected] some text [email protected] and [email protected]",
396        np.nan,
397        "",
398    ]
399    expected_tuples = [
400        ("dave", "google", "com"),
401        ("tdhock5", "gmail", "com"),
402        ("maudelaperriere", "gmail", "com"),
403        ("rob", "gmail", "com"),
404        ("steve", "gmail", "com"),
405        ("a", "b", "com"),
406        ("c", "d", "com"),
407        ("e", "f", "com"),
408    ]
409    pat = r"""
410    (?P<user>[a-z0-9]+)
411    @
412    (?P<domain>[a-z]+)
413    \.
414    (?P<tld>[a-z]{2,4})
415    """
416    expected_columns = ["user", "domain", "tld"]
417    s = Series(data, dtype=any_string_dtype)
418    # extractall should return a DataFrame with one row for each match, indexed by the
419    # subject from which the match came.
420    expected_index = MultiIndex.from_tuples(
421        [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (4, 0), (4, 1), (4, 2)],
422        names=(None, "match"),
423    )
424    expected = DataFrame(
425        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
426    )
427    result = s.str.extractall(pat, flags=re.VERBOSE)
428    tm.assert_frame_equal(result, expected)
429
430    # The index of the input Series should be used to construct the index of the output
431    # DataFrame:
432    mi = MultiIndex.from_tuples(
433        [
434            ("single", "Dave"),
435            ("single", "Toby"),
436            ("single", "Maude"),
437            ("multiple", "robAndSteve"),
438            ("multiple", "abcdef"),
439            ("none", "missing"),
440            ("none", "empty"),
441        ]
442    )
443    s = Series(data, index=mi, dtype=any_string_dtype)
444    expected_index = MultiIndex.from_tuples(
445        [
446            ("single", "Dave", 0),
447            ("single", "Toby", 0),
448            ("single", "Maude", 0),
449            ("multiple", "robAndSteve", 0),
450            ("multiple", "robAndSteve", 1),
451            ("multiple", "abcdef", 0),
452            ("multiple", "abcdef", 1),
453            ("multiple", "abcdef", 2),
454        ],
455        names=(None, None, "match"),
456    )
457    expected = DataFrame(
458        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
459    )
460    result = s.str.extractall(pat, flags=re.VERBOSE)
461    tm.assert_frame_equal(result, expected)
462
463    # MultiIndexed subject with names.
464    s = Series(data, index=mi, dtype=any_string_dtype)
465    s.index.names = ("matches", "description")
466    expected_index.names = ("matches", "description", "match")
467    expected = DataFrame(
468        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
469    )
470    result = s.str.extractall(pat, flags=re.VERBOSE)
471    tm.assert_frame_equal(result, expected)
472
473
474@pytest.mark.parametrize(
475    "pat,expected_names",
476    [
477        # optional groups.
478        ("(?P<letter>[AB])?(?P<number>[123])", ["letter", "number"]),
479        # only one of two groups has a name.
480        ("([AB])?(?P<number>[123])", [0, "number"]),
481    ],
482)
483def test_extractall_column_names(pat, expected_names, any_string_dtype):
484    s = Series(["", "A1", "32"], dtype=any_string_dtype)
485
486    result = s.str.extractall(pat)
487    expected = DataFrame(
488        [("A", "1"), (np.nan, "3"), (np.nan, "2")],
489        index=MultiIndex.from_tuples([(1, 0), (2, 0), (2, 1)], names=(None, "match")),
490        columns=expected_names,
491        dtype=any_string_dtype,
492    )
493    tm.assert_frame_equal(result, expected)
494
495
496def test_extractall_single_group(any_string_dtype):
497    s = Series(["a3", "b3", "d4c2"], name="series_name", dtype=any_string_dtype)
498    expected_index = MultiIndex.from_tuples(
499        [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match")
500    )
501
502    # extractall(one named group) returns DataFrame with one named column.
503    result = s.str.extractall(r"(?P<letter>[a-z])")
504    expected = DataFrame(
505        {"letter": ["a", "b", "d", "c"]}, index=expected_index, dtype=any_string_dtype
506    )
507    tm.assert_frame_equal(result, expected)
508
509    # extractall(one un-named group) returns DataFrame with one un-named column.
510    result = s.str.extractall(r"([a-z])")
511    expected = DataFrame(
512        ["a", "b", "d", "c"], index=expected_index, dtype=any_string_dtype
513    )
514    tm.assert_frame_equal(result, expected)
515
516
517def test_extractall_single_group_with_quantifier(any_string_dtype):
518    # GH#13382
519    # extractall(one un-named group with quantifier) returns DataFrame with one un-named
520    # column.
521    s = Series(["ab3", "abc3", "d4cd2"], name="series_name", dtype=any_string_dtype)
522    result = s.str.extractall(r"([a-z]+)")
523    expected = DataFrame(
524        ["ab", "abc", "d", "cd"],
525        index=MultiIndex.from_tuples(
526            [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match")
527        ),
528        dtype=any_string_dtype,
529    )
530    tm.assert_frame_equal(result, expected)
531
532
533@pytest.mark.parametrize(
534    "data, names",
535    [
536        ([], (None,)),
537        ([], ("i1",)),
538        ([], (None, "i2")),
539        ([], ("i1", "i2")),
540        (["a3", "b3", "d4c2"], (None,)),
541        (["a3", "b3", "d4c2"], ("i1", "i2")),
542        (["a3", "b3", "d4c2"], (None, "i2")),
543        (["a3", "b3", "d4c2"], ("i1", "i2")),
544    ],
545)
546def test_extractall_no_matches(data, names, any_string_dtype):
547    # GH19075 extractall with no matches should return a valid MultiIndex
548    n = len(data)
549    if len(names) == 1:
550        index = Index(range(n), name=names[0])
551    else:
552        tuples = (tuple([i] * (n - 1)) for i in range(n))
553        index = MultiIndex.from_tuples(tuples, names=names)
554    s = Series(data, name="series_name", index=index, dtype=any_string_dtype)
555    expected_index = MultiIndex.from_tuples([], names=(names + ("match",)))
556
557    # one un-named group.
558    result = s.str.extractall("(z)")
559    expected = DataFrame(columns=[0], index=expected_index, dtype=any_string_dtype)
560    tm.assert_frame_equal(result, expected)
561
562    # two un-named groups.
563    result = s.str.extractall("(z)(z)")
564    expected = DataFrame(columns=[0, 1], index=expected_index, dtype=any_string_dtype)
565    tm.assert_frame_equal(result, expected)
566
567    # one named group.
568    result = s.str.extractall("(?P<first>z)")
569    expected = DataFrame(
570        columns=["first"], index=expected_index, dtype=any_string_dtype
571    )
572    tm.assert_frame_equal(result, expected)
573
574    # two named groups.
575    result = s.str.extractall("(?P<first>z)(?P<second>z)")
576    expected = DataFrame(
577        columns=["first", "second"], index=expected_index, dtype=any_string_dtype
578    )
579    tm.assert_frame_equal(result, expected)
580
581    # one named, one un-named.
582    result = s.str.extractall("(z)(?P<second>z)")
583    expected = DataFrame(
584        columns=[0, "second"], index=expected_index, dtype=any_string_dtype
585    )
586    tm.assert_frame_equal(result, expected)
587
588
589def test_extractall_stringindex(any_string_dtype):
590    s = Series(["a1a2", "b1", "c1"], name="xxx", dtype=any_string_dtype)
591    result = s.str.extractall(r"[ab](?P<digit>\d)")
592    expected = DataFrame(
593        {"digit": ["1", "2", "1"]},
594        index=MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0)], names=[None, "match"]),
595        dtype=any_string_dtype,
596    )
597    tm.assert_frame_equal(result, expected)
598
599    # index should return the same result as the default index without name thus
600    # index.name doesn't affect to the result
601    if any_string_dtype == "object":
602        for idx in [
603            Index(["a1a2", "b1", "c1"]),
604            Index(["a1a2", "b1", "c1"], name="xxx"),
605        ]:
606
607            result = idx.str.extractall(r"[ab](?P<digit>\d)")
608            tm.assert_frame_equal(result, expected)
609
610    s = Series(
611        ["a1a2", "b1", "c1"],
612        name="s_name",
613        index=Index(["XX", "yy", "zz"], name="idx_name"),
614        dtype=any_string_dtype,
615    )
616    result = s.str.extractall(r"[ab](?P<digit>\d)")
617    expected = DataFrame(
618        {"digit": ["1", "2", "1"]},
619        index=MultiIndex.from_tuples(
620            [("XX", 0), ("XX", 1), ("yy", 0)], names=["idx_name", "match"]
621        ),
622        dtype=any_string_dtype,
623    )
624    tm.assert_frame_equal(result, expected)
625
626
627def test_extractall_no_capture_groups_raises(any_string_dtype):
628    # Does not make sense to use extractall with a regex that has no capture groups.
629    # (it returns DataFrame with one column for each capture group)
630    s = Series(["a3", "b3", "d4c2"], name="series_name", dtype=any_string_dtype)
631    with pytest.raises(ValueError, match="no capture groups"):
632        s.str.extractall(r"[a-z]")
633
634
def test_extract_index_one_two_groups():
    ser = Series(["a3", "b3", "d4c2"], index=["A3", "B3", "D4"], name="series_name")
    labels = ser.index

    # one group
    one_group = labels.str.extract(r"([A-Z])", expand=True)
    tm.assert_frame_equal(one_group, DataFrame(["A", "B", "D"]))

    # Prior to v0.18.0, index.str.extract(regex with one group)
    # returned Index. With more than one group, extract raised an
    # error (GH9980). Now extract always returns DataFrame.
    two_groups = labels.str.extract(r"(?P<letter>[A-Z])(?P<digit>[0-9])", expand=True)
    expected_pairs = [("A", "3"), ("B", "3"), ("D", "4")]
    tm.assert_frame_equal(
        two_groups, DataFrame(expected_pairs, columns=["letter", "digit"])
    )
648
649
650def test_extractall_same_as_extract(any_string_dtype):
651    s = Series(["a3", "b3", "c2"], name="series_name", dtype=any_string_dtype)
652
653    pattern_two_noname = r"([a-z])([0-9])"
654    extract_two_noname = s.str.extract(pattern_two_noname, expand=True)
655    has_multi_index = s.str.extractall(pattern_two_noname)
656    no_multi_index = has_multi_index.xs(0, level="match")
657    tm.assert_frame_equal(extract_two_noname, no_multi_index)
658
659    pattern_two_named = r"(?P<letter>[a-z])(?P<digit>[0-9])"
660    extract_two_named = s.str.extract(pattern_two_named, expand=True)
661    has_multi_index = s.str.extractall(pattern_two_named)
662    no_multi_index = has_multi_index.xs(0, level="match")
663    tm.assert_frame_equal(extract_two_named, no_multi_index)
664
665    pattern_one_named = r"(?P<group_name>[a-z])"
666    extract_one_named = s.str.extract(pattern_one_named, expand=True)
667    has_multi_index = s.str.extractall(pattern_one_named)
668    no_multi_index = has_multi_index.xs(0, level="match")
669    tm.assert_frame_equal(extract_one_named, no_multi_index)
670
671    pattern_one_noname = r"([a-z])"
672    extract_one_noname = s.str.extract(pattern_one_noname, expand=True)
673    has_multi_index = s.str.extractall(pattern_one_noname)
674    no_multi_index = has_multi_index.xs(0, level="match")
675    tm.assert_frame_equal(extract_one_noname, no_multi_index)
676
677
678def test_extractall_same_as_extract_subject_index(any_string_dtype):
679    # same as above tests, but s has an MultiIndex.
680    mi = MultiIndex.from_tuples(
681        [("A", "first"), ("B", "second"), ("C", "third")],
682        names=("capital", "ordinal"),
683    )
684    s = Series(["a3", "b3", "c2"], index=mi, name="series_name", dtype=any_string_dtype)
685
686    pattern_two_noname = r"([a-z])([0-9])"
687    extract_two_noname = s.str.extract(pattern_two_noname, expand=True)
688    has_match_index = s.str.extractall(pattern_two_noname)
689    no_match_index = has_match_index.xs(0, level="match")
690    tm.assert_frame_equal(extract_two_noname, no_match_index)
691
692    pattern_two_named = r"(?P<letter>[a-z])(?P<digit>[0-9])"
693    extract_two_named = s.str.extract(pattern_two_named, expand=True)
694    has_match_index = s.str.extractall(pattern_two_named)
695    no_match_index = has_match_index.xs(0, level="match")
696    tm.assert_frame_equal(extract_two_named, no_match_index)
697
698    pattern_one_named = r"(?P<group_name>[a-z])"
699    extract_one_named = s.str.extract(pattern_one_named, expand=True)
700    has_match_index = s.str.extractall(pattern_one_named)
701    no_match_index = has_match_index.xs(0, level="match")
702    tm.assert_frame_equal(extract_one_named, no_match_index)
703
704    pattern_one_noname = r"([a-z])"
705    extract_one_noname = s.str.extract(pattern_one_noname, expand=True)
706    has_match_index = s.str.extractall(pattern_one_noname)
707    no_match_index = has_match_index.xs(0, level="match")
708    tm.assert_frame_equal(extract_one_noname, no_match_index)
709
Full Screen

test_all.py

Source: test_all.py Github

copy
1import os
2import sys
3import tempfile
4from shutil import make_archive
5
6import pytest
7
8from pyunpack import Archive, PatoolError, cli
9
# True only when the interpreter's major version is 2.
PY2 = sys.version_info.major == 2

# Archive formats exercised by the tests: zip everywhere, the tar variants
# only on Linux, and xztar only on Linux when not running Python 2.
formats = ["zip"]
if sys.platform.startswith("linux"):
    formats.extend(["tar", "gztar", "bztar"])
    if not PY2:
        formats.append("xztar")

# short alias used throughout the tests
join = os.path.join
18
19
def ok_file(d, f):
    """Assert that file *f* exists inside directory *d* and contains ``"123"``.

    Raises AssertionError when the file is missing or its content differs.
    """
    # Honour the requested file name instead of a hard-coded "x.txt".
    full = os.path.join(d, f)
    assert os.path.exists(full)
    # Read through a context manager so the handle is closed promptly.
    with open(full) as fh:
        assert fh.read() == "123"
24
25
def tmpdir():
    """Create a new temporary directory prefixed ``pyunpack_test_`` and return its path."""
    return tempfile.mkdtemp(prefix="pyunpack_test_")
29
30
def test():
    # Two invalid archive inputs, each expected to raise its own error type.
    target = tempfile.gettempdir()

    # "blabla" is presumably not an existing file -> ValueError
    with pytest.raises(ValueError):
        Archive("blabla").extractall(target)

    # this Python source file is presumably not a valid archive -> PatoolError
    with pytest.raises(PatoolError):
        Archive(__file__).extractall(target)
36
37
def create_arc(format):
    """Build an archive holding a single ``x.txt`` (content ``"123"``) and return its path.

    ``format`` is any archive format name accepted by ``shutil.make_archive``
    (for example ``"zip"``, ``"tar"``, ``"gztar"``, ``"bztar"``). The archive
    is created inside a fresh temporary directory.
    """
    d = tmpdir()
    x_txt = join(d, "x.txt")
    # Context manager so the content is flushed and the handle closed before
    # the file is archived.
    with open(x_txt, "w") as fh:
        fh.write("123")

    # Name the archive by absolute base path and pass root_dir explicitly
    # rather than calling os.chdir(), which would leave the whole test
    # process in the temporary directory afterwards.
    x_zip = make_archive(
        join(d, "x"),
        format,  # the archive format - or tar, bztar, gztar
        root_dir=d,  # root for archive
        base_dir=None,  # start archiving from root_dir itself
    )
    return x_zip
54
55
def test2():
    # For every format in `formats`, extract a freshly built archive through
    # each backend and through the cli helper, checking x.txt every time.
    for fmt in formats:
        print(fmt)
        archive = create_arc(fmt)

        # "blabla" is presumably not an existing directory -> ValueError
        with pytest.raises(ValueError):
            Archive(archive).extractall("blabla")

        # explicit patool, default backend, explicit auto, then zipfile
        # (zip archives only)
        backend_kwargs = [{"backend": "patool"}, {}, {"backend": "auto"}]
        if fmt == "zip":
            backend_kwargs.append({"backend": "zipfile"})

        for kwargs in backend_kwargs:
            target = tmpdir()
            Archive(archive, **kwargs).extractall(target)
            ok_file(target, "x.txt")

        # same check through the cli module's extractall
        target = tmpdir()
        cli.extractall(archive, target)
        ok_file(target, "x.txt")
84
85
def test_subdir():
    # Extracting into a not-yet-existing "subdir" must fail with
    # auto_create_dir=False and succeed with auto_create_dir=True.
    for fmt in formats:
        archive = create_arc(fmt)

        missing = join(tmpdir(), "subdir")
        with pytest.raises(ValueError):
            Archive(archive).extractall(missing, auto_create_dir=False)

        created = join(tmpdir(), "subdir")
        Archive(archive, backend="auto").extractall(created, auto_create_dir=True)
        ok_file(created, "x.txt")
97
Full Screen

Accelerate Your Automation Test Cycles With LambdaTest

Leverage LambdaTest’s cloud-based platform to execute your automation tests in parallel and trim down your test execution time significantly. Your first 100 automation testing minutes are on us.

Try LambdaTest

Run Python Tests on LambdaTest Cloud Grid

Execute automation tests with Playwright Python on a cloud-based Grid of 3000+ real browsers and operating systems for both web and mobile applications.

Test now for Free
LambdaTest

We use cookies to give you the best experience. Cookies help to provide a more personalized experience and relevant advertising for you, and web analytics for us. Learn More in our Cookies policy, Privacy & Terms of service

Allow Cookie
Sarah

I hope you find the best code examples for your project.

If you want to accelerate automated browser testing, try LambdaTest. Your first 100 automation testing minutes are FREE.

Sarah Elson (Product & Growth Lead)