Test your AI Agents with the all-new Agent to Agent Testing Platform. Learn More

How to use two_sample_ttest method in pandera

Best Python code snippet using pandera_python

test_hypotheses.py

Source:test_hypotheses.py

...91        {92            "height_in_feet": Column(93                Float,94                [95                    Hypothesis.two_sample_ttest(96                        sample1="M",97                        sample2="F",98                        groupby="sex",99                        relationship="greater_than",100                        alpha=0.5,101                    ),102                ],103            ),104            "sex": Column(String),105        }106    )107    schema_pass_ttest_on_alpha_val_2 = DataFrameSchema(108        {109            "height_in_feet": Column(110                Float,111                [112                    Hypothesis(113                        test=stats.ttest_ind,114                        samples=["M", "F"],115                        groupby="sex",116                        relationship="greater_than",117                        relationship_kwargs={"alpha": 0.5},118                    ),119                ],120            ),121            "sex": Column(String),122        }123    )124    schema_pass_ttest_on_alpha_val_3 = DataFrameSchema(125        {126            "height_in_feet": Column(127                Float,128                [129                    Hypothesis.two_sample_ttest(130                        sample1="M",131                        sample2="F",132                        groupby="sex",133                        relationship="greater_than",134                        alpha=0.5,135                    ),136                ],137            ),138            "sex": Column(String),139        }140    )141    schema_pass_ttest_on_custom_relationship = DataFrameSchema(142        {143            "height_in_feet": Column(144                Float,145                [146                    Hypothesis(147                        test=stats.ttest_ind,148                        samples=["M", "F"],149                        groupby="sex",150                        relationship=lambda stat, pvalue, alpha=0.01: (151                    
        stat > 0 and pvalue / 2 < alpha152                        ),153                        relationship_kwargs={"alpha": 0.5},154                    )155                ],156            ),157            "sex": Column(String),158        }159    )160    # Check the 3 happy paths are successful:161    schema_pass_ttest_on_alpha_val_1.validate(df)162    schema_pass_ttest_on_alpha_val_2.validate(df)163    schema_pass_ttest_on_alpha_val_3.validate(df)164    schema_pass_ttest_on_custom_relationship.validate(df)165    schema_fail_ttest_on_alpha_val_1 = DataFrameSchema(166        {167            "height_in_feet": Column(168                Float,169                [170                    Hypothesis.two_sample_ttest(171                        sample1="M",172                        sample2="F",173                        groupby="sex",174                        relationship="greater_than",175                        alpha=0.05,176                    ),177                ],178            ),179            "sex": Column(String),180        }181    )182    schema_fail_ttest_on_alpha_val_2 = DataFrameSchema(183        {184            "height_in_feet": Column(185                Float,186                [187                    Hypothesis(188                        test=stats.ttest_ind,189                        samples=["M", "F"],190                        groupby="sex",191                        relationship="greater_than",192                        relationship_kwargs={"alpha": 0.05},193                    ),194                ],195            ),196            "sex": Column(String),197        }198    )199    schema_fail_ttest_on_alpha_val_3 = DataFrameSchema(200        {201            "height_in_feet": Column(202                Float,203                [204                    Hypothesis.two_sample_ttest(205                        sample1="M",206                        sample2="F",207                        groupby="sex",208                        relationship="greater_than",209    
                    alpha=0.05,210                    ),211                ],212            ),213            "sex": Column(String),214        }215    )216    with pytest.raises(errors.SchemaError):217        schema_fail_ttest_on_alpha_val_1.validate(df)218    with pytest.raises(errors.SchemaError):219        schema_fail_ttest_on_alpha_val_2.validate(df)220    with pytest.raises(errors.SchemaError):221        schema_fail_ttest_on_alpha_val_3.validate(df)222def test_two_sample_ttest_hypothesis_relationships():223    """Check allowable relationships in two-sample ttest."""224    for relationship in Hypothesis.RELATIONSHIPS:225        schema = DataFrameSchema(226            {227                "height_in_feet": Column(228                    Float,229                    [230                        Hypothesis.two_sample_ttest(231                            sample1="M",232                            sample2="F",233                            groupby="sex",234                            relationship=relationship,235                            alpha=0.5,236                        ),237                    ],238                ),239                "sex": Column(String),240            }241        )242        assert isinstance(schema, DataFrameSchema)243    for relationship in ["foo", "bar", 1, 2, 3, None]:244        with pytest.raises(errors.SchemaInitError):245            DataFrameSchema(246                {247                    "height_in_feet": Column(248                        Float,249                        [250                            Hypothesis.two_sample_ttest(251                                sample1="M",252                                sample2="F",253                                groupby="sex",254                                relationship=relationship,255                                alpha=0.5,256                            ),257                        ],258                    ),259                    "sex": Column(String),260                }261        
    )262def test_one_sample_hypothesis():263    """Check one sample ttest."""264    schema = DataFrameSchema(...

hypothesis_testing.py

Source:hypothesis_testing.py

import sqlite3
import sys

sys.path.append('../')
from Spotify_Song_Data import getSongData

# NOTE(review): `ttest_ind` is not imported in the visible part of this file.
# The listing starts at line 2, so line 1 presumably holds
# `from scipy.stats import ttest_ind` -- confirm.

# Default location of the Billboard SQLite database queried by
# get_songs_by_date().
_BILLBOARD_DB = "/Users/mohammedakel/Desktop/CS1951A-Spring2022/rim-dj/data_deliverable/data/billboard.db"

# (month, day, year) chart dates sampled for each COVID period.
# Peaks: 4/10/2020, 7/24/2020, 1/11/2021, 9/13/2021, 1/15/2022
_PEAK_DATES = (
    (4, 10, 2020),
    (7, 24, 2020),
    (1, 11, 2021),
    (9, 13, 2021),
    (1, 15, 2022),
)
# Troughs: 9/11/2020, 12/30/2020, 6/22/2021, 10/26/2021, 11/28/2021
_TROUGH_DATES = (
    (9, 11, 2020),
    (12, 30, 2020),
    (6, 22, 2021),
    (10, 26, 2021),
    # The original code queried year 2022 here, contradicting the trough list
    # above; 11/28/2021 is the documented date.
    (11, 28, 2021),
)


def two_sample_ttest(peak_values, trough_values):
    """
    Run an independent two-sample t-test on peak vs. trough attribute values.

    Input:
        - peak_values: the attribute values of songs during a COVID peak time period
        - trough_values: the attribute values of songs from a COVID trough time period
    Output:
        - tstats: Test statistics (float)
        - p-value: P-value (float)
    """
    # Using scipy's ttest_ind
    # (https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html)
    # to get the t-statistic and the p-value.
    # Note: nan_policy='omit' disregards null (nan) values. Equal variance is
    # assumed (ttest_ind's default).
    # Independent two-sample t-test, since we are testing the unknown
    # population means of two groups.
    tstats, pvalue = ttest_ind(peak_values, trough_values, nan_policy='omit')
    # Print tstats, pvalue
    print("two_sample_ttest tstats: ", tstats)
    print("two_sample_ttest pvalue: ", pvalue)
    return tstats, pvalue


def get_songs_by_date(month, day, year, db_path=None):
    """
    Fetch the songs stored in the `billboard` table for one chart date.

    Input:
        - month, day, year: the chart date to look up
        - db_path: optional path to the SQLite database; defaults to the
          project's billboard.db location
    Output:
        - song_titles: list of the `title` column values
        - artist_names: list of the `artist` column values, in the same order
    """
    # create a database connection
    conn = sqlite3.connect(_BILLBOARD_DB if db_path is None else db_path)
    try:
        cur = conn.cursor()
        cur.execute(
            "SELECT title, artist FROM billboard WHERE month=? AND day=? AND year=?",
            (month, day, year),
        )
        rows = cur.fetchall()
    finally:
        # Always release the connection; the original never closed it.
        conn.close()
    song_titles = [row[0] for row in rows]
    artist_names = [row[1] for row in rows]
    return song_titles, artist_names


def _collect_attribute(dates, attribute):
    """Return `attribute` for every song found on the given (month, day, year) dates."""
    # Gather all (title, artist) pairs first so the database reads happen
    # before any song-data lookups, as in the original code.
    songs = []
    for month, day, year in dates:
        titles, names = get_songs_by_date(month, day, year)
        songs.extend(zip(titles, names))

    attribute_values = []
    for title, name in songs:
        # NOTE(review): the meaning of the third argument (1) is not visible
        # here -- presumably a result limit; confirm against Spotify_Song_Data.
        song = getSongData(title, name, 1)
        # Skip songs for which no data came back.
        if len(song) != 0:
            attribute_values.append(song[0][attribute])
    return attribute_values


def get_peak_attribute(attribute):
    """Return the values of `attribute` for songs charting on the COVID peak dates."""
    return _collect_attribute(_PEAK_DATES, attribute)


def get_trough_attribute(attribute):
    """Return the values of `attribute` for songs charting on the COVID trough dates."""
    return _collect_attribute(_TROUGH_DATES, attribute)


'''
if __name__ == "__main__":
    # get the peak values and trough values for testing per attribute
    # Note: we can test additional attributes by changing the input argument in get_[peak/trough]_attribute()
    print("---Testing Danceability Difference---")
    peak_danceability = get_peak_attribute("danceability")
    trough_danceability = get_trough_attribute("danceability")
    two_sample_ttest(peak_values=peak_danceability, trough_values=trough_danceability)
    print("---Testing Energy Difference---")
    peak_energy = get_peak_attribute("energy")
    trough_energy = get_trough_attribute("energy")
    two_sample_ttest(peak_values=peak_energy, trough_values=trough_energy)
    print("---Testing Valence Difference---")
    peak_valence = get_peak_attribute("valence")
    trough_valence = get_trough_attribute("valence")
    two_sample_ttest(peak_values=peak_valence, trough_values=trough_valence)
'''
# ...

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub. It covers everything from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.