How to use the to_dataframe method in pandera

Best Python code snippet using pandera_python

SQL.py

Source: SQL.py (GitHub)


...
# Construct a reference to the "full" table
table_ref = dataset_ref.table('full')
table = client.get_table(table_ref)
table.schema
client.list_rows(table, max_results=5).to_dataframe()
client.list_rows(table, selected_fields=table.schema[:4], max_results=5).to_dataframe()
query3 = """
    SELECT score, title
    FROM `bigquery-public-data.hacker_news.full`
    WHERE type = "job"
    """
dry_run_config = bigquery.QueryJobConfig(dry_run=True)
dry_run_query_job = client.query(query3, job_config=dry_run_config)
print('this query will process {} bytes'.format(dry_run_query_job.total_bytes_processed))
# Only run the query if it's less than 1 MB
one_mb = 1000*1000
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=one_mb)
# Set up the query (will only run if it's less than 1 MB)
safe_query_job = client.query(query3, job_config=safe_config)
# API request - try to run the query, and return a pandas DataFrame
# GET an ERROR!!!!
safe_query_job.to_dataframe()
# Only run the query if it's less than 1 GB
one_gb = 1000*1000*1000
safe_config1 = bigquery.QueryJobConfig(maximum_bytes_billed=one_gb)
# Set up the query (will only run if it's less than 1 GB)
safe_query_job1 = client.query(query3, job_config=safe_config1)
# API request - try to run the query, and return a pandas DataFrame
job_post_scores = safe_query_job1.to_dataframe()
# Print average score for job posts
job_post_scores.score.mean()
## Construct a reference to the "comments" table
table_ref = dataset_ref.table('comments')
# API request - fetch the table
table = client.get_table(table_ref)
# Preview the first five lines of the 'comments' table
client.list_rows(table, max_results=5).to_dataframe()
# Query to select comments that received more than 10 replies
query_popular = """
    select parent, count(id)
    from `bigquery-public-data.hacker_news.comments`
    group by parent
    having count(id) > 10
    """
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
query_job = client.query(query_popular, job_config=safe_config)
popular_comments = query_job.to_dataframe()
popular_comments.head()
# Improved version of earlier query, now with aliasing & improved readability
query_improved = """
    SELECT parent, COUNT(1) AS NumPosts
    FROM `bigquery-public-data.hacker_news.comments`
    GROUP BY parent
    HAVING COUNT(1) > 10
    """
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
query_job = client.query(query_improved, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
improved_df = query_job.to_dataframe()
# Print the first five rows of the DataFrame
improved_df.head()
query_good = """
    SELECT parent, COUNT(id)
    FROM `bigquery-public-data.hacker_news.comments`
    GROUP BY parent
    """
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
query_job = client.query(query_good, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
improved_df = query_job.to_dataframe()
# Print the first five rows of the DataFrame
improved_df.head()
# ERROR!!!
query_bad = """
    SELECT author, parent, COUNT(id)
    FROM `bigquery-public-data.hacker_news.comments`
    GROUP BY parent
    """
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
query_job = client.query(query_bad, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
improved_df = query_job.to_dataframe()
# Print the first five rows of the DataFrame
improved_df.head()
# Query to select prolific commenters and post counts
prolific_commenters_query = """
    select author, count(id) as numposts
    from `bigquery-public-data.hacker_news.comments`
    group by author
    having count(id) > 10000
    """
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
query_job = client.query(prolific_commenters_query, job_config=safe_config)
prolific_commenters = query_job.to_dataframe()
prolific_commenters.head()
# How many comments have been deleted? (If a comment was deleted, the deleted column
# in the comments table will have the value True.)
prolific_commenters_query = """
    select deleted, count(id) as numposts
    from `bigquery-public-data.hacker_news.comments`
    group by deleted
    """
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
query_job = client.query(prolific_commenters_query, job_config=safe_config)
deleted_comments = query_job.to_dataframe()
deleted_comments.head()
# -----------------------------------------------------#
# Chicago
client = bigquery.Client()
dataset_ref = client.dataset('chicago_crime', project='bigquery-public-data')
dataset = client.get_dataset(dataset_ref)
## How many tables are in the Chicago Crime dataset?
tables = list(client.list_tables(dataset))
for table in tables:
    print(table.table_id)

## How many columns in the `crime` table have `TIMESTAMP` data?
table_ref = dataset_ref.table('crime')
table = client.get_table(table_ref)
table.schema
## If you wanted to create a map with a dot at the location of each crime,
## what are the names of the two fields you likely need to pull out of the crime
## table to plot the crimes on a map?
client.list_rows(table, max_results=5).to_dataframe()
client.list_rows(table, selected_fields=table.schema[-3:], max_results=5).to_dataframe()
# -----------------------------------------------#
# global_air_quality
# What are all the U.S. cities in the OpenAQ dataset?
client = bigquery.Client()
dataset_ref = client.dataset('openaq', project='bigquery-public-data')
dataset = client.get_dataset(dataset_ref)
tables = list(client.list_tables(dataset))
for table in tables:
    print(table.table_id)
table_ref = dataset_ref.table('global_air_quality')
table = client.get_table(table_ref)
table.schema
client.list_rows(table, max_results=5).to_dataframe()
# Query to select all the items from the "city" column where the "country" column is 'US'
query = """
    SELECT city
    FROM `bigquery-public-data.openaq.global_air_quality`
    WHERE country = 'US'
    """
client = bigquery.Client()
query_job = client.query(query)
us_cities = query_job.to_dataframe()
us_cities.city.value_counts().head()
query1 = """
    SELECT city, country
    FROM `bigquery-public-data.openaq.global_air_quality`
    WHERE country = 'US'
    """
client = bigquery.Client()
query_job = client.query(query1)
us_cities = query_job.to_dataframe()
us_cities.city.value_counts().head()
query2 = """
    SELECT *
    FROM `bigquery-public-data.openaq.global_air_quality`
    WHERE country = 'US'
    """
query_job1 = client.query(query2)
cities = query_job1.to_dataframe()
cities.city.value_counts().head()
cities.country.value_counts().head()
cities.location.value_counts().head()
# Query to select countries with units of "ppm"
query3 = """
    select country
    from `bigquery-public-data.openaq.global_air_quality`
    where unit = 'ppm'
    """
query_job3 = client.query(query3)
ppm = query_job3.to_dataframe()
ppm.country.value_counts().head()
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 10 GB)
safe_config2 = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
first_query_job = client.query(query3, job_config=safe_config2)
# API request - run the query, and return a pandas DataFrame
first_results = first_query_job.to_dataframe()
# View top few rows of results
print(first_results.head())
# Query to select all columns where pollution levels are exactly 0
query4 = """
    SELECT *
    FROM `bigquery-public-data.openaq.global_air_quality`
    WHERE value = 0
    """
# Set up the query
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
query_job = client.query(query4, job_config=safe_config)
# API request - run the query and return a pandas DataFrame
zero_pollution_results = query_job.to_dataframe()
print(zero_pollution_results.head())
# ---------------------------------------------------------#
# Example: Which day of the week has the most fatal motor accidents?
# Create a "Client" object
client = bigquery.Client()
# Construct a reference to the "nhtsa_traffic_fatalities" dataset
dataset_ref = client.dataset("nhtsa_traffic_fatalities", project="bigquery-public-data")
# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)
# Construct a reference to the "accident_2015" table
table_ref = dataset_ref.table("accident_2015")
# API request - fetch the table
table = client.get_table(table_ref)
# Preview the first five lines of the "accident_2015" table
client.list_rows(table, max_results=5).to_dataframe()
# Query to find out the number of accidents for each day of the week
query = """
    select count(consecutive_number) as num_accidents,
           extract(dayofweek from timestamp_of_crash) as day_of_week
    from `bigquery-public-data.nhtsa_traffic_fatalities.accident_2015`
    group by day_of_week
    order by num_accidents desc
    """
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 1 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**9)
query_job = client.query(query, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
accidents_by_day = query_job.to_dataframe()
# Print the DataFrame
accidents_by_day
# Government expenditure on education
# Create a "Client" object
client = bigquery.Client()
# Construct a reference to the "world_bank_intl_education" dataset
dataset_ref = client.dataset("world_bank_intl_education", project="bigquery-public-data")
# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)
# Construct a reference to the "international_education" table
table_ref = dataset_ref.table("international_education")
# API request - fetch the table
table = client.get_table(table_ref)
# Preview the first five lines of the "international_education" table
client.list_rows(table, max_results=5).to_dataframe()
# Query to find out which countries spend the largest fraction of GDP on education
country_spend_pct_query = """
    select country_name,
           avg(value) as avg_ed_spending_pct
    from `bigquery-public-data.world_bank_intl_education.international_education`
    where indicator_code = 'SE.XPD.TOTL.GD.ZS' and year >= 2010 and year <= 2017
    group by country_name
    order by avg_ed_spending_pct desc
    """
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 1 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
country_spend_pct_query_job = client.query(country_spend_pct_query, job_config=safe_config)
# API request - run the query, and return a pandas DataFrame
country_spending_results = country_spend_pct_query_job.to_dataframe()
# View top few rows of results
print(country_spending_results.head())

# Identify interesting codes to explore
code_count_query = """
    select count(country_name) as num_rows,
           indicator_name,
           indicator_code
    from `bigquery-public-data.world_bank_intl_education.international_education`
    where year = 2016
    group by indicator_name, indicator_code
    having num_rows >= 175
    order by num_rows desc
    """
# Set up the query
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
code_count_query_job = client.query(code_count_query, job_config=safe_config)
# API request - run the query, and return a pandas DataFrame
code_count_results = code_count_query_job.to_dataframe()
# View top few rows of results
print(code_count_results.head())
# -----------As With-----------------------------------------------------------#
# How many Bitcoin transactions are made per month?
# Construct a reference to the "crypto_bitcoin" dataset
dataset_ref = client.dataset("crypto_bitcoin", project="bigquery-public-data")
# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)
# Construct a reference to the "transactions" table
table_ref = dataset_ref.table("transactions")
# API request - fetch the table
table = client.get_table(table_ref)
# Preview the first five lines of the "transactions" table
client.list_rows(table, max_results=5).to_dataframe()
# Query to select the number of transactions per date, sorted by date
query_with_cte = """
    WITH time AS
    (
        SELECT date(block_timestamp) as trans_date
        FROM `bigquery-public-data.crypto_bitcoin.transactions`
    )
    SELECT count(1) AS transactions,
           trans_date
    FROM time
    GROUP BY trans_date
    ORDER BY trans_date
    """
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 10 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
query_job = client.query(query_with_cte, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
transactions_by_date = query_job.to_dataframe()
# Print the first five rows
transactions_by_date.head()
# Raw results showing the number of Bitcoin transactions per day over the whole timespan of this dataset
transactions_by_date.set_index('trans_date').plot()
# Taxi trips in the city of Chicago
# Construct a reference to the "chicago_taxi_trips" dataset
dataset_ref = client.dataset("chicago_taxi_trips", project="bigquery-public-data")
# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)
tables = list(client.list_tables(dataset))
for table in tables:
    print(table.table_id)
# Construct a reference to the "taxi_trips" table
table_ref = dataset_ref.table("taxi_trips")
# API request - fetch the table
table = client.get_table(table_ref)
# Preview the first five lines of the "taxi_trips" table
client.list_rows(table, max_results=5).to_dataframe()
rides_per_year_query = """
    SELECT EXTRACT(year FROM trip_start_timestamp) as year,
           COUNT(unique_key) as num_trips
    FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
    GROUP BY year
    ORDER BY count(unique_key) DESC
    """
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 10 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
rides_per_year_query_job = client.query(rides_per_year_query, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
rides_per_year_result = rides_per_year_query_job.to_dataframe()
# Print the first five rows
rides_per_year_result.head()
# View results
print(rides_per_year_result)
# You'd like to take a closer look at rides from 2017.
rides_per_month_query = """
    SELECT EXTRACT(month FROM trip_start_timestamp) as month,
           COUNT(unique_key) as num_trips
    FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
    WHERE EXTRACT(year FROM trip_start_timestamp) = 2017
    GROUP BY month
    ORDER BY count(unique_key) DESC
    """
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 10 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
rides_per_month_query_job = client.query(rides_per_month_query, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
rides_per_month_result = rides_per_month_query_job.to_dataframe()
# Print the first five rows
rides_per_month_result.head()
# View results
print(rides_per_month_result)
# Write a query that shows, for each hour of the day in the dataset,
# the corresponding number of trips and average speed.
speeds_query = """
    WITH RelevantRides AS
    (
        SELECT EXTRACT(hour FROM trip_start_timestamp) as hour_of_day,
               trip_miles,
               trip_seconds,
               unique_key
        FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
        WHERE trip_start_timestamp > '2017-01-01'
          and trip_start_timestamp < '2017-07-01'
          and trip_seconds > 0
          and trip_miles > 0
    )
    SELECT hour_of_day,
           COUNT(unique_key) as num_trips,
           3600 * SUM(trip_miles) / SUM(trip_seconds) as avg_mph
    FROM RelevantRides
    GROUP BY hour_of_day
    ORDER BY hour_of_day
    """
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 10 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**12)
speeds_query_job = client.query(speeds_query, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
speeds_result = speeds_query_job.to_dataframe()
# Print the first five rows
speeds_result.head()
# View results
print(speeds_result)
# -------------------------------------------------------------#
# How many files are covered by each type of software license?
from google.cloud import bigquery
# Create a "Client" object
client = bigquery.Client()
# Construct a reference to the "github_repos" dataset
dataset_ref = client.dataset("github_repos", project="bigquery-public-data")
# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)
# Construct a reference to the "licenses" table
licenses_ref = dataset_ref.table("licenses")
# API request - fetch the table
licenses_table = client.get_table(licenses_ref)
# Preview the first five lines of the "licenses" table
client.list_rows(licenses_table, max_results=5).to_dataframe()
# Construct a reference to the "sample_files" table
files_ref = dataset_ref.table("sample_files")
# API request - fetch the table
files_table = client.get_table(files_ref)
# Preview the first five lines of the "sample_files" table
client.list_rows(files_table, max_results=5).to_dataframe()
# Query to determine the number of files per license, sorted by number of files
query = """
    SELECT l.license, COUNT(1) as number_of_files
    FROM `bigquery-public-data.github_repos.sample_files` AS sf
    INNER JOIN `bigquery-public-data.github_repos.licenses` AS l
        ON sf.repo_name = l.repo_name
    GROUP BY l.license
    ORDER BY number_of_files DESC
    """
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 10 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
query_job = client.query(query, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
file_count_by_license = query_job.to_dataframe()
# Print the DataFrame
file_count_by_license
# Write the SQL queries that might serve as the foundation for this type of service.
# Create a "Client" object
client = bigquery.Client()
# Construct a reference to the "stackoverflow" dataset
dataset_ref = client.dataset('stackoverflow', project='bigquery-public-data')
# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)
# Get a list of available tables
tables = list(client.list_tables(dataset))
list_of_table = [table.table_id for table in tables]
print(list_of_table)
# Construct a reference to the "posts_answers" table
answers_table_ref = dataset_ref.table("posts_answers")
# API request - fetch the table
answers_table = client.get_table(answers_table_ref)
# Preview the first five lines of the "posts_answers" table
client.list_rows(answers_table, max_results=5).to_dataframe()
# Construct a reference to the "posts_questions" table
questions_table_ref = dataset_ref.table("posts_questions")
# API request - fetch the table
questions_table = client.get_table(questions_table_ref)
# Preview the first five lines of the "posts_questions" table
client.list_rows(questions_table, max_results=5).to_dataframe()
questions_query = """
    SELECT id, title, owner_user_id
    FROM `bigquery-public-data.stackoverflow.posts_questions`
    WHERE tags LIKE '%bigquery%'
    """
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 1 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
questions_query_job = client.query(questions_query, job_config=safe_config)  # Your code goes here
# API request - run the query, and return a pandas DataFrame
questions_results = questions_query_job.to_dataframe()  # Your code goes here
# Preview results
print(questions_results.head())
#
answers_query = """
    SELECT a.id,
           a.body,
           a.owner_user_id
    FROM `bigquery-public-data.stackoverflow.posts_questions` AS q
    INNER JOIN `bigquery-public-data.stackoverflow.posts_answers` AS a
        ON q.id = a.parent_id
    WHERE q.tags LIKE '%bigquery%'
    """
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 1 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**12)
answers_query_job = client.query(answers_query, job_config=safe_config)
# API request - run the query, and return a pandas DataFrame
answers_results = answers_query_job.to_dataframe()
# Preview results
print(answers_results.head())
# Answer the question
# You have the merge you need. But you want a list of users who have answered many questions,
# which requires more work beyond your previous result.
# Write a new query that has a single row for each user who answered at least one question
# with a tag that includes the string "bigquery". Your results should have two columns:
# user_id - contains the owner_user_id column from the posts_answers table
# number_of_answers - contains the number of answers the user has written to "bigquery"-related questions
bigquery_experts_query = """
    SELECT a.owner_user_id as user_id,
           COUNT(1) as number_of_answers
    FROM `bigquery-public-data.stackoverflow.posts_questions` AS q
    INNER JOIN `bigquery-public-data.stackoverflow.posts_answers` AS a
        ON q.id = a.parent_id
    WHERE q.tags LIKE '%bigquery%'
    GROUP BY a.owner_user_id
    """
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 1 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
bigquery_experts_query_job = client.query(bigquery_experts_query, job_config=safe_config)
# API request - run the query, and return a pandas DataFrame
bigquery_experts_results = bigquery_experts_query_job.to_dataframe()
# Preview results
print(bigquery_experts_results.head())
# Building a more generally useful service
# How could you convert what you've done to a general function a website could call
# on the backend to get experts on any topic?
def expert_finder(topic, client):
    '''
    Returns a DataFrame with the user IDs who have written Stack Overflow answers on a topic.
    Inputs:
        topic: A string with the topic of interest
        client: A Client object that specifies the connection to the Stack Overflow dataset
    Outputs:
        results: A DataFrame with columns for user_id and number_of_answers.
        Follows similar logic to bigquery_experts_results shown above.
    '''
    # f-string so the topic parameter is actually interpolated into the query
    my_query = f"""
        SELECT a.owner_user_id AS user_id, COUNT(1) AS number_of_answers
        FROM `bigquery-public-data.stackoverflow.posts_questions` AS q
        INNER JOIN `bigquery-public-data.stackoverflow.posts_answers` AS a
            ON q.id = a.parent_id
        WHERE q.tags LIKE '%{topic}%'
        GROUP BY a.owner_user_id
        """
    # Set up the query (a real service would have good error handling for
    # queries that scan too much data)
    safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
    my_query_job = client.query(my_query, job_config=safe_config)
    # API request - run the query, and return a pandas DataFrame
    results = my_query_job.to_dataframe()
...
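The script above repeats one core pattern from google-cloud-bigquery: configure a QueryJobConfig with maximum_bytes_billed as a cost guard, submit the SQL with client.query(), then call to_dataframe() on the resulting job to materialize the results as a pandas DataFrame. A minimal, self-contained sketch of that pattern follows, assuming Google Cloud credentials are configured and the pandas/db-dtypes extras for google-cloud-bigquery are installed:

# Minimal sketch of the query-to-DataFrame pattern used throughout SQL.py above.
from google.cloud import bigquery

client = bigquery.Client()

sql = """
    SELECT city
    FROM `bigquery-public-data.openaq.global_air_quality`
    WHERE country = 'US'
"""

# Cancel the query if it would scan more than roughly 1 GB of data.
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**9)
query_job = client.query(sql, job_config=safe_config)

# QueryJob.to_dataframe() waits for the job to finish and returns a pandas DataFrame.
us_cities = query_job.to_dataframe()
print(us_cities.head())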


test_purpleair.py

Source: test_purpleair.py (GitHub)


...
        """
        Test that not using sensor filters works
        """
        p = network.SensorList()
        p.to_dataframe('all', 'parent')
        p.to_dataframe('all', 'child')

    def test_to_dataframe_filtering_outside(self):
        """
        Test that the outside sensor filter works
        """
        p = network.SensorList()
        p.to_dataframe('outside', 'parent')
        p.to_dataframe('outside', 'child')

    def test_to_dataframe_filtering_useful(self):
        """
        Test that the useful sensor filter works
        """
        p = network.SensorList()
        p.to_dataframe('useful', 'parent')
        p.to_dataframe('useful', 'child')

    def test_to_dataframe_filtering_family(self):
        """
        Test that the family sensor filter works
        """
        p = network.SensorList()
        p.to_dataframe('family', 'parent')
        p.to_dataframe('family', 'child')

    def test_to_dataframe_cols(self):
        """
        Test that child and parent sensor dataframes contain the same columns
        """
        p = network.SensorList()
        df_a = p.to_dataframe(sensor_filter='all', channel='parent')
        df_b = p.to_dataframe(sensor_filter='all', channel='child')
        self.assertListEqual(list(df_a.columns), list(df_b.columns))


class TestPurpleAirColumnFilters(unittest.TestCase):
    """
    Test that we can initialize the PurpleAir network
    """

    def test_to_dataframe_filtering_no_column(self):
        """
        Test that not providing a column fails
        """
        p = network.SensorList()
        with self.assertRaises(ValueError):
            p.to_dataframe('column', 'parent')
        with self.assertRaises(ValueError):
            p.to_dataframe('column', 'child')

    def test_to_dataframe_filtering_bad_column(self):
        """
        Test that providing a bad column fails
        """
        p = network.SensorList()
        with self.assertRaises(ValueError):
            p.to_dataframe('column', 'parent', 'fake_col_name')
        with self.assertRaises(ValueError):
            p.to_dataframe('column', 'child', 'fake_col_name')

    def test_to_dataframe_filtering_no_value(self):
        """
        Test that providing a column filter without a value works
        """
        p = network.SensorList()
        p.to_dataframe('column', 'parent', 'temp_f')
        p.to_dataframe('column', 'child', 'temp_f')

    def test_to_dataframe_filtering_good_value(self):
        """
        Test that providing a good value works on the parent channel
        """
        p = network.SensorList()
        p.to_dataframe('column', 'parent', 'location_type', 'outside')
        with self.assertRaises(ValueError):
            p.to_dataframe('column', 'child', 'location_type', 'outside')

    def test_to_dataframe_filtering_bad_value(self):
        """
        Test that providing a bad value fails
        """
        p = network.SensorList()
        with self.assertRaises(ValueError):
            p.to_dataframe('column', 'parent', 'location_type', 1234)
        with self.assertRaises(ValueError):
...
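These tests drive the purpleair package's SensorList.to_dataframe() with a sensor filter ('all', 'outside', 'useful', 'family', or 'column'), a channel ('parent' or 'child'), and, for column filtering, a column name and optional value. A short usage sketch follows; the call signature and import are inferred from the test calls above, and the SensorList constructor fetches live PurpleAir data, so this needs network access:

# Sketch of direct use of the API exercised by these tests (signature assumed from the tests).
from purpleair import network

sensors = network.SensorList()

# All sensors, parent channel
all_df = sensors.to_dataframe('all', 'parent')

# Outdoor sensors only
outside_df = sensors.to_dataframe(sensor_filter='outside', channel='parent')

# Column filter: keep sensors whose 'location_type' equals 'outside'
outdoor_df = sensors.to_dataframe('column', 'parent', 'location_type', 'outside')

print(all_df.shape, outside_df.shape, outdoor_df.shape)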


test_utils_unit.py

Source: test_utils_unit.py (GitHub)


...
    return os.path.dirname(os.path.abspath(__file__))

data_file = os.path.join(rootdir(), "data/poloniex_xrp-btc.json")
with open(data_file, "r") as outfile:
    data = json.load(outfile)

def test_to_dataframe():
    assert isinstance(to_dataframe(data), DataFrame)

def test_resample():
    df = to_dataframe(data)
    assert isinstance(resample(df, "2d"), DataFrame)
    assert list(resample(df, "2d").index.values[-2:]) == [
        numpy.datetime64("2019-05-05T00:00:00.000000000"),
        numpy.datetime64("2019-05-07T00:00:00.000000000"),
    ]

def test_resample_calendar():
    df = to_dataframe(data)
    assert isinstance(resample(df, "W-Mon"), DataFrame)
    assert list(resample(df, "W-Mon").index.values[-2:]) == [
        numpy.datetime64("2019-05-06T00:00:00.000000000"),
        numpy.datetime64("2019-05-13T00:00:00.000000000"),
    ]

def test_trending_up():
    df = to_dataframe(data)
    ma = TA.HMA(df)
    assert isinstance(trending_up(ma, 10), Series)
    assert not trending_up(ma, 10).values[-1]

def test_trending_down():
    df = to_dataframe(data)
    ma = TA.HMA(df)
    assert isinstance(trending_down(ma, 10), Series)
...
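The to_dataframe helper under test converts a raw Poloniex OHLCV JSON payload into a pandas DataFrame that resample() can then downsample to intervals such as "2d" or "W-Mon". The following is a hypothetical illustration of that kind of utility, not the project's actual implementation; the column names and the unix-seconds 'date' field are assumptions based on typical exchange payloads:

# Hypothetical sketch of an OHLCV to_dataframe/resample utility pair.
import pandas as pd

def to_dataframe_example(ticks: list) -> pd.DataFrame:
    """Convert a list of OHLCV dicts (with a unix-seconds 'date' field) to a time-indexed DataFrame."""
    df = pd.DataFrame(ticks)
    df["datetime"] = pd.to_datetime(df["date"], unit="s")
    return df.set_index("datetime").sort_index()

def resample_example(df: pd.DataFrame, interval: str) -> pd.DataFrame:
    """Downsample OHLCV rows to a coarser interval, e.g. '2d' or 'W-Mon'."""
    return df.resample(interval).agg(
        {"open": "first", "high": "max", "low": "min", "close": "last", "volume": "sum"}
    )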


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, right from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest now!

Get 100 automation testing minutes free!

