Best Python code snippet using assertpy_python
segmentation.py
Source:segmentation.py  
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Created on Thu Nov 26 19:09:53 2020
# Author:     zhuoyin94 <zhuoyin94@163.com>
# Github:     https://github.com/MichaelYin1994
# Reference:  https://github.com/someus/
"""
This module (textrank.segmentation) provides the WordSegmentation class used
for Chinese word segmentation.
"""

import unicodedata

import pkuseg

# NOTE(review): the non-ASCII entries below were restored from a listing whose
# encoding was damaged; the exact full-width marks should be confirmed against
# the upstream file. segment_paragraph folds both the text and the delimiters
# with NFKC, so the full-width variants behave like their ASCII counterparts.
SENTENCE_DELIMITERS = ["?", "!", ";", "？", "。", ",", ":",
                       "！", "、", "；", "……", "…", "\n", "\t"]
ALLOW_WORD_TAGS = ["an", "i", "j", "l", "n",
                   "nr", "nrfg", "ns", "nt",
                   "nz", "t", "v", "vd", "vn", "eng"]


class WordSegmentation:
    """Word-segmentation helper.

    Splits sentences (or a paragraph of sentences) into ordered lists of
    words according to the configured conditions.

    @Parameters:
    ----------
        is_lower: {bool-like}
            Whether to lower-case the segmented words. Enabled by default.
        is_use_stop_words: {bool-like}
            Whether to drop stop words from the result; the stop-word list is
            given by stop_words_vocab.
        is_use_word_tags_filter: {bool-like}
            Whether to keep only words whose part-of-speech tag is allowed.
        allow_word_tags: {list-like}
            Allowed part-of-speech tags. Falls back to ALLOW_WORD_TAGS.
        delimiters: {list-like}
            Delimiters used to split a paragraph into sentences. Falls back
            to SENTENCE_DELIMITERS.
        stop_words_vocab: {list-like}
            User stop-word list, empty by default.
        user_vocab: {list-like}
            User vocabulary handed to pkuseg as its user dictionary, empty by
            default.

    @References:
    ----------
    [1] https://github.com/letiantian/TextRank4ZH
    [2] https://github.com/lancopku/pkuseg-python
    """

    def __init__(self,
                 is_lower=True,
                 is_use_stop_words=False,
                 is_use_word_tags_filter=False,
                 allow_word_tags=None,
                 delimiters=None,
                 user_vocab=None,
                 stop_words_vocab=None):
        # Stop words are stripped and stored as a set for O(1) membership.
        self.stop_words = {word.strip() for word in (stop_words_vocab or [])}
        self.default_user_vocab = user_vocab

        # Instance-level defaults for the per-call flags.
        self.is_lower = is_lower
        self.is_use_stop_words = is_use_stop_words
        self.is_use_word_tags_filter = is_use_word_tags_filter

        # Empty / None inputs fall back to the module defaults; duplicates
        # are removed.
        self.default_allow_word_tags = list(
            set(allow_word_tags or ALLOW_WORD_TAGS))
        self.default_delimiters = list(set(delimiters or SENTENCE_DELIMITERS))

        # Caches: {sentence: words} and {sentence: part-of-speech tags}.
        self.sentence_cutted_dict = {}
        self.sentence_cutted_postag_dict = {}

        # TODO(zhuoyin94@163.com): pkuseg's postag mode needs an internet
        # connection to fetch its vocabulary.
        self.seg = pkuseg.pkuseg(user_dict=user_vocab, postag=True)

    def _resolve_flags(self, is_lower, is_use_stop_words,
                       is_use_word_tags_filter):
        """Replace flags left as None with the instance-level defaults.

        Only None means "not specified": an explicit False must override an
        instance default of True.
        """
        if is_lower is None:
            is_lower = self.is_lower
        if is_use_stop_words is None:
            is_use_stop_words = self.is_use_stop_words
        if is_use_word_tags_filter is None:
            is_use_word_tags_filter = self.is_use_word_tags_filter
        return is_lower, is_use_stop_words, is_use_word_tags_filter

    def segment_sentence(self, sentence,
                         is_lower=None,
                         is_use_stop_words=None,
                         is_use_word_tags_filter=None):
        """Segment a single sentence and return its words as a list.

        @Parameters:
        ----------
            sentence: {str-like}
                The sentence to segment (must be a str).
            is_lower: {bool-like}
                See the class docstring. None uses the instance default.
            is_use_stop_words: {bool-like}
                See the class docstring. None uses the instance default.
            is_use_word_tags_filter: {bool-like}
                See the class docstring. None uses the instance default.

        @Raises:
        ----------
            TypeError: the input is not a str.

        @Returns:
        ----------
            The list of words produced by pkuseg after filtering, e.g.:
            "我是中国公民" --->>  ["我", "是", "中国", "公民"]
        """
        if not isinstance(sentence, str):
            raise TypeError(("The input sentence type should be str, turns " +
                             "out to be: {}".format(type(sentence))))

        is_lower, is_use_stop_words, is_use_word_tags_filter = \
            self._resolve_flags(is_lower, is_use_stop_words,
                                is_use_word_tags_filter)

        # Segment with pkuseg, re-using the cached result when available.
        if sentence in self.sentence_cutted_dict and \
                sentence in self.sentence_cutted_postag_dict:
            word_list = self.sentence_cutted_dict[sentence]
            postag_list = self.sentence_cutted_postag_dict[sentence]
        else:
            sentence_cutted = self.seg.cut(sentence)
            word_list = [item[0] for item in sentence_cutted]
            postag_list = [item[1] for item in sentence_cutted]
            self.sentence_cutted_dict[sentence] = word_list
            self.sentence_cutted_postag_dict[sentence] = postag_list

        # STEP 1: keep only the words whose part-of-speech tag is allowed.
        if is_use_word_tags_filter:
            allowed_tags = set(self.default_allow_word_tags)
            word_list = [word for word, tag in zip(word_list, postag_list)
                         if tag in allowed_tags]

        # STEP 2: strip surrounding whitespace and drop empty tokens. This
        # always builds a new list, so the cached lists are never mutated.
        word_list = [word.strip() for word in word_list]
        word_list = [word for word in word_list if word]

        # STEP 3: lower-case the words.
        if is_lower:
            word_list = [word.lower() for word in word_list]

        # STEP 4: drop stop words.
        if is_use_stop_words:
            word_list = [word for word in word_list
                         if word not in self.stop_words]
        return word_list

    def segment_sentence_list(self, sentence_list,
                              is_lower=None,
                              is_use_stop_words=None,
                              is_use_word_tags_filter=None):
        """Segment every sentence of a list of sentences.

        @Parameters:
        ----------
            sentence_list: {list-like}
                List whose elements are unsegmented sentences (str).
            is_lower: {bool-like}
                See the class docstring. None uses the instance default.
            is_use_stop_words: {bool-like}
                See the class docstring. None uses the instance default.
            is_use_word_tags_filter: {bool-like}
                See the class docstring. None uses the instance default.

        @Raises:
        ----------
            TypeError: the input is not a list.

        @Returns:
        ----------
            One word list per input sentence. The implementation follows
            reference [1] of the class docstring. For example:
            [["我", "是", "中国", "公民"],
            ...
            ["北京", "工业", "大学"]]
        """
        if not isinstance(sentence_list, list):
            raise TypeError("Invalid input sentence list !")

        flags = self._resolve_flags(is_lower, is_use_stop_words,
                                    is_use_word_tags_filter)
        return [self.segment_sentence(sentence, *flags)
                for sentence in sentence_list]

    def segment_paragraph(self, paragraph=None,
                          is_lower=None,
                          is_use_stop_words=None,
                          is_use_word_tags_filter=None):
        """Split a paragraph into sentences and segment each sentence.

        @Parameters:
        ----------
            paragraph: {str-like}
                The text to process (must be a str).
            is_lower: {bool-like}
                See the class docstring. None uses the instance default.
            is_use_stop_words: {bool-like}
                See the class docstring. None uses the instance default.
            is_use_word_tags_filter: {bool-like}
                See the class docstring. None uses the instance default.

        @Raises:
        ----------
            TypeError: the input is not a str.

        @Returns:
        ----------
            The list of segmented sentences.
        """
        if not isinstance(paragraph, str):
            raise TypeError("Invalid input paragraph type !")

        is_lower, is_use_stop_words, is_use_word_tags_filter = \
            self._resolve_flags(is_lower, is_use_stop_words,
                                is_use_word_tags_filter)

        # STEP 0: NFKC-normalise the text so that full-width punctuation is
        # folded to ASCII. The delimiters get the same treatment, otherwise
        # compatibility characters such as "…" or "！" could never match the
        # normalised text.
        paragraph = unicodedata.normalize("NFKC", paragraph)
        delimiters = {unicodedata.normalize("NFKC", sep)
                      for sep in self.default_delimiters}
        delimiters.discard("")  # str.split rejects an empty separator

        # STEP 1: split on every delimiter in turn.
        pieces = [paragraph]
        for sep in delimiters:
            next_pieces = []
            for piece in pieces:
                next_pieces.extend(piece.split(sep))
            pieces = next_pieces
        sentence_list = [piece.strip() for piece in pieces if piece.strip()]

        # STEP 2: segment every sentence.
        sentence_list_cutted = self.segment_sentence_list(
            sentence_list, is_lower, is_use_stop_words,
            is_use_word_tags_filter)

        # NOTE(review): the listing this was restored from is cut off after
        # the call above; returning the segmented sentences matches the
        # documented return value - confirm against the full file.
        return sentence_list_cutted
Source:mvn_tri_test.py  
# Copyright 2021 DeepMind Technologies Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for `mvn_tri.py`."""

from absl.testing import absltest
from absl.testing import parameterized

import chex
from distrax._src.distributions.mvn_tri import MultivariateNormalTri
from distrax._src.utils import equivalence
import jax.numpy as jnp
import numpy as np


def _get_scale_tril_from_scale_triu(scale_triu: np.ndarray) -> np.ndarray:
  """Returns the lower Cholesky factor of `triu(scale_triu) @ triu(...)^T`.

  Args:
    scale_triu: Array whose last two dimensions are square; only the upper
      triangle of each matrix is used.

  Returns:
    The Cholesky factor of the covariance implied by the upper-triangular
    scale, with the same shape as `scale_triu`.
  """
  scale_triu = np.triu(scale_triu)
  # Transpose only the trailing matrix axes so leading batch axes are kept.
  scale_triu_t = np.swapaxes(scale_triu, -1, -2)
  cov = np.matmul(scale_triu, scale_triu_t)
  return np.linalg.cholesky(cov)


def _make_tfp_dist_kwargs(loc, scale_tri, is_lower):
  """Builds the TFP kwargs (`loc`, `scale_tril`) for a given distrax setup.

  For `is_lower=True` the scale is passed through unchanged; otherwise the
  upper-triangular scale is converted to the equivalent lower-triangular one.
  """
  if is_lower:
    return {'loc': loc, 'scale_tril': scale_tri}
  return {'loc': loc, 'scale_tril': _get_scale_tril_from_scale_triu(scale_tri)}


class MultivariateNormalTriTest(
    equivalence.EquivalenceTest, parameterized.TestCase):
  """Tests `MultivariateNormalTri` through the `EquivalenceTest` helpers."""

  def setUp(self):
    # pylint: disable=too-many-function-args
    super().setUp(MultivariateNormalTri)

  @parameterized.named_parameters(
      ('all inputs are None', {}),
      ('wrong dimension of loc', {
          'loc': np.array(0.),
      }),
      ('scale_tri is 0d', {
          'scale_tri': np.array(1.),
      }),
      ('scale_tri is 1d', {
          'scale_tri': np.ones((4,)),
      }),
      ('scale_tri is not square', {
          'scale_tri': np.ones((4, 3)),
      }),
      ('inconsistent loc and scale_tri', {
          'loc': np.zeros((4,)),
          'scale_tri': np.ones((5, 5)),
      }),
  )
  def test_raises_on_wrong_inputs(self, dist_kwargs):
    """Invalid constructor arguments raise `ValueError`."""
    with self.assertRaises(ValueError):
      self.distrax_cls(**dist_kwargs)

  @parameterized.named_parameters(
      ('loc provided', {'loc': np.zeros((4,))}),
      ('scale_tri provided', {'scale_tri': np.eye(4)}),
  )
  def test_default_properties(self, dist_kwargs):
    """Omitted arguments default to zero loc, identity scale, lower=True."""
    dist = self.distrax_cls(**dist_kwargs)
    self.assertTrue(dist.is_lower)
    self.assertion_fn(rtol=1e-3)(dist.loc, jnp.zeros((4,)))
    self.assertion_fn(rtol=1e-3)(dist.scale_tri, jnp.eye(4))

  @parameterized.named_parameters(
      ('unbatched', (), (4,), (4, 4), True),
      ('batched loc', (7,), (7, 4), (4, 4), True),
      ('batched scale_tri lower', (7,), (4,), (7, 4, 4), True),
      ('batched scale_tri upper', (7,), (4,), (7, 4, 4), False),
  )
  def test_properties(self, batch_shape, loc_shape, scale_tri_shape, is_lower):
    """Properties are broadcast to the batch shape and masked triangularly."""
    rng = np.random.default_rng(2022)
    loc = rng.normal(size=loc_shape)
    scale_tri = rng.normal(size=scale_tri_shape)
    dist = self.distrax_cls(loc=loc, scale_tri=scale_tri, is_lower=is_lower)
    tri_fn = jnp.tril if is_lower else jnp.triu
    self.assertEqual(dist.batch_shape, batch_shape)
    self.assertEqual(dist.is_lower, is_lower)
    self.assertion_fn(rtol=1e-3)(
        dist.loc, jnp.broadcast_to(loc, batch_shape + (4,)))
    self.assertion_fn(rtol=1e-3)(dist.scale_tri, jnp.broadcast_to(
        tri_fn(scale_tri), batch_shape + (4, 4)))

  @chex.all_variants
  @parameterized.named_parameters(
      ('unbatched, no shape', (), (4,), (4, 4)),
      ('batched loc, no shape', (), (7, 4), (4, 4)),
      ('batched scale_tri, no shape', (), (4,), (7, 4, 4)),
      ('unbatched, with shape', (3,), (4,), (4, 4)),
      ('batched loc, with shape', (3,), (7, 4), (4, 4)),
      ('batched scale_tri, with shape', (3,), (4,), (7, 4, 4)),
  )
  def test_sample_shape(self, sample_shape, loc_shape, scale_tri_shape):
    """Sample shapes agree with the TFP reference distribution."""
    rng = np.random.default_rng(2022)
    loc = rng.normal(size=loc_shape)
    scale_tri = rng.normal(size=scale_tri_shape)
    dist_kwargs = {'loc': loc, 'scale_tri': scale_tri}
    tfp_dist_kwargs = {'loc': loc, 'scale_tril': scale_tri}
    super()._test_sample_shape(
        dist_args=(), dist_kwargs=dist_kwargs, tfp_dist_kwargs=tfp_dist_kwargs,
        sample_shape=sample_shape)

  @chex.all_variants
  @parameterized.named_parameters(
      ('float32', jnp.float32),
      ('float64', jnp.float64))
  def test_sample_dtype(self, dtype):
    """Samples have the distribution's dtype."""
    dist_params = {
        'loc': np.array([0., 0.], dtype),
        'scale_tri': np.array([[1., 0.], [0., 1.]], dtype)}
    dist = self.distrax_cls(**dist_params)
    samples = self.variant(dist.sample)(seed=self.key)
    self.assertEqual(samples.dtype, dist.dtype)
    chex.assert_type(samples, dtype)

  @chex.all_variants
  @parameterized.named_parameters(
      ('unbatched, unbatched value', (4,), (4,), (4, 4), True),
      ('unbatched, unbatched value, upper', (4,), (4,), (4, 4), False),
      ('batched loc, unbatched value', (4,), (7, 4), (4, 4), True),
      ('batched scale_tri, unbatched value', (4,), (4,), (7, 4, 4), True),
      ('unbatched, batched value', (3, 7, 4), (4,), (4, 4), True),
      ('batched loc, batched value', (3, 7, 4), (7, 4), (4, 4), True),
      ('batched scale_tri, batched value', (3, 7, 4), (4,), (7, 4, 4), True),
      ('batched scale_tri, batched value, upper',
       (3, 7, 4), (4,), (7, 4, 4), False),
  )
  def test_log_prob(self, value_shape, loc_shape, scale_tri_shape, is_lower):
    """`log_prob` agrees with the TFP reference distribution."""
    rng = np.random.default_rng(2022)
    # The draw order (loc, scale_tri, value) fixes the generated test data.
    loc = rng.normal(size=loc_shape)
    scale_tri = rng.normal(size=scale_tri_shape)
    value = rng.normal(size=value_shape)
    dist_kwargs = {'loc': loc, 'scale_tri': scale_tri, 'is_lower': is_lower}
    tfp_dist_kwargs = _make_tfp_dist_kwargs(loc, scale_tri, is_lower)
    super()._test_attribute(
        attribute_string='log_prob',
        dist_kwargs=dist_kwargs,
        tfp_dist_kwargs=tfp_dist_kwargs,
        call_args=(value,),
        assertion_fn=self.assertion_fn(rtol=1e-3))

  @chex.all_variants(with_pmap=False)
  @parameterized.named_parameters(
      ('unbatched', (4,), (4, 4)),
      ('batched loc', (7, 4), (4, 4)),
      ('batched scale_tri', (4,), (7, 4, 4)),
  )
  def test_method(self, loc_shape, scale_tri_shape):
    """Summary statistics agree with the TFP reference distribution."""
    rng = np.random.default_rng(2022)
    loc = rng.normal(size=loc_shape)
    scale_tri = rng.normal(size=scale_tri_shape)
    for method in ['entropy', 'mean', 'stddev', 'variance',
                   'covariance', 'mode']:
      for is_lower in [True, False]:
        # Second-moment statistics are compared with a looser tolerance.
        if method in ['stddev', 'covariance', 'variance']:
          rtol = 2e-2 if is_lower else 5e-2
        else:
          rtol = 1e-3
        dist_kwargs = {'loc': loc, 'scale_tri': scale_tri, 'is_lower': is_lower}
        tfp_dist_kwargs = _make_tfp_dist_kwargs(loc, scale_tri, is_lower)
        with self.subTest(method=method, is_lower=is_lower):
          super()._test_attribute(
              method,
              dist_kwargs=dist_kwargs,
              tfp_dist_kwargs=tfp_dist_kwargs,
              assertion_fn=self.assertion_fn(rtol=rtol))

  @chex.all_variants(with_pmap=False)
  @parameterized.named_parameters(
      ('kl distrax_to_distrax', 'kl_divergence', 'distrax_to_distrax'),
      ('kl distrax_to_tfp', 'kl_divergence', 'distrax_to_tfp'),
      ('kl tfp_to_distrax', 'kl_divergence', 'tfp_to_distrax'),
      ('cross-ent distrax_to_distrax', 'cross_entropy', 'distrax_to_distrax'),
      ('cross-ent distrax_to_tfp', 'cross_entropy', 'distrax_to_tfp'),
      ('cross-ent tfp_to_distrax', 'cross_entropy', 'tfp_to_distrax'))
  def test_with_two_distributions(self, function_string, mode_string):
    """KL divergence and cross-entropy agree with the TFP reference."""
    rng = np.random.default_rng(2022)
    loc1 = rng.normal(size=(5, 1, 4))
    scale_tri1 = rng.normal(size=(3, 4, 4))
    loc2 = rng.normal(size=(3, 4))
    scale_tri2 = rng.normal(size=(4, 4))
    for is_lower in [True, False]:
      dist1_kwargs = {
          'loc': loc1, 'scale_tri': scale_tri1, 'is_lower': is_lower}
      dist2_kwargs = {
          'loc': loc2, 'scale_tri': scale_tri2, 'is_lower': is_lower}
      tfp_dist1_kwargs = _make_tfp_dist_kwargs(loc1, scale_tri1, is_lower)
      tfp_dist2_kwargs = _make_tfp_dist_kwargs(loc2, scale_tri2, is_lower)
      with self.subTest(is_lower=is_lower):
        super()._test_with_two_distributions(
            attribute_string=function_string,
            mode_string=mode_string,
            dist1_kwargs=dist1_kwargs,
            dist2_kwargs=dist2_kwargs,
            tfp_dist1_kwargs=tfp_dist1_kwargs,
            tfp_dist2_kwargs=tfp_dist2_kwargs,
            assertion_fn=self.assertion_fn(rtol=1e-3))

  def test_jittable(self):
    super()._test_jittable(
        dist_kwargs={'loc': np.zeros((4,))},
        assertion_fn=self.assertion_fn(rtol=1e-3))

  @parameterized.named_parameters(
      ('single element', 2),
      ('range', slice(-1)),
      ('range_2', (slice(None), slice(-1))),
  )
  def test_slice(self, slice_):
    """Indexing a distribution slices its batch dimensions only."""
    rng = np.random.default_rng(2022)
    loc = rng.normal(size=(6, 5, 4))
    scale_tri = rng.normal(size=(4, 4))
    for is_lower in [True, False]:
      with self.subTest(is_lower=is_lower):
        dist_kwargs = {'loc': loc, 'scale_tri': scale_tri, 'is_lower': is_lower}
        dist = self.distrax_cls(**dist_kwargs)
        self.assertEqual(dist[slice_].batch_shape, loc[slice_].shape[:-1])
        self.assertEqual(dist[slice_].event_shape, dist.event_shape)
        self.assertEqual(dist[slice_].is_lower, dist.is_lower)
        self.assertion_fn(rtol=1e-3)(dist[slice_].mean(), loc[slice_])

  def test_slice_ellipsis(self):
    """Ellipsis indexing selects along the last batch dimension."""
    rng = np.random.default_rng(2022)
    loc = rng.normal(size=(6, 5, 4))
    scale_tri = rng.normal(size=(4, 4))
    for is_lower in [True, False]:
      with self.subTest(is_lower=is_lower):
        dist_kwargs = {'loc': loc, 'scale_tri': scale_tri, 'is_lower': is_lower}
        dist = self.distrax_cls(**dist_kwargs)
        self.assertEqual(dist[..., -1].batch_shape, (6,))
        self.assertEqual(dist[..., -1].event_shape, dist.event_shape)
        self.assertEqual(dist[..., -1].is_lower, dist.is_lower)
        self.assertion_fn(rtol=1e-3)(dist[..., -1].mean(), loc[:, -1, :])


if __name__ == '__main__':
  absltest.main()

# --- Page text that followed the listing (not part of the test module) ---
# Learn to execute automation testing from scratch with LambdaTest Learning
# Hub. Right from setting up the prerequisites to run your first automation
# test, to following best practices and diving deeper into advanced test
# scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to
# help you be proficient with different test automation frameworks i.e.
# Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
