How to use the tokenize method in Avocado

Best Python code snippets using avocado_python
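The snippets below come from several open-source projects and show different flavors of a tokenize method in practice. As a quick, self-contained warm-up that is not specific to the Avocado framework, here is a minimal sketch using Python's standard-library tokenize module to break source code into tokens:

# Minimal sketch: tokenizing Python source with the standard library's
# `tokenize` module (generic illustration, not Avocado-specific).
import io
import tokenize

source = "x = 1 + 2  # simple assignment\n"
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    print(tokenize.tok_name[tok.type], repr(tok.string))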

slim.js

Source: slim.js (GitHub)

...
    if (stream.pos < pos) {
      stream.pos = pos;
      return style;
    }
    return state.tokenize(stream, state);
  };
  return function(stream, state) {
    state.tokenize = restore;
    return tokenize(stream, state);
  };
}
function maybeBackup(stream, state, pat, offset, style) {
  var cur = stream.current();
  var idx = cur.search(pat);
  if (idx > -1) {
    state.tokenize = backup(stream.pos, state.tokenize, style);
    stream.backUp(cur.length - idx - offset);
  }
  return style;
}
function continueLine(state, column) {
  state.stack = {
    parent: state.stack,
    style: "continuation",
    indented: column,
    tokenize: state.line
  };
  state.line = state.tokenize;
}
function finishContinue(state) {
  if (state.line == state.tokenize) {
    state.line = state.stack.tokenize;
    state.stack = state.stack.parent;
  }
}
function lineContinuable(column, tokenize) {
  return function(stream, state) {
    finishContinue(state);
    if (stream.match(/^\\$/)) {
      continueLine(state, column);
      return "lineContinuation";
    }
    var style = tokenize(stream, state);
    if (stream.eol() && stream.current().match(/(?:^|[^\\])(?:\\\\)*\\$/)) {
      stream.backUp(1);
    }
    return style;
  };
}
function commaContinuable(column, tokenize) {
  return function(stream, state) {
    finishContinue(state);
    var style = tokenize(stream, state);
    if (stream.eol() && stream.current().match(/,$/)) {
      continueLine(state, column);
    }
    return style;
  };
}
function rubyInQuote(endQuote, tokenize) {
  // TODO: add multi line support
  return function(stream, state) {
    var ch = stream.peek();
    if (ch == endQuote && state.rubyState.tokenize.length == 1) {
      // step out of ruby context as it seems to complete processing all the braces
      stream.next();
      state.tokenize = tokenize;
      return "closeAttributeTag";
    } else {
      return ruby(stream, state);
    }
  };
}
function startRubySplat(tokenize) {
  var rubyState;
  var runSplat = function(stream, state) {
    if (state.rubyState.tokenize.length == 1 && !state.rubyState.context.prev) {
      stream.backUp(1);
      if (stream.eatSpace()) {
        state.rubyState = rubyState;
        state.tokenize = tokenize;
        return tokenize(stream, state);
      }
      stream.next();
    }
    return ruby(stream, state);
  };
  return function(stream, state) {
    rubyState = state.rubyState;
    state.rubyState = CodeMirror.startState(rubyMode);
    state.tokenize = runSplat;
    return ruby(stream, state);
  };
}
function ruby(stream, state) {
  return rubyMode.token(stream, state.rubyState);
}
function htmlLine(stream, state) {
  if (stream.match(/^\\$/)) {
    return "lineContinuation";
  }
  return html(stream, state);
}
function html(stream, state) {
  if (stream.match(/^#\{/)) {
    state.tokenize = rubyInQuote("}", state.tokenize);
    return null;
  }
  return maybeBackup(stream, state, /[^\\]#\{/, 1, htmlMode.token(stream, state.htmlState));
}
function startHtmlLine(lastTokenize) {
  return function(stream, state) {
    var style = htmlLine(stream, state);
    if (stream.eol()) state.tokenize = lastTokenize;
    return style;
  };
}
function startHtmlMode(stream, state, offset) {
  state.stack = {
    parent: state.stack,
    style: "html",
    indented: stream.column() + offset, // pipe + space
    tokenize: state.line
  };
  state.line = state.tokenize = html;
  return null;
}
function comment(stream, state) {
  stream.skipToEnd();
  return state.stack.style;
}
function commentMode(stream, state) {
  state.stack = {
    parent: state.stack,
    style: "comment",
    indented: state.indented + 1,
    tokenize: state.line
  };
  state.line = comment;
  return comment(stream, state);
}
function attributeWrapper(stream, state) {
  if (stream.eat(state.stack.endQuote)) {
    state.line = state.stack.line;
    state.tokenize = state.stack.tokenize;
    state.stack = state.stack.parent;
    return null;
  }
  if (stream.match(wrappedAttributeNameRegexp)) {
    state.tokenize = attributeWrapperAssign;
    return "slimAttribute";
  }
  stream.next();
  return null;
}
function attributeWrapperAssign(stream, state) {
  if (stream.match(/^==?/)) {
    state.tokenize = attributeWrapperValue;
    return null;
  }
  return attributeWrapper(stream, state);
}
function attributeWrapperValue(stream, state) {
  var ch = stream.peek();
  if (ch == '"' || ch == "\'") {
    state.tokenize = readQuoted(ch, "string", true, false, attributeWrapper);
    stream.next();
    return state.tokenize(stream, state);
  }
  if (ch == '[') {
    return startRubySplat(attributeWrapper)(stream, state);
  }
  if (stream.match(/^(true|false|nil)\b/)) {
    state.tokenize = attributeWrapper;
    return "keyword";
  }
  return startRubySplat(attributeWrapper)(stream, state);
}
function startAttributeWrapperMode(state, endQuote, tokenize) {
  state.stack = {
    parent: state.stack,
    style: "wrapper",
    indented: state.indented + 1,
    tokenize: tokenize,
    line: state.line,
    endQuote: endQuote
  };
  state.line = state.tokenize = attributeWrapper;
  return null;
}
function sub(stream, state) {
  if (stream.match(/^#\{/)) {
    state.tokenize = rubyInQuote("}", state.tokenize);
    return null;
  }
  var subStream = new CodeMirror.StringStream(stream.string.slice(state.stack.indented), stream.tabSize);
  subStream.pos = stream.pos - state.stack.indented;
  subStream.start = stream.start - state.stack.indented;
  subStream.lastColumnPos = stream.lastColumnPos - state.stack.indented;
  subStream.lastColumnValue = stream.lastColumnValue - state.stack.indented;
  var style = state.subMode.token(subStream, state.subState);
  stream.pos = subStream.pos + state.stack.indented;
  return style;
}
function firstSub(stream, state) {
  state.stack.indented = stream.column();
  state.line = state.tokenize = sub;
  return state.tokenize(stream, state);
}
function createMode(mode) {
  var query = embedded[mode];
  var spec = CodeMirror.mimeModes[query];
  if (spec) {
    return CodeMirror.getMode(config, spec);
  }
  var factory = CodeMirror.modes[query];
  if (factory) {
    return factory(config, {name: query});
  }
  return CodeMirror.getMode(config, "null");
}
function getMode(mode) {
  if (!modes.hasOwnProperty(mode)) {
    return modes[mode] = createMode(mode);
  }
  return modes[mode];
}
function startSubMode(mode, state) {
  var subMode = getMode(mode);
  var subState = CodeMirror.startState(subMode);
  state.subMode = subMode;
  state.subState = subState;
  state.stack = {
    parent: state.stack,
    style: "sub",
    indented: state.indented + 1,
    tokenize: state.line
  };
  state.line = state.tokenize = firstSub;
  return "slimSubmode";
}
function doctypeLine(stream, _state) {
  stream.skipToEnd();
  return "slimDoctype";
}
function startLine(stream, state) {
  var ch = stream.peek();
  if (ch == '<') {
    return (state.tokenize = startHtmlLine(state.tokenize))(stream, state);
  }
  if (stream.match(/^[|']/)) {
    return startHtmlMode(stream, state, 1);
  }
  if (stream.match(/^\/(!|\[\w+])?/)) {
    return commentMode(stream, state);
  }
  if (stream.match(/^(-|==?[<>]?)/)) {
    state.tokenize = lineContinuable(stream.column(), commaContinuable(stream.column(), ruby));
    return "slimSwitch";
  }
  if (stream.match(/^doctype\b/)) {
    state.tokenize = doctypeLine;
    return "keyword";
  }
  var m = stream.match(embeddedRegexp);
  if (m) {
    return startSubMode(m[1], state);
  }
  return slimTag(stream, state);
}
function slim(stream, state) {
  if (state.startOfLine) {
    return startLine(stream, state);
  }
  return slimTag(stream, state);
}
function slimTag(stream, state) {
  if (stream.eat('*')) {
    state.tokenize = startRubySplat(slimTagExtras);
    return null;
  }
  if (stream.match(nameRegexp)) {
    state.tokenize = slimTagExtras;
    return "slimTag";
  }
  return slimClass(stream, state);
}
function slimTagExtras(stream, state) {
  if (stream.match(/^(<>?|><?)/)) {
    state.tokenize = slimClass;
    return null;
  }
  return slimClass(stream, state);
}
function slimClass(stream, state) {
  if (stream.match(classIdRegexp)) {
    state.tokenize = slimClass;
    return "slimId";
  }
  if (stream.match(classNameRegexp)) {
    state.tokenize = slimClass;
    return "slimClass";
  }
  return slimAttribute(stream, state);
}
function slimAttribute(stream, state) {
  if (stream.match(/^([\[\{\(])/)) {
    return startAttributeWrapperMode(state, closing[RegExp.$1], slimAttribute);
  }
  if (stream.match(attributeNameRegexp)) {
    state.tokenize = slimAttributeAssign;
    return "slimAttribute";
  }
  if (stream.peek() == '*') {
    stream.next();
    state.tokenize = startRubySplat(slimContent);
    return null;
  }
  return slimContent(stream, state);
}
function slimAttributeAssign(stream, state) {
  if (stream.match(/^==?/)) {
    state.tokenize = slimAttributeValue;
    return null;
  }
  // should never happen, because of forward lookup
  return slimAttribute(stream, state);
}
function slimAttributeValue(stream, state) {
  var ch = stream.peek();
  if (ch == '"' || ch == "\'") {
    state.tokenize = readQuoted(ch, "string", true, false, slimAttribute);
    stream.next();
    return state.tokenize(stream, state);
  }
  if (ch == '[') {
    return startRubySplat(slimAttribute)(stream, state);
  }
  if (ch == ':') {
    return startRubySplat(slimAttributeSymbols)(stream, state);
  }
  if (stream.match(/^(true|false|nil)\b/)) {
    state.tokenize = slimAttribute;
    return "keyword";
  }
  return startRubySplat(slimAttribute)(stream, state);
}
function slimAttributeSymbols(stream, state) {
  stream.backUp(1);
  if (stream.match(/^[^\s],(?=:)/)) {
    state.tokenize = startRubySplat(slimAttributeSymbols);
    return null;
  }
  stream.next();
  return slimAttribute(stream, state);
}
function readQuoted(quote, style, embed, unescaped, nextTokenize) {
  return function(stream, state) {
    finishContinue(state);
    var fresh = stream.current().length == 0;
    if (stream.match(/^\\$/, fresh)) {
      if (!fresh) return style;
      continueLine(state, state.indented);
      return "lineContinuation";
    }
    if (stream.match(/^#\{/, fresh)) {
      if (!fresh) return style;
      state.tokenize = rubyInQuote("}", state.tokenize);
      return null;
    }
    var escaped = false, ch;
    while ((ch = stream.next()) != null) {
      if (ch == quote && (unescaped || !escaped)) {
        state.tokenize = nextTokenize;
        break;
      }
      if (embed && ch == "#" && !escaped) {
        if (stream.eat("{")) {
          stream.backUp(2);
          break;
        }
      }
      escaped = !escaped && ch == "\\";
    }
    if (stream.eol() && escaped) {
      stream.backUp(1);
    }
    return style;
  };
}
function slimContent(stream, state) {
  if (stream.match(/^==?/)) {
    state.tokenize = ruby;
    return "slimSwitch";
  }
  if (stream.match(/^\/$/)) { // tag close hint
    state.tokenize = slim;
    return null;
  }
  if (stream.match(/^:/)) { // inline tag
    state.tokenize = slimTag;
    return "slimSwitch";
  }
  startHtmlMode(stream, state, 0);
  return state.tokenize(stream, state);
}
var mode = {
  // default to html mode
  startState: function() {
    var htmlState = CodeMirror.startState(htmlMode);
    var rubyState = CodeMirror.startState(rubyMode);
    return {
      htmlState: htmlState,
      rubyState: rubyState,
      stack: null,
      last: null,
      tokenize: slim,
      line: slim,
      indented: 0
    };
  },
  copyState: function(state) {
    return {
      htmlState : CodeMirror.copyState(htmlMode, state.htmlState),
      rubyState: CodeMirror.copyState(rubyMode, state.rubyState),
      subMode: state.subMode,
      subState: state.subMode && CodeMirror.copyState(state.subMode, state.subState),
      stack: state.stack,
      last: state.last,
      tokenize: state.tokenize,
      line: state.line
    };
  },
  token: function(stream, state) {
    if (stream.sol()) {
      state.indented = stream.indentation();
      state.startOfLine = true;
      state.tokenize = state.line;
      while (state.stack && state.stack.indented > state.indented && state.last != "slimSubmode") {
        state.line = state.tokenize = state.stack.tokenize;
        state.stack = state.stack.parent;
        state.subMode = null;
        state.subState = null;
      }
    }
    if (stream.eatSpace()) return null;
    var style = state.tokenize(stream, state);
    state.startOfLine = false;
    if (style) state.last = style;
    return styleMap.hasOwnProperty(style) ? styleMap[style] : style;
  },
  blankLine: function(state) {
    if (state.subMode && state.subMode.blankLine) {
      return state.subMode.blankLine(state.subState);
    }
  },
  innerMode: function(state) {
    if (state.subMode) return {state: state.subState, mode: state.subMode};
    return {state: state, mode: mode};
  }
  //indent: function(state) {
...
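The CodeMirror Slim mode above keeps the current tokenize function inside the parser state and swaps it whenever the context changes (HTML, embedded Ruby, attribute wrappers, sub-modes). A rough Python sketch of that state-swapping pattern, using made-up names rather than CodeMirror's API, looks like this:

# Rough sketch (assumed names, not CodeMirror's API) of the pattern used above:
# the current tokenizer lives in the state object and is swapped on context change.
class State:
    def __init__(self):
        self.tokenize = tokenize_text  # current tokenizer function

def tokenize_text(text, pos, state):
    if text.startswith("#{", pos):
        state.tokenize = tokenize_interpolation  # enter embedded context
        return pos + 2, "open-interp"
    return pos + 1, "text"

def tokenize_interpolation(text, pos, state):
    if text[pos] == "}":
        state.tokenize = tokenize_text  # step back out of the embedded context
        return pos + 1, "close-interp"
    return pos + 1, "code"

def run(text):
    state, pos, out = State(), 0, []
    while pos < len(text):
        pos, style = state.tokenize(text, pos, state)
        out.append(style)
    return out

print(run("a#{b}c"))  # ['text', 'open-interp', 'code', 'close-interp', 'text']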

test_base.py

Source: test_base.py (GitHub)

...
    assert (normalize_function(tz.curry(f2, b=1)) ==
            normalize_function(tz.curry(f2, b=1)))
    assert (normalize_function(tz.curry(f2, b=1)) !=
            normalize_function(tz.curry(f2, b=2)))
def test_tokenize():
    a = (1, 2, 3)
    assert isinstance(tokenize(a), (str, bytes))
@pytest.mark.skipif('not np')
def test_tokenize_numpy_array_consistent_on_values():
    assert (tokenize(np.random.RandomState(1234).random_sample(1000)) ==
            tokenize(np.random.RandomState(1234).random_sample(1000)))
@pytest.mark.skipif('not np')
def test_tokenize_numpy_array_supports_uneven_sizes():
    tokenize(np.random.random(7).astype(dtype='i2'))
@pytest.mark.skipif('not np')
def test_tokenize_discontiguous_numpy_array():
    tokenize(np.random.random(8)[::2])
@pytest.mark.skipif('not np')
def test_tokenize_numpy_datetime():
    tokenize(np.array(['2000-01-01T12:00:00'], dtype='M8[ns]'))
@pytest.mark.skipif('not np')
def test_tokenize_numpy_scalar():
    assert tokenize(np.array(1.0, dtype='f8')) == tokenize(np.array(1.0, dtype='f8'))
    assert (tokenize(np.array([(1, 2)], dtype=[('a', 'i4'), ('b', 'i8')])[0]) ==
            tokenize(np.array([(1, 2)], dtype=[('a', 'i4'), ('b', 'i8')])[0]))
@pytest.mark.skipif('not np')
def test_tokenize_numpy_array_on_object_dtype():
    assert (tokenize(np.array(['a', 'aa', 'aaa'], dtype=object)) ==
            tokenize(np.array(['a', 'aa', 'aaa'], dtype=object)))
    assert (tokenize(np.array(['a', None, 'aaa'], dtype=object)) ==
            tokenize(np.array(['a', None, 'aaa'], dtype=object)))
    assert (tokenize(np.array([(1, 'a'), (1, None), (1, 'aaa')], dtype=object)) ==
            tokenize(np.array([(1, 'a'), (1, None), (1, 'aaa')], dtype=object)))
    if sys.version_info[0] == 2:
        assert (tokenize(np.array([unicode("Rebeca Alón", encoding="utf-8")], dtype=object)) ==
                tokenize(np.array([unicode("Rebeca Alón", encoding="utf-8")], dtype=object)))
@pytest.mark.skipif('not np')
def test_tokenize_numpy_memmap():
    with tmpfile('.npy') as fn:
        x = np.arange(5)
        np.save(fn, x)
        y = tokenize(np.load(fn, mmap_mode='r'))
    with tmpfile('.npy') as fn:
        x = np.arange(5)
        np.save(fn, x)
        z = tokenize(np.load(fn, mmap_mode='r'))
    assert y != z
    with tmpfile('.npy') as fn:
        x = np.random.normal(size=(10, 10))
        np.save(fn, x)
        mm = np.load(fn, mmap_mode='r')
        mm2 = np.load(fn, mmap_mode='r')
        a = tokenize(mm[0, :])
        b = tokenize(mm[1, :])
        c = tokenize(mm[0:3, :])
        d = tokenize(mm[:, 0])
        assert len(set([a, b, c, d])) == 4
        assert tokenize(mm) == tokenize(mm2)
        assert tokenize(mm[1, :]) == tokenize(mm2[1, :])
@pytest.mark.skipif('not np')
def test_tokenize_numpy_memmap_no_filename():
    # GH 1562:
    with tmpfile('.npy') as fn1, tmpfile('.npy') as fn2:
        x = np.arange(5)
        np.save(fn1, x)
        np.save(fn2, x)
        a = np.load(fn1, mmap_mode='r')
        b = a + a
        assert tokenize(b) == tokenize(b)
@pytest.mark.skipif('not np')
def test_tokenize_numpy_ufunc_consistent():
    assert tokenize(np.sin) == '02106e2c67daf452fb480d264e0dac21'
    assert tokenize(np.cos) == 'c99e52e912e4379882a9a4b387957a0b'
    # Make a ufunc that isn't in the numpy namespace. Similar to
    # any found in other packages.
    inc = np.frompyfunc(lambda x: x + 1, 1, 1)
    assert tokenize(inc) == tokenize(inc)
def test_normalize_base():
    for i in [1, long(1), 1.1, '1', slice(1, 2, 3)]:
        assert normalize_token(i) is i
@pytest.mark.skipif('not pd')
def test_tokenize_pandas():
    a = pd.DataFrame({'x': [1, 2, 3], 'y': ['4', 'asd', None]}, index=[1, 2, 3])
    b = pd.DataFrame({'x': [1, 2, 3], 'y': ['4', 'asd', None]}, index=[1, 2, 3])
    assert tokenize(a) == tokenize(b)
    b.index.name = 'foo'
    assert tokenize(a) != tokenize(b)
    a = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'a']})
    b = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'a']})
    a['z'] = a.y.astype('category')
    assert tokenize(a) != tokenize(b)
    b['z'] = a.y.astype('category')
    assert tokenize(a) == tokenize(b)
def test_tokenize_kwargs():
    assert tokenize(5, x=1) == tokenize(5, x=1)
    assert tokenize(5) != tokenize(5, x=1)
    assert tokenize(5, x=1) != tokenize(5, x=2)
    assert tokenize(5, x=1) != tokenize(5, y=1)
def test_tokenize_same_repr():
    class Foo(object):
        def __init__(self, x):
            self.x = x
        def __repr__(self):
            return 'a foo'
    assert tokenize(Foo(1)) != tokenize(Foo(2))
@pytest.mark.skipif('not np')
def test_tokenize_sequences():
    assert tokenize([1]) != tokenize([2])
    assert tokenize([1]) != tokenize((1,))
    assert tokenize([1]) == tokenize([1])
    x = np.arange(2000) # long enough to drop information in repr
    y = np.arange(2000)
    y[1000] = 0 # middle isn't printed in repr
    assert tokenize([x]) != tokenize([y])
def test_tokenize_dict():
    assert tokenize({'x': 1, 1: 'x'}) == tokenize({'x': 1, 1: 'x'})
def test_tokenize_set():
    assert tokenize({1, 2, 'x', (1, 'x')}) == tokenize({1, 2, 'x', (1, 'x')})
def test_tokenize_ordered_dict():
    with ignoring(ImportError):
        from collections import OrderedDict
        a = OrderedDict([('a', 1), ('b', 2)])
        b = OrderedDict([('a', 1), ('b', 2)])
        c = OrderedDict([('b', 2), ('a', 1)])
        assert tokenize(a) == tokenize(b)
        assert tokenize(a) != tokenize(c)
@pytest.mark.skipif('not np')
def test_tokenize_object_array_with_nans():
    a = np.array([u'foo', u'Jos\xe9', np.nan], dtype='O')
    assert tokenize(a) == tokenize(a)
@pytest.mark.parametrize('x', [1, True, 'a', b'a', 1.0, 1j, 1.0j,
                               [], (), {}, None, str, int])
def test_tokenize_base_types(x):
    assert tokenize(x) == tokenize(x), x
@pytest.mark.skipif('not db')
def test_compute_no_opt():
    # Bag does `fuse` by default. Test that with `optimize_graph=False` that
    # doesn't get called. We check this by using a callback to track the keys
    # that are computed.
    from dask.callbacks import Callback
    b = db.from_sequence(range(100), npartitions=4)
    add1 = tz.partial(add, 1)
    mul2 = tz.partial(mul, 2)
    o = b.map(add1).map(mul2)
    # Check that with the kwarg, the optimization doesn't happen
    keys = []
    with Callback(pretask=lambda key, *args: keys.append(key)):
        o.compute(get=dask.get, optimize_graph=False)
...
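The tests above exercise dask.base.tokenize, which deterministically hashes Python objects (including NumPy arrays and pandas frames) into a string token. A minimal usage sketch, assuming dask is installed:

# Minimal sketch of dask.base.tokenize: equal inputs hash to the same token,
# different inputs (almost always) do not. Assumes dask is installed.
from dask.base import tokenize

assert tokenize([1, 2, 3]) == tokenize([1, 2, 3])
assert tokenize([1, 2, 3]) != tokenize((1, 2, 3))   # container type matters
assert tokenize(5, x=1) != tokenize(5, x=2)         # keyword arguments count too
print(tokenize({"a": 1}))                           # a deterministic hash string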

Tokenizer.test.js

Source: Tokenizer.test.js (GitHub)

...
})

it("intentionally fail with invalid character",()=>{
  expect(() => {
    tokenizer.tokenize('\\');
  }).toThrow();
})

it("intentionally fail with open ended string",()=>{
  expect(() => {
    tokenizer.tokenize('"');
  }).toThrow();

  expect(() => {
    tokenizer.tokenize("'");
  }).toThrow();
})

it("intentionally fail with incomplete OR",()=>{
  expect(() => {
    tokenizer.tokenize('|');
  }).toThrow();
})

it("intentionally fail with incomplete AND",()=>{
  expect(() => {
    tokenizer.tokenize('&');
  }).toThrow();
})

it("whitespace program",()=>{
  expect(tokenizer.tokenize(' ')).toEqual([{line: 1, type: TokenType.NewLine, value: ""}]);
})

it("blank program",()=>{
  expect(tokenizer.tokenize('')).toEqual([{line: 1, type: TokenType.NewLine, value: null}]);
})

it("comment only program has just a newline token with the comment",()=>{
  const commentCode = '//adsasdas';
  let tokenList=tokenizer.tokenize(commentCode);
  expect(tokenList).toEqual([{line: 1, type: TokenType.NewLine, value: commentCode}]);
})

it("LineDelim",()=>{
  expect(tokenizer.tokenize(';')[0]).toEqual({line: 1, type: TokenType.LineDelim, value: null});
})

it("NewLine",()=>{
  expect(tokenizer.tokenize('\n')[0]).toEqual({line: 2, type: TokenType.NewLine, value: ""});
})

it("Double",()=>{
  expect(tokenizer.tokenize('double')[0]).toEqual({line: 1, type: TokenType.Double, value: null});
})

it("String",()=>{
  expect(tokenizer.tokenize('string')[0]).toEqual({line: 1, type: TokenType.String, value: null});
})

it("Bool",()=>{
  expect(tokenizer.tokenize('bool')[0]).toEqual({line: 1, type: TokenType.Bool, value: null});
})

it("DoubleLiteral",()=>{
  expect(tokenizer.tokenize('123.45')[0]).toEqual({line: 1, type: TokenType.DoubleLiteral, value: 123.45});
  expect(tokenizer.tokenize('45')[0]).toEqual({line: 1, type: TokenType.DoubleLiteral, value: 45});
  expect(tokenizer.tokenize('.45')[0]).toEqual({line: 1, type: TokenType.DoubleLiteral, value: .45});
  expect(tokenizer.tokenize('45.')[0]).toEqual({line: 1, type: TokenType.DoubleLiteral, value: 45.});
})

it("StringLiteral",()=>{
  expect(tokenizer.tokenize('""')[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: ""});
  expect(tokenizer.tokenize('"1"')[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: "1"});
  const allCharDQ='`1234567890-=qwertyuiop[]\\asdfghjkl;\'zxcvbnm,./~!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:ZXCVBNM<>?';
  expect(tokenizer.tokenize('"'+allCharDQ+'"')[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: allCharDQ});

  expect(tokenizer.tokenize("''")[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: ""});
  expect(tokenizer.tokenize("'1'")[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: "1"});
  const allCharSQ="`1234567890-=qwertyuiop[]\\asdfghjkl;\"zxcvbnm,./~!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:ZXCVBNM<>?";
  expect(tokenizer.tokenize("'"+allCharSQ+"'")[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: allCharSQ});
})

it("Ident",()=>{
  const expectGoodIdent = (name) => {
    expect(tokenizer.tokenize(name)[0]).toEqual({line: 1, type: TokenType.Ident, value: name});
  }
  const expectBadIdent = (name) => {
    expect(tokenizer.tokenize(name)[0]).not.toEqual({line: 1, type: TokenType.Ident, value: name});
  }

  expectGoodIdent("a");
  expectGoodIdent("_");
  expectGoodIdent("True");
  expectGoodIdent("False");
  expectGoodIdent("Null");
  expectGoodIdent("_qwertyuiopasdfghjklzxcvbnm_QWERTYUIOPASDFGHJKLZXCVBNM1234567890_");
  expectBadIdent("0NAME");
  expectBadIdent("A.B");
})

it("True",()=>{
  expect(tokenizer.tokenize('true')[0]).toEqual({line: 1, type: TokenType.True, value: null});
})

it("False",()=>{
  expect(tokenizer.tokenize('false')[0]).toEqual({line: 1, type: TokenType.False, value: null});
})

it("Null",()=>{
  expect(tokenizer.tokenize('null')[0]).toEqual({line: 1, type: TokenType.Null, value: null});
})

it("LeftParen",()=>{
  expect(tokenizer.tokenize('(')[0]).toEqual({line: 1, type: TokenType.LeftParen, value: null});
})

it("RightParen",()=>{
  expect(tokenizer.tokenize(')')[0]).toEqual({line: 1, type: TokenType.RightParen, value: null});
})

it("LeftSqaure",()=>{
  expect(tokenizer.tokenize('[')[0]).toEqual({line: 1, type: TokenType.LeftSqaure, value: null});
})

it("RightSqaure",()=>{
  expect(tokenizer.tokenize(']')[0]).toEqual({line: 1, type: TokenType.RightSqaure, value: null});
})

it("Comma",()=>{
  expect(tokenizer.tokenize(',')[0]).toEqual({line: 1, type: TokenType.Comma, value: null});
})

it("Dot",()=>{
  expect(tokenizer.tokenize('.')[0]).toEqual({line: 1, type: TokenType.Dot, value: null});
})

it("Not",()=>{
  expect(tokenizer.tokenize('!')[0]).toEqual({line: 1, type: TokenType.Not, value: null});
})

it("And",()=>{
  expect(tokenizer.tokenize('&&')[0]).toEqual({line: 1, type: TokenType.And, value: null});
})

it("Or",()=>{
  expect(tokenizer.tokenize('||')[0]).toEqual({line: 1, type: TokenType.Or, value: null});
})

it("Plus",()=>{
  expect(tokenizer.tokenize('+')[0]).toEqual({line: 1, type: TokenType.Plus, value: null});
})

it("Minus",()=>{
  expect(tokenizer.tokenize('-')[0]).toEqual({line: 1, type: TokenType.Minus, value: null});
})

it("Divide",()=>{
  expect(tokenizer.tokenize('/')[0]).toEqual({line: 1, type: TokenType.Divide, value: null});
})

it("Multiply",()=>{
  expect(tokenizer.tokenize('*')[0]).toEqual({line: 1, type: TokenType.Multiply, value: null});
})

it("Mod",()=>{
  expect(tokenizer.tokenize('%')[0]).toEqual({line: 1, type: TokenType.Mod, value: null});
})

it("Exponent",()=>{
  expect(tokenizer.tokenize('^')[0]).toEqual({line: 1, type: TokenType.Exponent, value: null});
})

it("Question",()=>{
  expect(tokenizer.tokenize('?')[0]).toEqual({line: 1, type: TokenType.Question, value: null});
})

it("Colon",()=>{
  expect(tokenizer.tokenize(':')[0]).toEqual({line: 1, type: TokenType.Colon, value: null});
})

it("Assignment",()=>{
  expect(tokenizer.tokenize('=')[0]).toEqual({line: 1, type: TokenType.Assignment, value: null});
})

it("Equals",()=>{
  expect(tokenizer.tokenize('==')[0]).toEqual({line: 1, type: TokenType.Equals, value: null});
})

it("NotEquals",()=>{
  expect(tokenizer.tokenize('!=')[0]).toEqual({line: 1, type: TokenType.NotEquals, value: null});
})

it("Lesser",()=>{
  expect(tokenizer.tokenize('<')[0]).toEqual({line: 1, type: TokenType.Lesser, value: null});
})

it("LesserEquals",()=>{
  expect(tokenizer.tokenize('<=')[0]).toEqual({line: 1, type: TokenType.LesserEquals, value: null});
})

it("Greater",()=>{
  expect(tokenizer.tokenize('>')[0]).toEqual({line: 1, type: TokenType.Greater, value: null});
})

it("GreaterEquals",()=>{
  expect(tokenizer.tokenize('>=')[0]).toEqual({line: 1, type: TokenType.GreaterEquals, value: null});
})

it("Min",()=>{
  expect(tokenizer.tokenize('min')[0]).toEqual({line: 1, type: TokenType.Min, value: null});
})

it("Max",()=>{
  expect(tokenizer.tokenize('max')[0]).toEqual({line: 1, type: TokenType.Max, value: null});
})

it("Abs",()=>{
  expect(tokenizer.tokenize('abs')[0]).toEqual({line: 1, type: TokenType.Abs, value: null});
})

it("Clamp",()=>{
  expect(tokenizer.tokenize('clamp')[0]).toEqual({line: 1, type: TokenType.Clamp, value: null});
})

it("Floor",()=>{
  expect(tokenizer.tokenize('floor')[0]).toEqual({line: 1, type: TokenType.Floor, value: null});
})

it("Ceil",()=>{
  expect(tokenizer.tokenize('ceil')[0]).toEqual({line: 1, type: TokenType.Ceil, value: null});
})

it("LCase",()=>{
  expect(tokenizer.tokenize('lcase')[0]).toEqual({line: 1, type: TokenType.LCase, value: null});
})

it("UCase",()=>{
  expect(tokenizer.tokenize('ucase')[0]).toEqual({line: 1, type: TokenType.UCase, value: null});
})

it("Trim",()=>{
  expect(tokenizer.tokenize('trim')[0]).toEqual({line: 1, type: TokenType.Trim, value: null});
})

it("Len",()=>{
  expect(tokenizer.tokenize('len')[0]).toEqual({line: 1, type: TokenType.Len, value: null});
})

it("SubStr",()=>{
  expect(tokenizer.tokenize('substr')[0]).toEqual({line: 1, type: TokenType.SubStr, value: null});
})

it("While",()=>{
  expect(tokenizer.tokenize('while')[0]).toEqual({line: 1, type: TokenType.While, value: null});
})

it("For",()=>{
  expect(tokenizer.tokenize('for')[0]).toEqual({line: 1, type: TokenType.For, value: null});
})

it("Loop",()=>{
  expect(tokenizer.tokenize('loop')[0]).toEqual({line: 1, type: TokenType.Loop, value: null});
})

it("If",()=>{
  expect(tokenizer.tokenize('if')[0]).toEqual({line: 1, type: TokenType.If, value: null});
})

it("Else",()=>{
  expect(tokenizer.tokenize('else')[0]).toEqual({line: 1, type: TokenType.Else, value: null});
})

it("Break",()=>{
  expect(tokenizer.tokenize('break')[0]).toEqual({line: 1, type: TokenType.Break, value: null});
})

it("LeftCurly",()=>{
  expect(tokenizer.tokenize('{')[0]).toEqual({line: 1, type: TokenType.LeftCurly, value: null});
})

it("RightCurly",()=>{
  expect(tokenizer.tokenize('}')[0]).toEqual({line: 1, type: TokenType.RightCurly, value: null});
})

it("Return",()=>{
  expect(tokenizer.tokenize('return')[0]).toEqual({line: 1, type: TokenType.Return, value: null});
})

it("Exit",()=>{
  expect(tokenizer.tokenize('exit')[0]).toEqual({line: 1, type: TokenType.Exit, value: null});
})
...
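These tests describe a small expression-language tokenizer whose tokenize method returns token records with line, type, and value fields. As a generic illustration of that idea (hypothetical names, not the project's actual Tokenizer), a regex-driven version in Python might look like this:

# Generic sketch (hypothetical, not the project's Tokenizer) of a regex-driven
# tokenizer that yields {line, type, value} records like the tests above expect.
import re

TOKEN_SPEC = [
    ("DoubleLiteral", r"\d+\.\d*|\.\d+|\d+"),
    ("Ident",         r"[A-Za-z_][A-Za-z0-9_]*"),
    ("NewLine",       r"\n"),
    ("Skip",          r"[ \t]+"),
    ("Op",            r"==|!=|<=|>=|&&|\|\||[-+*/%^=<>!?:;,.(){}\[\]]"),
]
MASTER = re.compile("|".join(f"(?P<{name}>{pat})" for name, pat in TOKEN_SPEC))

def tokenize(source):
    tokens, line = [], 1
    for m in MASTER.finditer(source):
        kind, text = m.lastgroup, m.group()
        if kind == "NewLine":
            line += 1
        if kind == "Skip":
            continue  # whitespace carries no token
        if kind == "DoubleLiteral":
            value = float(text)
        elif kind == "Ident":
            value = text
        else:
            value = None
        tokens.append({"line": line, "type": kind, "value": value})
    return tokens

print(tokenize("x = 1.5 + y"))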

__init__.py

Source: __init__.py (GitHub)

...
tokenizers can be used to find the words and punctuation in a string:
    >>> from nltk.tokenize import word_tokenize
    >>> s = '''Good muffins cost $3.88\nin New York. Please buy me
    ... two of them.\n\nThanks.'''
    >>> word_tokenize(s)
    ['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.',
    'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
This particular tokenizer requires the Punkt sentence tokenization
models to be installed. NLTK also provides a simpler,
regular-expression based tokenizer, which splits text on whitespace
and punctuation:
    >>> from nltk.tokenize import wordpunct_tokenize
    >>> wordpunct_tokenize(s)
    ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.',
    'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
We can also operate at the level of sentences, using the sentence
tokenizer directly as follows:
    >>> from nltk.tokenize import sent_tokenize, word_tokenize
    >>> sent_tokenize(s)
    ['Good muffins cost $3.88\nin New York.', 'Please buy me\ntwo of them.', 'Thanks.']
    >>> [word_tokenize(t) for t in sent_tokenize(s)]
    [['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.'],
    ['Please', 'buy', 'me', 'two', 'of', 'them', '.'], ['Thanks', '.']]
Caution: when tokenizing a Unicode string, make sure you are not
using an encoded version of the string (it may be necessary to
decode it first, e.g. with ``s.decode("utf8")``.
NLTK tokenizers can produce token-spans, represented as tuples of integers
having the same semantics as string slices, to support efficient comparison
of tokenizers. (These methods are implemented as generators.)
    >>> from nltk.tokenize import WhitespaceTokenizer
    >>> list(WhitespaceTokenizer().span_tokenize(s))
    [(0, 4), (5, 12), (13, 17), (18, 23), (24, 26), (27, 30), (31, 36), (38, 44),
    (45, 48), (49, 51), (52, 55), (56, 58), (59, 64), (66, 73)]
There are numerous ways to tokenize text. If you need more control over
tokenization, see the other methods provided in this package.
For further information, please see Chapter 3 of the NLTK book.
"""
import re
from nltk.data import load
from nltk.tokenize.casual import TweetTokenizer, casual_tokenize
from nltk.tokenize.mwe import MWETokenizer
from nltk.tokenize.punkt import PunktSentenceTokenizer
from nltk.tokenize.regexp import (
    RegexpTokenizer,
    WhitespaceTokenizer,
    BlanklineTokenizer,
    WordPunctTokenizer,
    wordpunct_tokenize,
    regexp_tokenize,
    blankline_tokenize,
)
from nltk.tokenize.repp import ReppTokenizer
from nltk.tokenize.sexpr import SExprTokenizer, sexpr_tokenize
from nltk.tokenize.simple import (
    SpaceTokenizer,
    TabTokenizer,
    LineTokenizer,
    line_tokenize,
)
from nltk.tokenize.texttiling import TextTilingTokenizer
from nltk.tokenize.toktok import ToktokTokenizer
from nltk.tokenize.treebank import TreebankWordTokenizer
from nltk.tokenize.util import string_span_tokenize, regexp_span_tokenize
from nltk.tokenize.stanford_segmenter import StanfordSegmenter
from nltk.tokenize.sonority_sequencing import SyllableTokenizer
# Standard sentence tokenizer.
def sent_tokenize(text, language='english'):
    """
    Return a sentence-tokenized copy of *text*,
    using NLTK's recommended sentence tokenizer
    (currently :class:`.PunktSentenceTokenizer`
    for the specified language).
    :param text: text to split into sentences
    :param language: the model name in the Punkt corpus
    """
    tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language))
    return tokenizer.tokenize(text)
# Standard word tokenizer.
_treebank_word_tokenizer = TreebankWordTokenizer()
# See discussion on https://github.com/nltk/nltk/pull/1437
# Adding to TreebankWordTokenizer, nltk.word_tokenize now splits on
# - chervon quotes u'\xab' and u'\xbb' .
# - unicode quotes u'\u2018', u'\u2019', u'\u201c' and u'\u201d'
# See https://github.com/nltk/nltk/issues/1995#issuecomment-376741608
# Also, behavior of splitting on clitics now follows Stanford CoreNLP
# - clitics covered (?!re|ve|ll|m|t|s|d)(\w)\b
improved_open_quote_regex = re.compile(u'([«“‘„]|[`]+)', re.U)
improved_open_single_quote_regex = re.compile(r"(?i)(\')(?!re|ve|ll|m|t|s|d)(\w)\b", re.U)
improved_close_quote_regex = re.compile(u'([»”’])', re.U)
improved_punct_regex = re.compile(r'([^\.])(\.)([\]\)}>"\'' u'»”’ ' r']*)\s*$', re.U)
_treebank_word_tokenizer.STARTING_QUOTES.insert(0, (improved_open_quote_regex, r' \1 '))
_treebank_word_tokenizer.STARTING_QUOTES.append((improved_open_single_quote_regex, r'\1 \2'))
_treebank_word_tokenizer.ENDING_QUOTES.insert(0, (improved_close_quote_regex, r' \1 '))
_treebank_word_tokenizer.PUNCTUATION.insert(0, (improved_punct_regex, r'\1 \2 \3 '))
def word_tokenize(text, language='english', preserve_line=False):
    """
    Return a tokenized copy of *text*,
    using NLTK's recommended word tokenizer
    (currently an improved :class:`.TreebankWordTokenizer`
    along with :class:`.PunktSentenceTokenizer`
    for the specified language).
    :param text: text to split into words
    :type text: str
    :param language: the model name in the Punkt corpus
    :type language: str
    :param preserve_line: An option to keep the preserve the sentence and not sentence tokenize it.
    :type preserve_line: bool
    """
    sentences = [text] if preserve_line else sent_tokenize(text, language)
    return [
        token for sent in sentences for token in _treebank_word_tokenizer.tokenize(sent)
...
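For completeness, a short usage sketch of the NLTK entry points defined above; it assumes nltk is installed and the Punkt model has been downloaded (for example via nltk.download('punkt')):

# Short usage sketch of the NLTK tokenizers shown above.
# Assumes nltk is installed and the Punkt model has been downloaded.
from nltk.tokenize import sent_tokenize, word_tokenize, wordpunct_tokenize

s = "Good muffins cost $3.88 in New York. Please buy me two of them."
print(sent_tokenize(s))       # sentence boundaries via the Punkt model
print(word_tokenize(s))       # Treebank-style word tokens
print(wordpunct_tokenize(s))  # simpler regex-based splitting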

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run Avocado automation tests on the LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

