Best Python code snippet using nose2
morfeusz_builder
Source: morfeusz_builder
#!/usr/bin/python2
# -*- coding:utf-8 -*-
'''
Created on 21 paź 2013

@author: mlenart
'''

import os
import sys
import logging
import codecs

from morfeuszbuilder.fsa import encode
from morfeuszbuilder.fsa import convertinput
from morfeuszbuilder.fsa.fsa import FSA
from morfeuszbuilder.fsa.serializer import Serializer, SerializationMethod
from morfeuszbuilder.tagset.tagset import Tagset
from morfeuszbuilder.segrules import rulesParser
from morfeuszbuilder.utils import exceptions, limits
from optparse import OptionParser


def _checkOption(opt, parser, msg):
    if opt is None:
        print >> sys.stderr, msg
        parser.print_help()
        exit(1)


def _checkCondition(cond, parser, msg):
    if not cond:
        print >> sys.stderr, msg
        parser.print_help()
        exit(1)


def _parseListCallback(option, opt, value, parser):
    setattr(parser.values, option.dest, value.split(','))


def _checkOpen(filename, mode):
    try:
        with open(filename, mode) as _:
            pass
        if 'w' in mode:
            os.remove(filename)
    except IOError as ex:
        print >> sys.stderr, str(ex)
        exit(1)


def _getDictFilename(opts, isGenerator):
    typeCode = 's' if isGenerator else 'a'
    fname = '%s-%s.dict' % (opts.dictName, typeCode)
    return os.path.join(opts.dictDir, fname)


def _parseOptions():
    """
    Parses commandline args
    """
    parser = OptionParser()
    parser.add_option('--input-files',
                      type='string',
                      dest='inputFiles',
                      action='callback',
                      callback=_parseListCallback,
                      metavar='FILES',
                      help='comma separated list of dictionary files')
    parser.add_option('--tagset-file',
                      dest='tagsetFile',
                      metavar='FILE',
                      help='path to the file with tagset')
    parser.add_option('--segments-file',
                      dest='segmentsFile',
                      metavar='FILE',
                      help='path to the file with segment rules')
    #~ parser.add_option('--trim-supneg',
    #~                   dest='trimSupneg',
    #~                   default=False,
    #~                   action='store_true',
    #~                   help='this option is ignored and exists only for backwards compatibility')
    parser.add_option('--dict',
                      dest='dictName',
                      help='the name of result dictionary')
    parser.add_option('--dict-dir',
                      dest='dictDir',
                      metavar='FILE',
                      default=os.getcwd(),
                      help='path to output directory (the default is current dir)')
    parser.add_option('--only-analyzer',
                      dest='onlyAnalyzer',
                      action='store_true',
                      default=False,
                      help='Generate dictionary for morphological analysis only (default is both analysis and synthesis)')
    parser.add_option('--only-generator',
                      dest='onlyGenerator',
                      action='store_true',
                      default=False,
                      help='Generate dictionary for morphological synthesis only (default is both analysis and synthesis)')
    parser.add_option('--analyzer-cpp',
                      dest='analyzerCpp',
                      metavar='FILE',
                      help='Encode analyzer dictionary data in given c++ file')
    parser.add_option('--generator-cpp',
                      dest='generatorCpp',
                      metavar='FILE',
                      help='Encode generator dictionary data in given c++ file')
    #~ parser.add_option('--use-arrays',
    #~                   dest='useArrays',
    #~                   action='store_true',
    #~                   default=False,
    #~                   help='store states reachable by 2 transitions in arrays (should speed up recognition, available only when --serialization-method=V1)')
    parser.add_option('--serialization-method',
                      dest='serializationMethod',
                      default='V1',
                      help="FSA serialization method: \
                      SIMPLE - fixed-length transitions, fastest and weakest compression \
                      V1 - variable-length transitions, compressed labels - strongest compression \
                      V2 - format similar to the default in Jan Daciuk's fsa package - variable-length transitions, non-compressed labels - good compression, good speed")
    #~ parser.add_option('--visualize',
    #~                   dest='visualize',
    #~                   action='store_true',
    #~                   default=False,
    #~                   help='visualize result')
    parser.add_option('--analyzer-train-file',
                      dest='analyzerTrainFile',
                      help='A text file used for analyzer training. Should contain words from some large corpus - one word in each line. Resulting analysis automaton should be faster with proper train file.')
    parser.add_option('--generator-train-file',
                      dest='generatorTrainFile',
                      help='A text file used for generator training. Should contain words from some large corpus - one word in each line. Resulting synthesis automaton should be faster with proper train file.')
    parser.add_option('--debug',
                      dest='debug',
                      action='store_true',
                      default=False,
                      help='output some debugging info')
    #~ parser.add_option('--profile',
    #~                   dest='profile',
    #~                   action='store_true',
    #~                   default=False,
    #~                   help='show profiling graph (requires pycallgraph and graphviz)')

    opts, args = parser.parse_args()

    _checkOption(opts.inputFiles, parser, "Input file is missing")
    _checkOption(opts.dictDir, parser, "Output dictionary dir is missing")
    _checkCondition((opts.onlyAnalyzer, opts.onlyGenerator) != (True, True),
                    parser, 'Cannot set both --only-analyzer and --only-generator')
    writeCpp = {opts.analyzerCpp, opts.generatorCpp} != {None}
    _checkCondition(opts.dictName or writeCpp, parser, "Dictionary name is missing")
    _checkCondition(opts.onlyGenerator or opts.analyzerCpp or not writeCpp, parser, "Analyzer .cpp output file path is missing")
    _checkCondition(opts.onlyAnalyzer or opts.generatorCpp or not writeCpp, parser, "Generator .cpp output file path is missing")
    #~ _checkCondition((opts.dictName, opts.outputCpp) != (None, None),
    #~                 parser, 'Must set at least one of: --dict-name, --output-cpp')
    #~ _checkOption(opts.outputFile, parser, "Output file is missing")
    _checkOption(opts.tagsetFile, parser, "Tagset file is missing")
    _checkOption(opts.segmentsFile, parser, "Segmentation file is missing")
    #~ _checkOption(opts.serializationMethod, parser, "Serialization method file is missing")
    #~ _checkExactlyOneOptionSet([opts.analyzer, opts.generator],
    #~                           parser, 'Must set exactly one FSA type: --analyzer or --generator')

    _checkOpen(opts.tagsetFile, 'r')
    _checkOpen(opts.segmentsFile, 'r')
    for filename in opts.inputFiles:
        _checkOpen(filename, 'r')
    if not opts.onlyGenerator:
        _checkOpen(_getDictFilename(opts, isGenerator=False), 'w')
    if not opts.onlyAnalyzer:
        _checkOpen(_getDictFilename(opts, isGenerator=True), 'w')

    if not opts.serializationMethod.upper() in [SerializationMethod.SIMPLE, SerializationMethod.V1]:
        print >> sys.stderr, '--serialization-method must be one of (' + str([SerializationMethod.SIMPLE, SerializationMethod.V1]) + ')'
        parser.print_help()
        exit(1)

    return opts


def _concatFiles(inputFiles):
    for inputFile in inputFiles:
        if inputFile:
            with open(inputFile, 'r') as f:
                for line in f:
                    yield line


def _readDictIdAndCopyright(inputFiles):
    dictId = None
    copyright = None
    for inputFile in inputFiles:
        if inputFile:
            with codecs.open(inputFile, 'r', 'utf8') as f:
                inCopyright = False
                for linenum, line in enumerate(f, start=1):
                    if dictId is None and line.startswith(u'#!DICT-ID'):
                        dictIdTag, _, dictId = line.strip().partition(u' ')
                        exceptions.validate(
                            dictIdTag == u'#!DICT-ID',
                            u'Dictionary ID tag must be followed by a space character and dictionary ID string')
                        exceptions.validate(
                            len(line.split(u' ')) > 1,
                            u'%s:%d: Must provide DICT-ID' % (inputFile, linenum))
                        exceptions.validate(
                            len(line.split(u' ')) == 2,
                            u'%s:%d: DICT-ID must not contain spaces' % (inputFile, linenum))
                    elif copyright is None and line.startswith(u'#<COPYRIGHT>'):
                        exceptions.validate(
                            line.strip() == u'#<COPYRIGHT>',
                            u'%s:%d: Copyright start tag must be the only one in the line' % (inputFile, linenum))
                        inCopyright = True
                        copyright = u''
                    elif line.startswith(u'#</COPYRIGHT>'):
                        exceptions.validate(
                            inCopyright,
                            u'%s:%d: Copyright end tag must be preceded by copyright start tag' % (inputFile, linenum))
                        exceptions.validate(
                            line.strip() == u'#</COPYRIGHT>',
                            u'%s:%d: Copyright end tag must be the only one in the line' % (inputFile, linenum))
                        inCopyright = False
                    elif inCopyright:
                        copyright += line
    if dictId is None:
        logging.warning(u'No dictionary ID tag found')
        dictId = u''
    if copyright is None:
        logging.warning(u'No copyright info found')
        copyright = u''
    return (dictId, copyright)


def _readNamesAndQualifiers(inputFiles):
    names = set([u''])
    qualifiers = set([frozenset()])
    lineParser = convertinput.LineParser()
    for line in _concatFiles(inputFiles):
        line = line.strip()
        if hasattr(line, 'decode'):
            # Py2.7
            line = line.decode('utf8')
        if not lineParser.ignoreLine(line):
            _, _, _, name, qualifier = lineParser.parseLine(line)
            names.add(name)
            qualifiers.add(convertinput.parseQualifiers(qualifier))
    namesMap = dict([(name, idx) for idx, name in enumerate(sorted(list(names)))])
    qualifiersMap = dict([(quals, idx) for idx, quals in enumerate(sorted(qualifiers, key=lambda q: tuple(sorted(q))))])
    exceptions.validate(
        len(qualifiersMap) <= limits.MAX_QUALIFIERS_COMBINATIONS,
        u'Too many qualifiers combinations. The limit is %d' % limits.MAX_QUALIFIERS_COMBINATIONS)
    return namesMap, qualifiersMap


def _readPolimorfInput4Analyzer(inputFiles, tagset, namesMap, qualifiersMap, encoder, segmentRulesManager):
    logging.info('reading analyzer data from %s', str(inputFiles))
    for entry in convertinput.PolimorfConverter4Analyzer(tagset, namesMap, qualifiersMap, encoder, 'utf8', segmentRulesManager).convert(_concatFiles(inputFiles)):
        yield entry


def _readPolimorfInput4Generator(inputFiles, tagset, namesMap, qualifiersMap, encoder, segmentRulesManager):
    logging.info('reading generator data from %s', str(inputFiles))
    for entry in convertinput.PolimorfConverter4Generator(tagset, namesMap, qualifiersMap, encoder, 'utf8', segmentRulesManager).convert(_concatFiles(inputFiles)):
        yield entry


def _readTrainData(trainFile):
    with codecs.open(trainFile, 'r', 'utf8') as f:
        for line in f:
            yield line.strip()


def _printStats(fsa):
    acceptingNum = 0
    sinkNum = 0
    arrayNum = 0
    for s in fsa.dfs():
        if s.isAccepting():
            acceptingNum += 1
        if s.transitionsNum == 0:
            sinkNum += 1
        if s.serializeAsArray:
            arrayNum += 1
    logging.info('states num: ' + str(fsa.getStatesNum()))
    logging.info('transitions num: ' + str(fsa.getTransitionsNum()))
    logging.info('accepting states num: ' + str(acceptingNum))
    logging.info('sink states num: ' + str(sinkNum))
    logging.info('array states num: ' + str(arrayNum))


def buildAnalyzerFromPoliMorf(inputFiles, tagset, namesMap, qualifiersMap, segmentRulesManager):
    encoder = encode.MorphEncoder()
    fsa = FSA(encoder, tagset)
    for word, data in _readPolimorfInput4Analyzer(inputFiles, tagset, namesMap, qualifiersMap, encoder, segmentRulesManager):
        fsa.addEntry(word, data)
        del word
        del data
    fsa.close()
    logging.info('------')
    logging.info('Analyzer FSA stats:')
    logging.info('------')
    _printStats(fsa)
    return fsa


def buildGeneratorFromPoliMorf(inputFiles, tagset, namesMap, qualifiersMap, segmentRulesManager):
    encoder = encode.Encoder4Generator()
    fsa = FSA(encoder, tagset)
    inputData = _readPolimorfInput4Generator(inputFiles, tagset, namesMap, qualifiersMap, encoder, segmentRulesManager)
    for word, data in inputData:
        fsa.addEntry(word, data)
    fsa.close()
    logging.info('------')
    logging.info('Generator FSA stats:')
    logging.info('------')
    _printStats(fsa)
    return fsa


def _doBuildDictionaryPart(opts, dictId, copyrightTxt, tagset, namesMap, qualifiersMap, isGenerator):
    logging.info('reading segmentation rules')
    rulesParserVersion = rulesParser.RulesParser.PARSE4ANALYZER if not isGenerator else rulesParser.RulesParser.PARSE4GENERATOR
    segmentRulesManager = rulesParser.RulesParser(tagset, namesMap, qualifiersMap, rulesParserVersion).parse(opts.segmentsFile)
    segmentationRulesData = segmentRulesManager.serialize()
    logging.info('done reading segmentation rules')

    logging.info('building automaton')
    buildFunction = buildAnalyzerFromPoliMorf if not isGenerator else buildGeneratorFromPoliMorf
    fsa = buildFunction(opts.inputFiles, tagset, namesMap, qualifiersMap, segmentRulesManager)
    logging.info('done building automaton')

    if not isGenerator and opts.analyzerTrainFile:
        logging.info('training analyzer automaton with ' + opts.analyzerTrainFile + ' ...')
        fsa.train(_readTrainData(opts.analyzerTrainFile))
        logging.info('done training')
    if isGenerator and opts.generatorTrainFile:
        logging.info('training generator automaton with ' + opts.generatorTrainFile + ' ...')
        # was: _readTrainData(opts.analyzerTrainFile) - the generator must train on its own file
        fsa.train(_readTrainData(opts.generatorTrainFile))
        logging.info('done training')

    serializer = Serializer.getSerializer(opts.serializationMethod, fsa, dictId, copyrightTxt, tagset, namesMap, qualifiersMap, segmentationRulesData)
    if opts.generatorCpp and isGenerator:
        serializer.serialize2CppFile(opts.generatorCpp, isGenerator=isGenerator)
    if opts.analyzerCpp and not isGenerator:
        serializer.serialize2CppFile(opts.analyzerCpp, isGenerator=isGenerator)
    if opts.dictDir:
        serializer.serialize2BinaryFile(_getDictFilename(opts, isGenerator=isGenerator), isGenerator=isGenerator)
    logging.info('total FSA size (in bytes): ' + str(fsa.initialState.reverseOffset))


def main(opts):
    if opts.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    logging.info('reading tagset')
    tagset = Tagset(opts.tagsetFile)
    logging.info('done reading tagset')

    logging.info('reading names and qualifiers')
    dictId, copyrightTxt = _readDictIdAndCopyright(opts.inputFiles)
    namesMap, qualifiersMap = _readNamesAndQualifiers(opts.inputFiles)
    logging.info('done reading names and qualifiers')

    if not opts.onlyGenerator:
        _doBuildDictionaryPart(opts, dictId, copyrightTxt, tagset, namesMap, qualifiersMap, isGenerator=False)
    if not opts.onlyAnalyzer:
        _doBuildDictionaryPart(opts, dictId, copyrightTxt, tagset, namesMap, qualifiersMap, isGenerator=True)


if __name__ == '__main__':
    opts = _parseOptions()
    #~ try:
    main(opts)
    #~ except Exception as ex:
    #~     print >> sys.stderr, u'Building dictionary file failed:', unicode(ex).encode('utf8'), 'type of error:', type(ex)
    #~     sys.exit(1)
    #~ finally:
...
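For orientation, here is a minimal sketch of how the builder script above is typically invoked; the input file names (polimorf.tab, polimorf.tagset, segmentation.dat) and the output directory are illustrative placeholders, not files shipped with the project:

# Hypothetical invocation of the builder; every path below is a placeholder.
import subprocess

subprocess.check_call([
    'python2', 'morfeusz_builder',
    '--input-files', 'polimorf.tab',        # comma-separated dictionary sources
    '--tagset-file', 'polimorf.tagset',
    '--segments-file', 'segmentation.dat',
    '--dict', 'polimorf',                   # _getDictFilename() then writes polimorf-a.dict and polimorf-s.dict
    '--dict-dir', 'out',
])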
morfeusz_builder.py
Source: morfeusz_builder.py
#!/usr/bin/python3
# -*- coding:utf-8 -*-
'''
Created on 21 paź 2013

@author: mlenart
'''

import os
import sys
import logging
import codecs

from morfeuszbuilder.fsa import encode
from morfeuszbuilder.fsa import convertinput
from morfeuszbuilder.fsa.fsa import FSA
from morfeuszbuilder.fsa.serializer import Serializer, SerializationMethod
from morfeuszbuilder.tagset.tagset import Tagset
from morfeuszbuilder.segrules import rulesParser
from morfeuszbuilder.utils import exceptions, limits
from optparse import OptionParser


def _checkOption(opt, parser, msg):
    if opt is None:
        print(msg, file=sys.stderr)
        parser.print_help()
        exit(1)


def _checkCondition(cond, parser, msg):
    if not cond:
        print(msg, file=sys.stderr)
        parser.print_help()
        exit(1)


def _parseListCallback(option, opt, value, parser):
    setattr(parser.values, option.dest, value.split(','))


def _checkOpen(filename, mode):
    try:
        with open(filename, mode) as _:
            pass
        if 'w' in mode:
            os.remove(filename)
    except IOError as ex:
        # was Python 2 syntax (print >> sys.stderr, ...), a leftover from the py2 script
        print(str(ex), file=sys.stderr)
        exit(1)


def _getDictFilename(opts, isGenerator):
    typeCode = 's' if isGenerator else 'a'
    fname = '%s-%s.dict' % (opts.dictName, typeCode)
    return os.path.join(opts.dictDir, fname)


def _parseOptions():
    """
    Parses commandline args
    """
    parser = OptionParser()
    parser.add_option('--input-files',
                      type='string',
                      dest='inputFiles',
                      action='callback',
                      callback=_parseListCallback,
                      metavar='FILES',
                      help='comma separated list of dictionary files')
    parser.add_option('--tagset-file',
                      dest='tagsetFile',
                      metavar='FILE',
                      help='path to the file with tagset')
    parser.add_option('--segments-file',
                      dest='segmentsFile',
                      metavar='FILE',
                      help='path to the file with segment rules')
    #~ parser.add_option('--trim-supneg',
    #~                   dest='trimSupneg',
    #~                   default=False,
    #~                   action='store_true',
    #~                   help='this option is ignored and exists only for backwards compatibility')
    parser.add_option('--dict',
                      dest='dictName',
                      help='the name of result dictionary')
    parser.add_option('--dict-dir',
                      dest='dictDir',
                      metavar='FILE',
                      default=os.getcwd(),
                      help='path to output directory (the default is current dir)')
    parser.add_option('--only-analyzer',
                      dest='onlyAnalyzer',
                      action='store_true',
                      default=False,
                      help='Generate dictionary for morphological analysis only (default is both analysis and synthesis)')
    parser.add_option('--only-generator',
                      dest='onlyGenerator',
                      action='store_true',
                      default=False,
                      help='Generate dictionary for morphological synthesis only (default is both analysis and synthesis)')
    parser.add_option('--analyzer-cpp',
                      dest='analyzerCpp',
                      metavar='FILE',
                      help='Encode analyzer dictionary data in given c++ file')
    parser.add_option('--generator-cpp',
                      dest='generatorCpp',
                      metavar='FILE',
                      help='Encode generator dictionary data in given c++ file')
    #~ parser.add_option('--use-arrays',
    #~                   dest='useArrays',
    #~                   action='store_true',
    #~                   default=False,
    #~                   help='store states reachable by 2 transitions in arrays (should speed up recognition, available only when --serialization-method=V1)')
    parser.add_option('--serialization-method',
                      dest='serializationMethod',
                      default='V1',
                      help="FSA serialization method: \
                      SIMPLE - fixed-length transitions, fastest and weakest compression \
                      V1 - variable-length transitions, compressed labels - strongest compression \
                      V2 - format similar to the default in Jan Daciuk's fsa package - variable-length transitions, non-compressed labels - good compression, good speed")
    #~ parser.add_option('--visualize',
    #~                   dest='visualize',
    #~                   action='store_true',
    #~                   default=False,
    #~                   help='visualize result')
    parser.add_option('--analyzer-train-file',
                      dest='analyzerTrainFile',
                      help='A text file used for analyzer training. Should contain words from some large corpus - one word in each line. Resulting analysis automaton should be faster with proper train file.')
    parser.add_option('--generator-train-file',
                      dest='generatorTrainFile',
                      help='A text file used for generator training. Should contain words from some large corpus - one word in each line. Resulting synthesis automaton should be faster with proper train file.')
    parser.add_option('--debug',
                      dest='debug',
                      action='store_true',
                      default=False,
                      help='output some debugging info')
    #~ parser.add_option('--profile',
    #~                   dest='profile',
    #~                   action='store_true',
    #~                   default=False,
    #~                   help='show profiling graph (requires pycallgraph and graphviz)')

    opts, args = parser.parse_args()

    _checkOption(opts.inputFiles, parser, "Input file is missing")
    _checkOption(opts.dictDir, parser, "Output dictionary dir is missing")
    _checkCondition((opts.onlyAnalyzer, opts.onlyGenerator) != (True, True),
                    parser, 'Cannot set both --only-analyzer and --only-generator')
    writeCpp = {opts.analyzerCpp, opts.generatorCpp} != {None}
    _checkCondition(opts.dictName or writeCpp, parser, "Dictionary name is missing")
    _checkCondition(opts.onlyGenerator or opts.analyzerCpp or not writeCpp, parser, "Analyzer .cpp output file path is missing")
    _checkCondition(opts.onlyAnalyzer or opts.generatorCpp or not writeCpp, parser, "Generator .cpp output file path is missing")
    #~ _checkCondition((opts.dictName, opts.outputCpp) != (None, None),
    #~                 parser, 'Must set at least one of: --dict-name, --output-cpp')
    #~ _checkOption(opts.outputFile, parser, "Output file is missing")
    _checkOption(opts.tagsetFile, parser, "Tagset file is missing")
    _checkOption(opts.segmentsFile, parser, "Segmentation file is missing")
    #~ _checkOption(opts.serializationMethod, parser, "Serialization method file is missing")
    #~ _checkExactlyOneOptionSet([opts.analyzer, opts.generator],
    #~                           parser, 'Must set exactly one FSA type: --analyzer or --generator')

    _checkOpen(opts.tagsetFile, 'r')
    _checkOpen(opts.segmentsFile, 'r')
    for filename in opts.inputFiles:
        _checkOpen(filename, 'r')
    if not opts.onlyGenerator:
        _checkOpen(_getDictFilename(opts, isGenerator=False), 'w')
    if not opts.onlyAnalyzer:
        _checkOpen(_getDictFilename(opts, isGenerator=True), 'w')

    if not opts.serializationMethod.upper() in [SerializationMethod.SIMPLE, SerializationMethod.V1]:
        print('--serialization-method must be one of (' + str([SerializationMethod.SIMPLE, SerializationMethod.V1]) + ')', file=sys.stderr)
        parser.print_help()
        exit(1)

    return opts


def _concatFiles(inputFiles):
    for inputFile in inputFiles:
        if inputFile:
            with open(inputFile, 'r') as f:
                for line in f:
                    yield line


def _readDictIdAndCopyright(inputFiles):
    dictId = None
    copyright = None
    for inputFile in inputFiles:
        if inputFile:
            with codecs.open(inputFile, 'r', 'utf8') as f:
                inCopyright = False
                for linenum, line in enumerate(f, start=1):
                    if dictId is None and line.startswith('#!DICT-ID'):
                        dictIdTag, _, dictId = line.strip().partition(' ')
                        exceptions.validate(
                            dictIdTag == '#!DICT-ID',
                            'Dictionary ID tag must be followed by a space character and dictionary ID string')
                        exceptions.validate(
                            len(line.split(' ')) > 1,
                            '%s:%d: Must provide DICT-ID' % (inputFile, linenum))
                        exceptions.validate(
                            len(line.split(' ')) == 2,
                            '%s:%d: DICT-ID must not contain spaces' % (inputFile, linenum))
                    elif copyright is None and line.startswith('#<COPYRIGHT>'):
                        exceptions.validate(
                            line.strip() == '#<COPYRIGHT>',
                            '%s:%d: Copyright start tag must be the only one in the line' % (inputFile, linenum))
                        inCopyright = True
                        copyright = ''
                    elif line.startswith('#</COPYRIGHT>'):
                        exceptions.validate(
                            inCopyright,
                            '%s:%d: Copyright end tag must be preceded by copyright start tag' % (inputFile, linenum))
                        exceptions.validate(
                            line.strip() == '#</COPYRIGHT>',
                            '%s:%d: Copyright end tag must be the only one in the line' % (inputFile, linenum))
                        inCopyright = False
                    elif inCopyright:
                        copyright += line
    if dictId is None:
        # logging.warn is deprecated in Python 3
        logging.warning('No dictionary ID tag found')
        dictId = ''
    if copyright is None:
        logging.warning('No copyright info found')
        copyright = ''
    return (dictId, copyright)


def _readNamesAndQualifiers(inputFiles):
    names = set([''])
    qualifiers = set([frozenset()])
    lineParser = convertinput.LineParser()
    for line in _concatFiles(inputFiles):
        line = line.strip()
        if not lineParser.ignoreLine(line):
            _, _, _, name, qualifier = lineParser.parseLine(line)
            names.add(name)
            qualifiers.add(convertinput.parseQualifiers(qualifier))
    namesMap = dict([(name, idx) for idx, name in enumerate(sorted(list(names)))])
    qualifiersMap = dict([(quals, idx) for idx, quals in enumerate(sorted(qualifiers, key=lambda q: tuple(sorted(q))))])
    exceptions.validate(
        len(qualifiersMap) <= limits.MAX_QUALIFIERS_COMBINATIONS,
        'Too many qualifiers combinations. The limit is %d' % limits.MAX_QUALIFIERS_COMBINATIONS)
    return namesMap, qualifiersMap


def _readPolimorfInput4Analyzer(inputFiles, tagset, namesMap, qualifiersMap, encoder, segmentRulesManager):
    logging.info('reading analyzer data from %s', str(inputFiles))
    for entry in convertinput.PolimorfConverter4Analyzer(tagset, namesMap, qualifiersMap, encoder, 'utf8', segmentRulesManager).convert(_concatFiles(inputFiles)):
        yield entry


def _readPolimorfInput4Generator(inputFiles, tagset, namesMap, qualifiersMap, encoder, segmentRulesManager):
    logging.info('reading generator data from %s', str(inputFiles))
    for entry in convertinput.PolimorfConverter4Generator(tagset, namesMap, qualifiersMap, encoder, 'utf8', segmentRulesManager).convert(_concatFiles(inputFiles)):
        yield entry


def _readTrainData(trainFile):
    with codecs.open(trainFile, 'r', 'utf8') as f:
        for line in f:
            yield line.strip()


def _printStats(fsa):
    acceptingNum = 0
    sinkNum = 0
    arrayNum = 0
    for s in fsa.dfs():
        if s.isAccepting():
            acceptingNum += 1
        if s.transitionsNum == 0:
            sinkNum += 1
        if s.serializeAsArray:
            arrayNum += 1
    logging.info('states num: ' + str(fsa.getStatesNum()))
    logging.info('transitions num: ' + str(fsa.getTransitionsNum()))
    logging.info('accepting states num: ' + str(acceptingNum))
    logging.info('sink states num: ' + str(sinkNum))
    logging.info('array states num: ' + str(arrayNum))


def buildAnalyzerFromPoliMorf(inputFiles, tagset, namesMap, qualifiersMap, segmentRulesManager):
    encoder = encode.MorphEncoder()
    fsa = FSA(encoder, tagset)
    for word, data in _readPolimorfInput4Analyzer(inputFiles, tagset, namesMap, qualifiersMap, encoder, segmentRulesManager):
        fsa.addEntry(word, data)
        del word
        del data
    fsa.close()
    logging.info('------')
    logging.info('Analyzer FSA stats:')
    logging.info('------')
    _printStats(fsa)
    return fsa


def buildGeneratorFromPoliMorf(inputFiles, tagset, namesMap, qualifiersMap, segmentRulesManager):
    encoder = encode.Encoder4Generator()
    fsa = FSA(encoder, tagset)
    inputData = _readPolimorfInput4Generator(inputFiles, tagset, namesMap, qualifiersMap, encoder, segmentRulesManager)
    for word, data in inputData:
        fsa.addEntry(word, data)
    fsa.close()
    logging.info('------')
    logging.info('Generator FSA stats:')
    logging.info('------')
    _printStats(fsa)
    return fsa


def _doBuildDictionaryPart(opts, dictId, copyrightTxt, tagset, namesMap, qualifiersMap, isGenerator):
    logging.info('reading segmentation rules')
    rulesParserVersion = rulesParser.RulesParser.PARSE4ANALYZER if not isGenerator else rulesParser.RulesParser.PARSE4GENERATOR
    segmentRulesManager = rulesParser.RulesParser(tagset, namesMap, qualifiersMap, rulesParserVersion).parse(opts.segmentsFile)
    segmentationRulesData = segmentRulesManager.serialize()
    logging.info('done reading segmentation rules')

    logging.info('building automaton')
    buildFunction = buildAnalyzerFromPoliMorf if not isGenerator else buildGeneratorFromPoliMorf
    fsa = buildFunction(opts.inputFiles, tagset, namesMap, qualifiersMap, segmentRulesManager)
    logging.info('done building automaton')

    if not isGenerator and opts.analyzerTrainFile:
        logging.info('training analyzer automaton with ' + opts.analyzerTrainFile + ' ...')
        fsa.train(_readTrainData(opts.analyzerTrainFile))
        logging.info('done training')
    if isGenerator and opts.generatorTrainFile:
        logging.info('training generator automaton with ' + opts.generatorTrainFile + ' ...')
        # was: _readTrainData(opts.analyzerTrainFile) - the generator must train on its own file
        fsa.train(_readTrainData(opts.generatorTrainFile))
        logging.info('done training')

    serializer = Serializer.getSerializer(opts.serializationMethod, fsa, dictId, copyrightTxt, tagset, namesMap, qualifiersMap, segmentationRulesData)
    if opts.generatorCpp and isGenerator:
        serializer.serialize2CppFile(opts.generatorCpp, isGenerator=isGenerator)
    if opts.analyzerCpp and not isGenerator:
        serializer.serialize2CppFile(opts.analyzerCpp, isGenerator=isGenerator)
    if opts.dictDir:
        serializer.serialize2BinaryFile(_getDictFilename(opts, isGenerator=isGenerator), isGenerator=isGenerator)
    logging.info('total FSA size (in bytes): ' + str(fsa.initialState.reverseOffset))


def main(opts):
    if opts.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    logging.info('reading tagset')
    tagset = Tagset(opts.tagsetFile)
    logging.info('done reading tagset')

    logging.info('reading names and qualifiers')
    dictId, copyrightTxt = _readDictIdAndCopyright(opts.inputFiles)
    namesMap, qualifiersMap = _readNamesAndQualifiers(opts.inputFiles)
    logging.info('done reading names and qualifiers')

    if not opts.onlyGenerator:
        _doBuildDictionaryPart(opts, dictId, copyrightTxt, tagset, namesMap, qualifiersMap, isGenerator=False)
    if not opts.onlyAnalyzer:
        _doBuildDictionaryPart(opts, dictId, copyrightTxt, tagset, namesMap, qualifiersMap, isGenerator=True)


if __name__ == '__main__':
    opts = _parseOptions()
...
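The --input-files flag in both scripts relies on an optparse callback to split a comma-separated value into a Python list, since optparse has no built-in list type. A self-contained sketch of that pattern, runnable as-is:

from optparse import OptionParser

def _parseListCallback(option, opt, value, parser):
    # store 'a,b,c' on the parsed options object as ['a', 'b', 'c']
    setattr(parser.values, option.dest, value.split(','))

parser = OptionParser()
parser.add_option('--input-files', type='string', dest='inputFiles',
                  action='callback', callback=_parseListCallback)
opts, _ = parser.parse_args(['--input-files', 'a.tab,b.tab'])
assert opts.inputFiles == ['a.tab', 'b.tab']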
test_account.py
Source: test_account.py
...
    _ = logged_in_account.username
    assert logged_in_account.n_requests == n + 1


def test_iter_lists(once_logged_in_account):
    lists = once_logged_in_account.iter_lists()
    assert inspect.isgenerator(lists)
    assert all(isinstance(x, TMDbList) for x in lists)


def test_iter_favorite_movies(once_logged_in_account):
    favorite_movies = once_logged_in_account.iter_favorite_movies()
    assert inspect.isgenerator(favorite_movies)
    assert all(isinstance(x, Movie) for x in favorite_movies)


def test_iter_favorite_shows(once_logged_in_account):
    favorite_shows = once_logged_in_account.iter_favorite_shows()
    assert inspect.isgenerator(favorite_shows)
    assert all(isinstance(x, Show) for x in favorite_shows)


def test_iter_rated_movies(once_logged_in_account):
    rated_movies = once_logged_in_account.iter_rated_movies()
    assert inspect.isgenerator(rated_movies)
    assert all(isinstance(x, Movie) for x in rated_movies)


def test_iter_rated_shows(once_logged_in_account):
    rated_shows = once_logged_in_account.iter_rated_shows()
    assert inspect.isgenerator(rated_shows)
    assert all(isinstance(x, Show) for x in rated_shows)


def test_iter_rated_episodes(once_logged_in_account):
    rated_episodes = once_logged_in_account.iter_rated_episodes()
    assert inspect.isgenerator(rated_episodes)
    assert all(isinstance(x, Episode) for x in rated_episodes)


def test_iter_movie_watchlist(once_logged_in_account):
    movie_watchlist = once_logged_in_account.iter_movie_watchlist()
    assert inspect.isgenerator(movie_watchlist)
    assert all(isinstance(x, Movie) for x in movie_watchlist)


def test_iter_show_watchlist(once_logged_in_account):
    show_watchlist = once_logged_in_account.iter_show_watchlist()
    assert inspect.isgenerator(show_watchlist)
    # was: isinstance(x, Movie) - a show watchlist should yield Show objects
    assert all(isinstance(x, Show) for x in show_watchlist)


def test_mark_as_favorite(once_logged_in_account):
    once_logged_in_account.remove_from_favorites(Movie(18148))
    r = once_logged_in_account.mark_as_favorite(Movie(18148))
    assert r["status_code"] == 1


def test_remove_from_favorite(once_logged_in_account):
    once_logged_in_account.mark_as_favorite(Movie(18148))
    r = once_logged_in_account.remove_from_favorites(Movie(18148))
    assert r["status_code"] == 13


def test_add_to_watchlist(once_logged_in_account):
    once_logged_in_account.remove_from_watchlist(Movie(18148))
    r = once_logged_in_account.add_to_watchlist(Movie(18148))
    assert r["status_code"] == 1


def test_remove_from_watchlist(once_logged_in_account):
...
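Every iterator test above follows the same three-step pattern: call the iter_* method, assert the result is a generator, then type-check each yielded item (which also drains the generator). A minimal illustration, with an invented count_up generator standing in for the account methods:

import inspect

def count_up(n):
    # stand-in for the Account.iter_* methods under test
    for i in range(n):
        yield i

gen = count_up(3)
assert inspect.isgenerator(gen)               # the call returns a lazy generator
assert all(isinstance(x, int) for x in gen)   # consuming it type-checks every item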
v1.py
Source: v1.py
import json
from pathlib import Path
from typing import Dict, List, Optional

from cmake_file_api.kinds.common import CMakeSourceBuildPaths, VersionMajorMinor
from cmake_file_api.kinds.kind import ObjectKind


class CMakeFilesInput(object):
    __slots__ = ("path", "isGenerator", "isExternal", "isCMake")

    def __init__(self, path: Path, isGenerator: Optional[bool], isExternal: Optional[bool], isCMake: Optional[bool]):
        self.path = path
        self.isGenerator = isGenerator
        self.isExternal = isExternal
        self.isCMake = isCMake

    @classmethod
    def from_dict(cls, dikt: Dict) -> "CMakeFilesInput":
        path = Path(dikt["path"])
        isGenerator = dikt.get("isGenerator")
        isExternal = dikt.get("isExternal")
        # was: dikt.get("isExternal") - a copy-paste slip that always mirrored isExternal
        isCMake = dikt.get("isCMake")
        return cls(path, isGenerator, isExternal, isCMake)

    def __repr__(self) -> str:
        return "{}(path='{}', generator={}, external={}, cmake={})".format(
            type(self).__name__,
            self.path,
            self.isGenerator,
            self.isExternal,
            self.isCMake,
        )


class CMakeFilesV1(object):
    KIND = ObjectKind.CMAKEFILES

    __slots__ = ("version", "paths", "inputs")

    def __init__(self, version: VersionMajorMinor, paths: CMakeSourceBuildPaths, inputs: List[CMakeFilesInput]):
        self.version = version
        self.paths = paths
        self.inputs = inputs

    @classmethod
    def from_dict(cls, dikt: Dict, reply_path: Path) -> "CMakeFilesV1":
        version = VersionMajorMinor.from_dict(dikt["version"])
        paths = CMakeSourceBuildPaths.from_dict(dikt["paths"])
        inputs = list(CMakeFilesInput.from_dict(cmi) for cmi in dikt["inputs"])
        return cls(version, paths, inputs)

    @classmethod
    def from_path(cls, path: Path, reply_path: Path) -> "CMakeFilesV1":
        dikt = json.load(path.open())
        return cls.from_dict(dikt, reply_path)

    def __repr__(self) -> str:
        return "{}(version={}, paths={}, inputs={})".format(
            type(self).__name__,
            repr(self.version),
            self.paths,
            repr(self.inputs),
...
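A hedged usage sketch for the class above, assuming a CMake file-API reply directory; the reply file name below is illustrative (real replies carry a generated suffix):

from pathlib import Path

# Placeholder paths: a build tree with the CMake file API enabled
reply_dir = Path('build/.cmake/api/v1/reply')
cmake_files = CMakeFilesV1.from_path(reply_dir / 'cmakeFiles-v1-abc123.json', reply_dir)
for inp in cmake_files.inputs:
    if not inp.isCMake:                 # skip files shipped with CMake itself
        print(inp.path)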