Best Python code snippet using autotest_python
datastore_stats_generator.py
Source:datastore_stats_generator.py  
1#!/usr/bin/env python2#3# Copyright 2007 Google Inc.4#5# Licensed under the Apache License, Version 2.0 (the "License");6# you may not use this file except in compliance with the License.7# You may obtain a copy of the License at8#9#     http://www.apache.org/licenses/LICENSE-2.010#11# Unless required by applicable law or agreed to in writing, software12# distributed under the License is distributed on an "AS IS" BASIS,13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.14# See the License for the specific language governing permissions and15# limitations under the License.16#17"""Generate Datastore Stats over Dev mode appserver's datastore."""18import datetime19import logging20from google.appengine.api import datastore21from google.appengine.api import datastore_admin22from google.appengine.api import datastore_types23from google.appengine.api import users24from google.appengine.ext.db import stats25DELETE_BATCH_SIZE = 10026_GLOBAL_KEY = (stats.GlobalStat, 'total_entity_usage', '')27_PROPERTY_TYPE_TO_DSS_NAME = {28    unicode: ('String', 'STRING'),29    bool: ('Boolean', 'BOOLEAN'),30    long: ('Integer', 'INT64'),31    type(None): ('NULL', 'NULL'),32    float: ('Float', 'DOUBLE'),33    datastore_types.Key: ('Key', 'REFERENCE'),34    datastore_types.Blob: ('Blob', 'STRING'),35    datastore_types.EmbeddedEntity: ('EmbeddedEntity', 'STRING'),36    datastore_types.ByteString: ('ShortBlob', 'STRING'),37    datastore_types.Text: ('Text', 'STRING'),38    users.User: ('User', 'USER'),39    datastore_types.Category: ('Category', 'STRING'),40    datastore_types.Link: ('Link', 'STRING'),41    datastore_types.Email: ('Email', 'STRING'),42    datetime.datetime: ('Date/Time', 'INT64'),43    datastore_types.GeoPt: ('GeoPt', 'POINT'),44    datastore_types.IM: ('IM', 'STRING'),45    datastore_types.PhoneNumber: ('PhoneNumber', 'STRING'),46    datastore_types.PostalAddress: ('PostalAddress', 'STRING'),47    datastore_types.Rating: ('Rating', 'INT64'),48    
datastore_types.BlobKey: ('BlobKey', 'STRING'),49    }50class DatastoreStatsProcessor(object):51  """Generates datastore stats for an app's an datastore entities."""52  def __init__(self, _app=None):53    self.app_id = datastore_types.ResolveAppId(_app)54    self.whole_app_stats = {}55    self.namespace_stats = {}56    self.found_non_empty_namespace = False57    self.old_stat_keys = []58    self.timestamp = datetime.datetime.utcnow()59  def __ScanAllNamespaces(self):60    """Scans all the namespaces and processes each namespace."""61    namespace_query = datastore.Query('__namespace__', _app=self.app_id)62    for namespace_entity in namespace_query.Run():63      name = namespace_entity.key().name()64      if name is None:65        name = ''66      self.__ProcessNamespace(name)67  def __ProcessNamespace(self, namespace):68    """Process all the entities in a given namespace."""69    all_query = datastore.Query(namespace=namespace, _app=self.app_id)70    for entity in all_query.Run():71      self.found_non_empty_namespace |= (namespace != '')72      proto = entity.ToPb()73      proto_size = len(proto.SerializeToString())74      if entity.key().kind() in stats._DATASTORE_STATS_CLASSES_BY_KIND:75        stat_kind = stats._DATASTORE_STATS_CLASSES_BY_KIND[entity.key().kind()]76        self.old_stat_keys.append(entity.key())77        self.__AggregateTotal(proto_size, entity.key(), proto, namespace,78                              stat_kind)79      else:80        self.__ProcessUserEntity(proto_size, entity.key(), proto, namespace)81  def __GetPropertyIndexStat(self, namespace, kind_name,82                             entity_key_size, prop):83    """Return the size and count of indexes for a property of an EntityProto."""84    property_index_size = (len(self.app_id) + len(kind_name) +85                           len(prop.value().SerializeToString()) +86                           len(namespace) + entity_key_size)87    return (property_index_size, 2)88  def 
__GetTypeIndexStat(self, namespace, kind_name, entity_key_size):89    """Return the size and count of indexes by type of an EntityProto."""90    type_index_size = (len(self.app_id) + len(kind_name) + entity_key_size91                       + len(namespace))92    return (type_index_size, 1)93  def __ProcessUserEntity(self, proto_size, key, proto, namespace):94    """Increment datastore stats for a non stats record."""95    self.__AggregateTotal(proto_size, key, proto, namespace, None)96    kind_name = key.kind()97    entity_key_size = (len(proto.key().app()) + len(namespace) +98                       len(proto.key().path().SerializeToString()) +99                       len(proto.entity_group().SerializeToString()))100    self.__AggregateCompositeIndices(proto, namespace, kind_name,101                                     entity_key_size)102    type_index_size, type_index_count = self.__GetTypeIndexStat(namespace,103                                                                kind_name,104                                                                entity_key_size)105    property_index_count = 0106    property_index_size = 0107    for prop_list in (proto.property_list(), proto.raw_property_list()):108      for prop in prop_list:109        index_size, index_count = self.__GetPropertyIndexStat(namespace,110                                                              kind_name,111                                                              entity_key_size,112                                                              prop)113        property_index_size += index_size114        property_index_count += index_count115    builtin_index_size = type_index_size + property_index_size116    builtin_index_count = type_index_count + property_index_count117    self.__Increment(self.whole_app_stats, 1,118                     (stats.KindStat, kind_name, ''),119                     proto_size,120                     builtin_index_count=builtin_index_count,121                   
  builtin_index_size=builtin_index_size,122                     kind_name=kind_name)123    self.__Increment(self.namespace_stats, 1,124                     (stats.NamespaceKindStat, kind_name, namespace),125                     proto_size,126                     builtin_index_count=builtin_index_count,127                     builtin_index_size=builtin_index_size,128                     kind_name=kind_name)129    if key.parent() is None:130      whole_app_model = stats.KindRootEntityStat131      namespace_model = stats.NamespaceKindRootEntityStat132    else:133      whole_app_model = stats.KindNonRootEntityStat134      namespace_model = stats.NamespaceKindNonRootEntityStat135    self.__Increment(self.whole_app_stats, 1,136                     (whole_app_model, kind_name, ''),137                     proto_size,138                     kind_name=kind_name)139    self.__Increment(self.namespace_stats, 1,140                     (namespace_model, kind_name, namespace),141                     proto_size,142                     kind_name=kind_name)143    self.__ProcessProperties(144        kind_name,145        namespace,146        entity_key_size,147        (proto.property_list(), proto.raw_property_list()))148  def __ProcessProperties(self, kind_name, namespace, entity_key_size,149                          prop_lists):150    for prop_list in prop_lists:151      for prop in prop_list:152        try:153          value = datastore_types.FromPropertyPb(prop)154          self.__AggregateProperty(kind_name, namespace, entity_key_size,155                                   prop, value)156        except (AssertionError, AttributeError, TypeError, ValueError), e:157          logging.error('Cannot process property %r, exception %s' %158                        (prop, e))159  def __AggregateProperty(self, kind_name, namespace, entity_key_size,160                          prop, value):161    property_name = prop.name()162    property_type = _PROPERTY_TYPE_TO_DSS_NAME[type(value)][0]163  
  index_property_type = _PROPERTY_TYPE_TO_DSS_NAME[type(value)][1]164    size = len(prop.SerializeToString())165    index_size, index_count = self.__GetPropertyIndexStat(namespace, kind_name,166                                                          entity_key_size, prop)167    self.__Increment(self.whole_app_stats, 1,168                     (stats.PropertyTypeStat, property_type, ''),169                     size,170                     builtin_index_count=0,171                     builtin_index_size=0,172                     property_type=property_type)173    self.__Increment(self.whole_app_stats, 0,174                     (stats.PropertyTypeStat, index_property_type, ''),175                     0,176                     builtin_index_count=index_count,177                     builtin_index_size=index_size,178                     property_type=index_property_type)179    self.__Increment(self.namespace_stats, 1,180                     (stats.NamespacePropertyTypeStat,181                      property_type, namespace),182                     size,183                     builtin_index_count=0,184                     builtin_index_size=0,185                     property_type=property_type)186    self.__Increment(self.namespace_stats, 0,187                     (stats.NamespacePropertyTypeStat,188                      index_property_type, namespace),189                     0,190                     builtin_index_count=index_count,191                     builtin_index_size=index_size,192                     property_type=index_property_type)193    self.__Increment(self.whole_app_stats, 1,194                     (stats.KindPropertyTypeStat,195                      property_type + '_' + kind_name, ''),196                     size,197                     builtin_index_count=0,198                     builtin_index_size=0,199                     property_type=property_type, kind_name=kind_name)200    self.__Increment(self.whole_app_stats, 0,201                     
(stats.KindPropertyTypeStat,202                      index_property_type + '_' + kind_name, ''),203                     0,204                     builtin_index_count=index_count,205                     builtin_index_size=index_size,206                     property_type=index_property_type, kind_name=kind_name)207    self.__Increment(self.namespace_stats, 1,208                     (stats.NamespaceKindPropertyTypeStat,209                      property_type + '_' + kind_name, namespace),210                     size,211                     builtin_index_count=0,212                     builtin_index_size=0,213                     property_type=property_type, kind_name=kind_name)214    self.__Increment(self.namespace_stats, 0,215                     (stats.NamespaceKindPropertyTypeStat,216                      index_property_type + '_' + kind_name, namespace),217                     0,218                     builtin_index_count=index_count,219                     builtin_index_size=index_size,220                     property_type=index_property_type, kind_name=kind_name)221    self.__Increment(self.whole_app_stats, 1,222                     (stats.KindPropertyNameStat,223                      property_name + '_' + kind_name, ''),224                     size,225                     builtin_index_count=index_count,226                     builtin_index_size=index_size,227                     property_name=property_name, kind_name=kind_name)228    self.__Increment(self.namespace_stats, 1,229                     (stats.NamespaceKindPropertyNameStat,230                      property_name + '_' + kind_name, namespace),231                     size,232                     builtin_index_count=index_count,233                     builtin_index_size=index_size,234                     property_name=property_name, kind_name=kind_name)235    self.__Increment(self.whole_app_stats, 1,236                     (stats.KindPropertyNamePropertyTypeStat,237                      property_type + 
'_' + property_name + '_' + kind_name,238                      ''), size,239                     builtin_index_count=0,240                     builtin_index_size=0,241                     property_type=property_type,242                     property_name=property_name, kind_name=kind_name)243    self.__Increment(self.whole_app_stats, 0,244                     (stats.KindPropertyNamePropertyTypeStat,245                      index_property_type + '_' + property_name + '_' +246                      kind_name,247                      ''), 0,248                     builtin_index_count=index_count,249                     builtin_index_size=index_size,250                     property_type=index_property_type,251                     property_name=property_name, kind_name=kind_name)252    self.__Increment(self.namespace_stats, 1,253                     (stats.NamespaceKindPropertyNamePropertyTypeStat,254                      property_type + '_' + property_name + '_' + kind_name,255                      namespace),256                     size,257                     builtin_index_count=0,258                     builtin_index_size=0,259                     property_type=property_type,260                     property_name=property_name, kind_name=kind_name)261    self.__Increment(self.namespace_stats, 0,262                     (stats.NamespaceKindPropertyNamePropertyTypeStat,263                      index_property_type + '_' + property_name + '_' +264                      kind_name,265                      namespace),266                     0,267                     builtin_index_count=index_count,268                     builtin_index_size=index_size,269                     property_type=index_property_type,270                     property_name=property_name, kind_name=kind_name)271  def __GetCompositeIndexStat(self, definition, proto, namespace, kind_name,272                              entity_key_size):273    """Get statistics of composite index for a index definition of an 
entity."""274    property_list = proto.property_list()275    property_count = []276    property_size = []277    index_count = 1278    for indexed_prop in definition.property_list():279      name = indexed_prop.name()280      count = 0281      prop_size = 0282      for prop in property_list:283        if prop.name() == name:284          count += 1285          prop_size += len(prop.SerializeToString())286      property_count.append(count)287      property_size.append(prop_size)288      index_count *= count289    if index_count == 0:290      return (0, 0)291    index_only_size = 0292    for i in range(len(property_size)):293      index_only_size += property_size[i] * (index_count / property_count[i])294    index_size = (index_count * (entity_key_size + len(kind_name) +295                                 len(self.app_id) + len(namespace)) +296                  index_only_size * 2)297    return (index_size, index_count)298  def __AggregateCompositeIndices(self, proto, namespace, kind_name,299                                  entity_key_size):300    """Aggregate statistics of composite indexes for an entity."""301    composite_indices = datastore_admin.GetIndices(self.app_id)302    for index in composite_indices:303      definition = index.definition()304      if kind_name != definition.entity_type():305        continue306      index_size, index_count = self.__GetCompositeIndexStat(definition, proto,307                                                             namespace,308                                                             kind_name,309                                                             entity_key_size)310      if index_count == 0:311        continue312      name_id = namespace313      if not name_id:314        name_id = 1315      self.__Increment(self.whole_app_stats, 0, _GLOBAL_KEY, 0,316                       composite_index_count=index_count,317                       composite_index_size=index_size)318      self.__Increment(self.whole_app_stats, 
0,319                       (stats.NamespaceStat, name_id, ''), 0,320                       composite_index_count=index_count,321                       composite_index_size=index_size,322                       subject_namespace=namespace)323      self.__Increment(self.namespace_stats, 0,324                       (stats.NamespaceGlobalStat, 'total_entity_usage',325                        namespace), 0,326                       composite_index_count=index_count,327                       composite_index_size=index_size)328      self.__Increment(self.whole_app_stats, 0,329                       (stats.KindStat, kind_name, ''), 0,330                       composite_index_count=index_count,331                       composite_index_size=index_size,332                       kind_name=kind_name)333      self.__Increment(self.namespace_stats, 0,334                       (stats.NamespaceKindStat, kind_name, namespace), 0,335                       composite_index_count=index_count,336                       composite_index_size=index_size,337                       kind_name=kind_name)338      index_id = index.id()339      self.__Increment(self.whole_app_stats, index_count,340                       (stats.KindCompositeIndexStat,341                        kind_name + '_%s' % index_id, ''), index_size,342                       kind_name=kind_name, index_id=index_id)343      self.__Increment(self.namespace_stats, index_count,344                       (stats.NamespaceKindCompositeIndexStat,345                        kind_name + '_%s' % index_id, namespace), index_size,346                       kind_name=kind_name, index_id=index_id)347  def __AggregateTotal(self, size, key, proto, namespace, stat_kind):348    """Aggregate total datastore stats."""349    kind_name = key.kind()350    entity_key_size = (len(proto.key().app()) +351                       len(proto.key().path().SerializeToString()) +352                       len(proto.entity_group().SerializeToString()))353    
type_index_size, type_index_count = self.__GetTypeIndexStat(namespace,354                                                                kind_name,355                                                                entity_key_size)356    property_index_count = 0357    property_index_size = 0358    for prop_list in (proto.property_list(), proto.raw_property_list()):359      for prop in prop_list:360        index_size, index_count = self.__GetPropertyIndexStat(namespace,361                                                              kind_name,362                                                              entity_key_size,363                                                              prop)364        property_index_size += index_size365        property_index_count += index_count366    builtin_index_size = type_index_size + property_index_size367    builtin_index_count = type_index_count + property_index_count368    if stat_kind == stats.GlobalStat:369      count = 0370    else:371      count = 1372    self.__Increment(self.whole_app_stats, count, _GLOBAL_KEY, size,373                     builtin_index_count=builtin_index_count,374                     builtin_index_size=builtin_index_size)375    name_id = namespace376    if not name_id:377      name_id = 1378    if (stat_kind == stats.NamespaceStat) and (namespace == ''):379      count = 0380    self.__Increment(self.whole_app_stats, count,381                     (stats.NamespaceStat, name_id, ''),382                     size,383                     builtin_index_count=builtin_index_count,384                     builtin_index_size=builtin_index_size,385                     subject_namespace=namespace)386    if stat_kind == stats.NamespaceGlobalStat:387      count = 0388    self.__Increment(389        self.namespace_stats, count,390        (stats.NamespaceGlobalStat, 'total_entity_usage', namespace), size,391        builtin_index_count=builtin_index_count,392        builtin_index_size=builtin_index_size)393  def 
__Increment(self, stats_dict, count, stat_key, size,394                  builtin_index_count=0, builtin_index_size=0,395                  composite_index_count=0, composite_index_size=0, **kwds):396    """Increment stats for a particular kind.397    Args:398        stats_dict: The dictionary where the entities are held.399          The entities are keyed by stat_key. e.g. The400          __Stat_Total__ entity will be found in stats_dict[_GLOBAL_KEY].401        count: The amount to increment the datastore stat by.402        stat_key: A tuple of (db.Model of the stat, key value, namespace).403        size: The "bytes" to increment the size by.404        builtin_index_count: The bytes of builtin index to add in to a stat.405        builtin_index_size: The count of builtin index to add in to a stat.406        composite_index_count: The bytes of composite index to add in to a stat.407        composite_index_size: The count of composite index to add in to a stat.408        kwds: Name value pairs that are set on the created entities.409    """410    if stat_key not in stats_dict:411      stat_model = stat_key[0](412          key=datastore_types.Key.from_path(stat_key[0].STORED_KIND_NAME,413                                            stat_key[1],414                                            namespace=stat_key[2],415                                            _app=self.app_id),416          _app=self.app_id)417      stats_dict[stat_key] = stat_model418      for field, value in kwds.iteritems():419        setattr(stat_model, field, value)420      stat_model.count = count421      if size:422        stat_model.entity_bytes = size423      if builtin_index_size:424        stat_model.builtin_index_bytes = builtin_index_size425        stat_model.builtin_index_count = builtin_index_count426      if composite_index_size:427        stat_model.composite_index_bytes = composite_index_size428        stat_model.composite_index_count = composite_index_count429      stat_model.bytes = size 
+ builtin_index_size + composite_index_size430      stat_model.timestamp = self.timestamp431    else:432      stat_model = stats_dict[stat_key]433      stat_model.count += count434      if size:435        stat_model.entity_bytes += size436      if builtin_index_size:437        stat_model.builtin_index_bytes += builtin_index_size438        stat_model.builtin_index_count += builtin_index_count439      if composite_index_size:440        stat_model.composite_index_bytes += composite_index_size441        stat_model.composite_index_count += composite_index_count442      stat_model.bytes += size + builtin_index_size + composite_index_size443  def __Finalize(self):444    """Finishes processing, deletes all old stats and writes new ones."""445    for i in range(0, len(self.old_stat_keys), DELETE_BATCH_SIZE):446      datastore.Delete(self.old_stat_keys[i:i+DELETE_BATCH_SIZE])447    self.written = 0448    for stat in self.whole_app_stats.itervalues():449      if stat.count or not (isinstance(stat, stats.GlobalStat) or450                            isinstance(stat, stats.NamespaceStat)):451        stat.put()452        self.written += 1453    if self.found_non_empty_namespace:454      for stat in self.namespace_stats.itervalues():455        if stat.count or not isinstance(stat, stats.NamespaceGlobalStat):456          stat.put()457          self.written += 1458  def Run(self):459    """Scans the datastore, computes new stats and writes them."""460    self.__ScanAllNamespaces()461    self.__Finalize()462    return self463  def Report(self):464    """Produce a small report about the result."""465    stat = self.whole_app_stats.get(_GLOBAL_KEY, None)466    entity_size = 0467    entity_count = 0468    builtin_index_size = 0469    builtin_index_count = 0470    composite_index_size = 0471    composite_index_count = 0472    if stat:473      entity_size = stat.entity_bytes474      entity_count = stat.count475      builtin_index_size = stat.builtin_index_bytes476      builtin_index_count 
= stat.builtin_index_count477      composite_index_size = stat.composite_index_bytes478      composite_index_count = stat.composite_index_count479      if not entity_count:480        entity_count = 1481    return ('Scanned %d entities of total %d bytes, %d index entries of total '482            '%d bytes and %d composite index entries of total %d bytes. '483            'Inserted %d new records.'484            % (entity_count, entity_size, builtin_index_count,485               builtin_index_size, composite_index_count, composite_index_size,...test_database.py
Source:test_database.py  
...10# the test cases for database11class database_test_cases(unittest.TestCase):12    def setUp(self):13        self.db = Database()14        self.sql_count_article = 'SELECT count(article_id) FROM HooliASE.articles'15        self.sql_count_question = 'SELECT count(question_id) FROM HooliASE.questions'16        self.sql_count_history = 'SELECT count(history_id) FROM HooliASE.history'17        self.sql_count_feedback = 'SELECT count(id) FROM HooliASE.answer_feedback'18        self.sql_delete_article = 'DELETE from HooliASE.articles WHERE article_id=%s'19        self.sql_delete_question = 'DELETE from HooliASE.questions WHERE question_id=%s'20        self.sql_delete_history = 'DELETE from HooliASE.history WHERE history_id=%s'21        self.sql_delete_feedback = 'DELETE from HooliASE.answer_feedback WHERE id=%s'22    # add_article(title, content)23    def test_add_article(self):24        # count the original row number25        self.db.mycursor.execute(self.sql_count_article)26        ori_row = count_rows(self.db.mycursor)27        # add a new test record into articles table28        article_id = self.db.add_article('ASE', 'We all love ASE')29        # assert the success of insertion30        self.db.mycursor.execute(self.sql_count_article)31        new_row = count_rows(self.db.mycursor)...ets_cleaning_nltk.py
Source:ets_cleaning_nltk.py  
...84	# Counting85	ZHO_length.append(words)86	ZHO_num_sentences.append(num_sentence)87	ZHO_one_count.append(ones)88	ZHO_the_count.append(determiners.count('the'))89	ZHO_aan_count.append(determiners.count('a') + determiners.count('an'))90	ZHO_this_count.append(determiners.count('this'))91	ZHO_that_count.append(determiners.count('that'))92	ZHO_these_count.append(determiners.count('these'))93	ZHO_those_count.append(determiners.count('those'))94	determiners = [x for x in determiners if x not in det_in_question]95	ZHO_other_det_count.append(len(determiners))96	ZHO_noun_count.append(len(nouns))97	ZHO_verb_count.append(len(verbs))98	ZHO_adj_count.append(len(adjs))99	ZHO_modal_count.append(len(modal))100	ZHO_PRP_count.append(prp)101mandarin_L1['essay_len'] = ZHO_length102mandarin_L1['num_sentence'] = ZHO_num_sentences103mandarin_L1['noun_count'] = ZHO_noun_count104mandarin_L1['verb_count'] = ZHO_verb_count105mandarin_L1['adj_count'] = ZHO_adj_count106mandarin_L1['modal_count'] = ZHO_modal_count107mandarin_L1['noun_per_sentence'] = mandarin_L1['noun_count'] / mandarin_L1['num_sentence']108mandarin_L1['verb_per_sentence'] = mandarin_L1['verb_count'] / mandarin_L1['num_sentence']109mandarin_L1['adj_per_sentence'] = mandarin_L1['adj_count'] / mandarin_L1['num_sentence']110mandarin_L1['modal_per_sentence'] = mandarin_L1['modal_count'] / mandarin_L1['num_sentence']111mandarin_L1['the_count'] = ZHO_the_count112mandarin_L1['aan_count'] = ZHO_aan_count113mandarin_L1['one_count'] = ZHO_one_count114mandarin_L1['this_count'] = ZHO_this_count115mandarin_L1['that_count'] = ZHO_that_count116mandarin_L1['these_count'] = ZHO_these_count117mandarin_L1['those_count'] = ZHO_those_count118mandarin_L1['other_det_count'] = ZHO_other_det_count119mandarin_L1['PRP_count'] = ZHO_PRP_count120mandarin_L1['the_freq'] = mandarin_L1['the_count'] / mandarin_L1['essay_len']121mandarin_L1['aan_freq'] = mandarin_L1['aan_count'] / mandarin_L1['essay_len']122mandarin_L1['one_freq'] = mandarin_L1['one_count'] / 
mandarin_L1['essay_len']123mandarin_L1['this_freq'] = mandarin_L1['this_count'] / mandarin_L1['essay_len']124mandarin_L1['that_freq'] = mandarin_L1['that_count'] / mandarin_L1['essay_len']125mandarin_L1['these_freq'] = mandarin_L1['these_count'] / mandarin_L1['essay_len']126mandarin_L1['those_freq'] = mandarin_L1['those_count'] / mandarin_L1['essay_len']127mandarin_L1['other_det_freq'] = mandarin_L1['other_det_count'] / mandarin_L1['essay_len']128mandarin_L1['PRP_freq'] = mandarin_L1['PRP_count'] / mandarin_L1['essay_len']129'''SPANISH L1'''130SPA_length = []131SPA_num_sentences = []132SPA_the_count = []133SPA_aan_count = []134SPA_one_count = []135SPA_this_count = []136SPA_that_count = []137SPA_these_count = []138SPA_those_count = []139SPA_other_det_count = []140SPA_noun_count = []141SPA_verb_count = []142SPA_modal_count = []143SPA_adj_count = []144SPA_PRP_count = []145for f in SPA_file:146	f_name = SPA_path + f147	temp = open(f_name, 'r')148	num_sentence = 0149	ones = 0150	determiners = []151	nouns = []152	verbs = []153	adjs = []154	modal = []155	words = 0 # number of words156	prp = 0157	for line in temp:158		arr = line.strip().split()159		if not len(arr) == 0:160			line_tokenized = nltk.word_tokenize(line.lower())161			tagged = [list(ele) for ele in nltk.pos_tag(line_tokenized)]162			tagged = [ele for ele in tagged if ele[0] not in puncs] # leave out punctuations163			words += len(tagged)164			one = np.array([x for x in tagged if x[0] == 'one'])165			dt = np.array([x for x in tagged if x[1] == 'DT'])166			n = np.array([x for x in tagged if x[1] in noun_tag])167			v = np.array([x for x in tagged if x[1] in verb_tag])168			a = np.array([x for x in tagged if x[1] in adj_tag])169			m = np.array([x for x in tagged if x[1] == 'MD'])170			ones += len(one)171			prp += find_PRP(tagged)172			if dt.shape[0] != 0:173				determiners += list(dt[:, 0])174			if n.shape[0] != 0:175				nouns += list(n[:, 0])176			if v.shape[0] != 0:177				verbs += list(v[:, 0])178			if a.shape[0] 
!= 0:179				adjs += list(a[:, 0])180			if m.shape[0] != 0:181				modal += list(m[:, 0])182			num_sentence += 1183	temp.close()184	# Counting185	SPA_length.append(words)186	SPA_num_sentences.append(num_sentence)187	SPA_one_count.append(ones)188	SPA_the_count.append(determiners.count('the'))189	SPA_aan_count.append(determiners.count('a') + determiners.count('an'))190	SPA_this_count.append(determiners.count('this'))191	SPA_that_count.append(determiners.count('that'))192	SPA_these_count.append(determiners.count('these'))193	SPA_those_count.append(determiners.count('those'))194	determiners = [x for x in determiners if x not in det_in_question]195	SPA_other_det_count.append(len(determiners))196	SPA_noun_count.append(len(nouns))197	SPA_verb_count.append(len(verbs))198	SPA_adj_count.append(len(adjs))199	SPA_modal_count.append(len(modal))200	SPA_PRP_count.append(prp)201spanish_L1['essay_len'] = SPA_length202spanish_L1['num_sentence'] = SPA_num_sentences203spanish_L1['noun_count'] = SPA_noun_count204spanish_L1['verb_count'] = SPA_verb_count205spanish_L1['adj_count'] = SPA_adj_count206spanish_L1['modal_count'] = SPA_modal_count207spanish_L1['noun_per_sentence'] = spanish_L1['noun_count'] / spanish_L1['num_sentence']...analysis.py
Source:analysis.py  
...34        prov = (each.split())[0]35        data.append(prov)36    count={}37    for i in data:38        count[i]=data.count(i)39    # print(count)40    prov=list(count.keys())41    nums=list(count.values())42    return prov,nums43def prov_plt():44    prov, nums = provience()45    plt.figure(figsize=(8, 4))46    plt.xticks(rotation=0)47    plt.bar(prov, nums, color='g')48    plt.xlabel('ç份')49    plt.ylabel('æ°é')50    plt.title('ä¸åç份æ°éåå¸å¾')51    plt.legend()52    plt.show()53# è¯äºåæ ¹æ®è¯äºæ°æ®è¿è¡åæçæ±ç¶å¾54def cloud_plt():55    def cloud_data():56        title=data_analysis('title')57        titles=[]58        # 对æ¯ä¸ªæ é¢è¿è¡åè¯59        for each in title:60            title_cut=jieba.lcut(each)61            titles.append(title_cut)62        # åé¤ä¸éè¦çè¯è¯63        title_del=[]64        for line in titles:65            line_del=[]66            for word in line:67                if word not in ['2018','å¦å¦','â¤','ã','ã',' ','Chinism','å·¥ä½å®¤','å强']:68                    line_del.append(word)69            title_del.append(line_del)70        # print(title_del)71        # å
ç´ å»é,æ¯ä¸ªæ é¢ä¸ä¸å«éå¤å
ç´ 72        title_clean=[]73        for each in title_del:74            line_dist=[]75            for word in each:76                if word not in line_dist:77                    line_dist.append(word)78            title_clean.append(line_dist)79        # å°ææè¯è¯è½¬ä¸ºä¸ä¸ªlist80        allwords_dist=[]81        for line in title_clean:82            for word in line:83                allwords_dist.append(word)84        # æåè¡¨è½¬ä¸ºæ°æ®æ¡85        allwords_dist=pandas.DataFrame({'allwords':allwords_dist})86        # 对è¯è¯è¿è¡åç±»æ±æ»87        word_count=allwords_dist.allwords.value_counts().reset_index()88        # æ·»å åå89        word_count.columns=['word','count']90        # print(allwords_dist)91        return word_count,title_clean92    def cloud_data_count():93        # è·åååééæ°æ®94        sell_count = data_analysis('sell_count')95        word_count, title_clean = cloud_data()96        ws_count = []97        # ååä¸å
å«ç»è®¡çè¯æ¶ï¼å°å
¶ééå å
¥list98        for each in word_count.word:99            i = 0100            s_list = []101            for t in title_clean:102                if each in t:103                    s_list.append(int(sell_count[i]))104                    # print(s_list)105                i += 1106            # ç»è®¡ä¸ä¸ªå
³é®è¯æå
å«ååçééæ»æ°107            ws_count.append(sum(s_list))108        # æåè¡¨è½¬ä¸ºæ°æ®æ¡109        ws_count = pandas.DataFrame({'ws_count': ws_count})110        # æword_count, ws_countå并为ä¸ä¸ªè¡¨111        word_count = pandas.concat([word_count, ws_count], axis=1, ignore_index=True)112        word_count.columns = ['word', 'count', 'ws_count']113        # ååºæå114        word_count.sort_values('ws_count', inplace=True, ascending=True)115        # åæå¤§30è¡æ°æ®116        df_ws = word_count.tail(30)117        return df_ws118    # å¾äºé¨å119    word_count=cloud_data()[0]120    # 设置åä½ï¼èæ¯é¢è²ï¼å使大å·ï¼121    w_c=WordCloud(font_path='/usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/fonts/ttf/simhei.ttf',122                  background_color='white',123                  max_font_size=60,124                  margin=1)125    # åå400个è¯è¿è¡å¯è§å126    wc=w_c.fit_words({x[0]:x[1] for x in word_count.head(1000).values})127    # 设置å¾ä¼å128    plt.imshow(wc,interpolation='bilinear')129    # å»é¤è¾¹æ¡130    plt.axis('off')131    plt.show()132    # ç»è®¡åææ±ç¶å¾é¨å133    data = cloud_data_count()134    index = np.arange(data.word.size)135    # plt.figure(figsize=(6,12))136    plt.barh(index, data.ws_count, align='center', alpha=0.8)137    plt.yticks(index, data.word)138    # æ·»å æ°æ®æ ç¾139    for y, x in zip(index, data.ws_count):140        plt.text(x, y, '%.0f' % x, ha='left', va='center')141    plt.show()142def impact_analysis():143    sell_count=pandas.DataFrame({'sell_count': data_analysis('sell_count')})144    price=[]145    for i in data_analysis('price'):146        p=i.split('-')147        p_i=p[0].split('.')...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. 
LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
