How to use the `_index_stats` method in pandera

Best Python code snippet using pandera_python

admin.py

Source:admin.py Github

copy

Full Screen

1"""2Manage and administer a collection3"""4import logging5import time6import requests7from solrcloudpy.utils import CollectionBase, SolrException8from .schema import SolrSchema9from .stats import SolrIndexStats10class SolrCollectionAdmin(CollectionBase):11 """12 Manage and administer a collection13 """14 def __init__(self, connection, name):15 """16 :param connection: the connection to Solr17 :type connection: SolrConnection18 :param name: the name of the collection19 :type name: str20 """21 super(SolrCollectionAdmin, self).__init__(connection, name)22 # corresponding public methods are memoized for a lower memory footprint23 self._index_stats = None24 self._schema = None25 def exists(self):26 """27 Finds if a collection exists in the cluster28 :return: whether a collection exists in the cluster29 :rtype: bool30 """31 return self.name in self.connection.list()32 def create(self, replication_factor=1, force=False, **kwargs):33 """34 Create a collection35 :param replication_factor: an integer indicating the number of replcas for this collection36 :type replication_factor: int37 :param force: a boolean value indicating whether to force the operation38 :type force: bool39 :param kwargs: additional parameters to be passed to this operation40 :Additional Parameters:41 - `router_name`: router name that will be used. defines how documents will be distributed among the shards42 - `num_shards`: number of shards to create for this collection43 - `shards`: A comma separated list of shard names. 
Required when using the `implicit` router44 - `max_shards_per_node`: max number of shards/replicas to put on a node for this collection45 - `create_node_set`: Allows defining which nodes to spread the new collection across.46 - `collection_config_name`: the name of the configuration to use for this collection47 - `router_field`: if this field is specified, the router will look at the value of the field in an input document to compute the hash and identify of a shard instead of looking at the `uniqueKey` field48 - `tlog_replicas` : the number of tlog replicas to create for this collection (solr 7.0+)49 - `pull_replicas` : the number of pull replicas to create for this collection (solr 7.0+)50 - `nrt_replicas` : the number of nrt replicas to create for this collection, by default solr creates NRT replicas if not defined. (solr 7.0+)51 - `auto_add_replicas`52 Additional parameters are further documented at https://cwiki.apache.org/confluence/display/solr/Collections+API#CollectionsAPI-CreateaCollection53 Please check the the collection management documentation for your specific version of solr to verify the arguments available.54 """55 params = {56 "name": self.name,57 "replicationFactor": replication_factor,58 "action": "CREATE",59 }60 router_name = kwargs.get("router_name", "compositeId")61 params["router.name"] = router_name62 num_shards = kwargs.get("num_shards", "1")63 params["numShards"] = num_shards64 shards = kwargs.get("shards")65 if shards:66 params["shards"] = shards67 max_shards_per_node = kwargs.get("max_shards_per_node", 1)68 params["maxShardsPerNode"] = max_shards_per_node69 create_node_set = kwargs.get("create_node_set")70 if create_node_set:71 params["createNodeSet"] = create_node_set72 collection_config_name = kwargs.get("collection_config_name")73 if collection_config_name:74 params["collection.configName"] = collection_config_name75 router_field = kwargs.get("router_field")76 if router_field:77 params["router.field"] = router_field78 tlog_replicas 
= kwargs.get("tlog_replicas")79 if tlog_replicas:80 params["tlog_replicas"] = tlog_replicas81 pull_replicas = kwargs.get("pull_replicas")82 if pull_replicas:83 params["pull_replicas"] = pull_replicas84 nrt_replicas = kwargs.get("nrt_replicas")85 if nrt_replicas:86 params["nrt_replicas"] = nrt_replicas87 auto_add_replicas = kwargs.get("auto_add_replicas")88 if auto_add_replicas:89 params["auto_add_replicas"] = auto_add_replicas90 # this collection doesn't exist yet, actually create it91 if not self.exists() or force:92 res = self.client.get("admin/collections", params).result93 if hasattr(res, "success"):94 # Create the index and wait until it's available95 while True:96 if not self._is_index_created():97 logging.getLogger("solrcloud").info(98 "index not created yet, waiting..."99 )100 time.sleep(1)101 else:102 break103 return SolrCollectionAdmin(self.connection, self.name)104 else:105 raise SolrException(str(res))106 # this collection is already present, just return it107 return SolrCollectionAdmin(self.connection, self.name)108 def _is_index_created(self):109 """110 Whether the index was created111 :rtype: bool112 """113 server = list(self.connection.servers)[0]114 req = requests.get("%s/solr/%s" % (server, self.name))115 return req.status_code == requests.codes.ok116 def is_alias(self):117 """118 Determines if this collection is an alias for a 'real' collection119 :rtype: bool120 """121 response = self.client.get(122 "/solr/admin/collections", {"action": "CLUSTERSTATUS", "wt": "json"}123 ).result.dict124 if "aliases" in response["cluster"]:125 return self.name in response["cluster"]["aliases"]126 return False127 def drop(self):128 """129 Delete a collection130 :return: a response associated with the delete request131 :rtype: SolrResponse132 """133 return self.client.get(134 "admin/collections", {"action": "DELETE", "name": self.name}135 ).result136 def reload(self):137 """138 Reload a collection139 :return: a response associated with the reload request140 :rtype: 
SolrResponse141 """142 return self.client.get(143 "admin/collections", {"action": "RELOAD", "name": self.name}144 ).result145 def split_shard(self, shard, ranges=None, split_key=None):146 """147 Split a shard into two new shards148 :param shard: The name of the shard to be split.149 :type shard: str150 :param ranges: A comma-separated list of hash ranges in hexadecimal e.g. ranges=0-1f4,1f5-3e8,3e9-5dc151 :type ranges: str152 :param split_key: The key to use for splitting the index153 :type split_key: str154 :return: a response associated with the splitshard request155 :rtype: SolrResponse156 """157 params = {"action": "SPLITSHARD", "collection": self.name, "shard": shard}158 if ranges:159 params["ranges"] = ranges160 if split_key:161 params["split.key"] = split_key162 return self.client.get("admin/collections", params).result163 def create_shard(self, shard, create_node_set=None):164 """165 Create a new shard166 :param shard: The name of the shard to be created.167 :type shard: str168 :param create_node_set: Allows defining the nodes to spread the new collection across.169 :type create_node_set: str170 :return: a response associated with the createshard request171 :rtype: SolrResponse172 """173 params = {"action": "CREATESHARD", "collection": self.name, "shard": shard}174 if create_node_set:175 params["create_node_set"] = create_node_set176 return self.client.get("admin/collections", params).result177 def create_alias(self, alias):178 """179 Create or modify an alias for a collection180 :param alias: the name of the alias181 :type alias: str182 :return: a response associated with the createalias request183 :rtype: SolrResponse184 """185 params = {"action": "CREATEALIAS", "name": alias, "collections": self.name}186 return self.client.get("admin/collections", params).result187 def delete_alias(self, alias):188 """189 Delete an alias for a collection190 :param alias: the name of the alias191 :type alias: str192 :return: a response associated with the deletealias 
request193 :rtype: SolrResponse194 """195 params = {"action": "DELETEALIAS", "name": alias}196 return self.client.get("admin/collections", params).result197 def delete_replica(self, replica, shard):198 """199 Delete a replica200 :param replica: The name of the replica to remove.201 :type replica: str202 :param shard: The name of the shard that includes the replica to be removed.203 :type shard: str204 :return: a response associated with the deletereplica request205 :rtype: SolrResponse206 """207 params = {208 "action": "DELETEREPLICA",209 "replica": replica,210 "collection": self.name,211 "shard": shard,212 }213 return self.client.get("admin/collections", params).result214 @property215 def state(self):216 """217 Get the state of this collection218 :return: the state of this collection219 :rtype: dict220 """221 if self.is_alias():222 return {"warn": "no state info available for aliases"}223 response = self.client.get(224 "/{webappdir}/admin/collections".format(225 webappdir=self.connection.webappdir226 ),227 dict(action="clusterstatus"),228 ).result229 try:230 return response["cluster"]["collections"][self.name]231 except KeyError:232 return {}233 @property234 def shards(self):235 """236 See state method237 :rtype: dict238 """239 return self.state240 @property241 def index_info(self):242 """243 Get a high-level overview of this collection's index244 :return: information about an index245 :rtype: dict246 """247 response = self.client.get("%s/admin/luke" % self.name, {}).result248 # XXX ugly249 data = response["index"].dict250 data.pop("directory", None)251 data.pop("userData", None)252 return data253 @property254 def index_stats(self):255 """256 Retrieves the SolrIndexStats class257 :return: SolrIndexStats class258 :rtype: SolrIndexStats259 """260 if self._index_stats is None:261 self._index_stats = SolrIndexStats(self.connection, self.name)262 return self._index_stats263 @property264 def schema(self):265 """266 Retrieves the SolrSchema class267 :return: SolrSchema 
class268 :rtype: SolrSchema269 """270 if self._schema is None:271 self._schema = SolrSchema(self.connection, self.name)272 return self._schema273 @property274 def stats(self):275 """276 Alias for retrieving the SolrIndexStats class277 :return: SolrIndexStats class278 :rtype: SolrIndexStats279 """280 return self.index_stats281 def _backup_restore_action(282 self, action, backup_name, location=None, repository=None283 ):284 """285 Creates or restores a backup for a collection, based on the action286 :param action: the action, either BACKUP or RESTORE287 :type action: str288 :param backup_name: the name of the backup we will use for storage & restoration289 :type backup_name: str290 :param location: an optional param to define where on the shared filesystem we should store the backup291 :type location: str292 :param repository: an optional param to define a repository type. filesystem is the default293 :type repository: str294 :return: an async response295 :rtype: AsyncResponse296 """297 params = {"action": action, "collection": self.name, "name": backup_name}298 if location:299 params["location"] = location300 if repository:301 params["repository"] = repository302 return self.client.get("admin/collections", params, asynchronous=True)303 def backup(self, backup_name, location=None, repository=None):304 """305 Creates a backup for a collection306 :param backup_name: the name of the backup we will use for storage & restoration307 :type backup_name: str308 :param location: an optional param to define where on the shared filesystem we should store the backup309 :type location: str310 :param repository: an optional param to define a repository type. 
filesystem is the default311 :type repository: str312 :return: an async response313 :rtype: AsyncResponse314 """315 return self._backup_restore_action(316 "BACKUP", backup_name, location=location, repository=repository317 )318 def restore(self, backup_name, location=None, repository=None):319 """320 Restores a backup for a collection321 :param backup_name: the name of the backup we will use for restoration322 :type backup_name: str323 :param location: an optional param to define where on the shared filesystem we should access the backup324 :type location: str325 :param repository: an optional param to define a repository type. filesystem is the default326 :type repository: str327 :return: an async response328 :rtype: AsyncResponse329 """330 return self._backup_restore_action(331 "RESTORE", backup_name, location=location, repository=repository332 )333 def request_status(self, async_response):334 """335 Retrieves the status of a request for a given async result336 :param async_response: the response object that includes its async_id337 :type async_response: AsyncResponse338 :return:339 """340 return self.client.get(341 "admin/collections",342 {343 "action": "REQUESTSTATUS",344 "requestid": async_response.async_id,345 "wt": "json",346 },347 ).result348 def request_state(self, async_response):349 """350 Retrieves the request state of a request for a given async result351 :param async_response: the response object that includes its async_id352 :type async_response: AsyncResponse353 :return:354 """...

Full Screen

Full Screen

schema_statistics.py

Source:schema_statistics.py Github

copy

Full Screen

...31 "name": series.name,32 }33def infer_index_statistics(index: Union[pd.Index, pd.MultiIndex]):34 """Infer index statistics given a pandas Index object."""35 def _index_stats(index_level):36 dtype = _get_array_type(index_level)37 return {38 "dtype": dtype,39 "nullable": bool(index_level.isna().any()),40 "checks": _get_array_check_statistics(index_level, dtype),41 "name": index_level.name,42 }43 if isinstance(index, pd.MultiIndex):44 index_statistics = [45 _index_stats(index.get_level_values(i))46 for i in range(index.nlevels)47 ]48 elif isinstance(index, pd.Index):49 index_statistics = [_index_stats(index)]50 else:51 warnings.warn(52 f"index type {type(index)} not recognized, skipping index inference",53 UserWarning,54 )55 index_statistics = []56 return index_statistics if index_statistics else None57def parse_check_statistics(check_stats: Union[Dict[str, Any], None]):58 """Convert check statistics to a list of Check objects."""59 if check_stats is None:60 return None61 checks = []62 for check_name, stats in check_stats.items():63 check = getattr(Check, check_name)...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now!!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful