Best Python code snippet using autotest_python
populate_from_mentions.py
Source:populate_from_mentions.py  
'''
Populate the staging area graph from the software mention imported documents
'''
import os
import json
from arango import ArangoClient
from populate_staging_area import StagingArea
import logging
import logging.handlers
from tqdm import tqdm

# number of mention documents fetched per AQL query when paginating
_PAGE_SIZE = 1000

# property used as qualifier for bounding boxes: "relative position within image"
_BOUNDING_BOX_PROPERTY = "P2677"


def populate(stagingArea):
    '''
    Entry point: switch the staging area to the "mentions" import database and load its
    documents, software mentions and bibliographical references into the staging graph.

    stagingArea: the StagingArea instance giving access to the ArangoDB client, the
    configuration and the staging graph.

    If the "mentions" database does not exist, an error is logged and nothing is populated.
    '''
    database_name_mentions = "mentions"

    print("Populate staging area from software mention import")
    if not stagingArea.sys_db.has_database(database_name_mentions):
        logging.error("Software mention import database does not exist: you need to first import the software mention resources")
        # nothing can be read from a missing database, so we stop here
        return

    stagingArea.db = stagingArea.client.db(database_name_mentions, username=stagingArea.config['arangodb']['arango_user'], password=stagingArea.config['arangodb']['arango_pwd'])

    populate_mentions(stagingArea, stagingArea.get_source(database_name_mentions))


def populate_mentions(stagingArea, source_ref):
    '''
    Software mentions at this stage are all represented as independent software entity (very light-weight and with
    the few extracted attributes). The information related to the mention in context are represented with the edge
    relation "citations", with a "quotes work" (P6166) property to store the software (=work) mentioned and "quotation"
    (P7081) for storing the whole context of mention (the target sentence).
    Other relations built are references (funding via Crossref funders is not built for the moment).

    stagingArea: the StagingArea instance, with stagingArea.db set to the mentions import database
    source_ref: the source reference object attached to every created entity, claim and relation

    Raises RuntimeError if an entity or relation cannot be initialized from its default template.
    '''
    # given the possible number of documents, we use pagination rather than a large ttl
    cursor = stagingArea.db.aql.execute(
        'FOR doc IN documents RETURN doc', full_count=True
    )
    stats = cursor.statistics() or {}
    total_results = stats.get('fullCount', 0)

    nb_pages = (total_results // _PAGE_SIZE) + 1

    print("entries:", total_results, ", nb. steps:", nb_pages)

    for page_rank in tqdm(range(0, nb_pages)):
        cursor = stagingArea.db.aql.execute(
            'FOR doc IN documents LIMIT ' + str(page_rank * _PAGE_SIZE) + ', ' + str(_PAGE_SIZE) + ' RETURN doc', ttl=3600
        )

        for document in cursor:
            local_doc = _build_document_vertex(stagingArea, document, source_ref)
            if not stagingArea.staging_graph.has_vertex(local_doc["_id"]):
                stagingArea.staging_graph.insert_vertex("documents", local_doc)

            # NOTE: two further relations could be built at this level, but are not for the moment:
            # - authorship based on the "author" metadata field (edge "actor" from "persons" to "documents")
            #   -> as we consider here text-mined documents, we might better not import every author as
            #   entity at this stage and keep only authors from key references cited together with software
            # - funding based on the Crossref "funder" metadata field (edge "funding" from "organizations"
            #   to "documents"). In WorkFunder, a funder is defined by 'name', a 'DOI' (uppercase, related
            #   to the funder), 'country' and optional 'award' array. The DOI contains the funder id
            #   (e.g. DOI 10.13039/100004440 -> funder id 100004440, see
            #   https://api.crossref.org/funders/100004440/ -> Wellcome; 10.13039/ seems to be the prefix
            #   of all funders). The Crossref funder ID is P3153: the organization would be looked up via
            #   P3153 (and merged if existing) or created, then linked with a "funding" edge carrying a
            #   P8324 claim. The earlier draft of this step was disabled and relied on undefined variables.

            _process_document_annotations(stagingArea, local_doc, source_ref)


def _process_document_annotations(stagingArea, local_doc, source_ref):
    '''
    Process all the annotations of one document together, which makes possible some (modest)
    optimizations: software with the same name in the same document are considered as the same
    entity, and what is extracted for each annotation is aggregated in this single entity.
    '''
    cursor_annot = stagingArea.db.aql.execute(
        "FOR doc IN annotations FILTER doc.document.$oid == '" + local_doc['_key'] + "' RETURN doc", ttl=60
    )

    # software entities already created for this document, by normalized software name
    software_name_processed = {}

    for index_annot, annotation in enumerate(cursor_annot):
        software_name = annotation["software-name"]["normalizedForm"]

        new_entity = software_name not in software_name_processed
        if new_entity:
            software = stagingArea.init_entity_from_template("software", source=source_ref)
            if software is None:
                raise RuntimeError("cannot init software entity from default template")
            software['labels'] = software_name
        else:
            software = software_name_processed[software_name]

        # "changed" is computed per annotation, so that an existing entity is only updated
        # when this very annotation brought a new claim
        changed = _update_software_claims(software, annotation, source_ref)

        # bibliographical references associated to the software could be aggregated here,
        # possibly with count information -> to be reviewed

        if new_entity:
            local_id = stagingArea.get_uid()
            software['_key'] = local_id
            software['_id'] = "software/" + local_id
            stagingArea.staging_graph.insert_vertex("software", software)
            software_name_processed[software_name] = software
        elif changed:
            stagingArea.staging_graph.update_vertex(software)

        # relations to be built at this level:
        # - citations based on software mention in a document, which will include context sentence,
        #   coordinates, etc. Here document are fully specified (with PDF hash, page coordinates, etc.)
        #   because it has been "text-mined"
        # - references, which relate a software or a document (where the reference is expressed) to a
        #   document (and less frequently to a software), the document here can be simply a set of
        #   bibliographical metadata or a fully specified document
        relation = _build_citation_relation(stagingArea, annotation, source_ref)
        relation["_from"] = "documents/" + local_doc["_key"]
        relation["_to"] = "software/" + software['_key']
        relation["_key"] = local_doc["_key"] + "_" + software['_key'] + "_" + str(index_annot)
        relation["_id"] = "citations/" + relation["_key"]
        stagingArea.staging_graph.insert_edge("citations", edge=relation)

        # bibliographical reference attached to the citation context, this will be represented as
        # a reference relation, from the citing document to the cited document, with related
        # software information in the relation
        if "references" in annotation:
            for reference in annotation["references"]:
                _process_reference(stagingArea, reference, relation, local_doc, software, index_annot, source_ref)

        # update citation edge document with the added reference information
        stagingArea.staging_graph.update_edge(relation)


def _build_document_vertex(stagingArea, document, source_ref):
    '''
    Build the staging "documents" vertex for a document of the mentions database, with its
    DOI and title/author indexing fields. The vertex is returned, not inserted.
    '''
    local_doc = stagingArea.init_entity_from_template("document", source=source_ref)
    if local_doc is None:
        raise RuntimeError("cannot init document entity from default template")

    local_doc['_key'] = document["_key"]
    local_doc['_id'] = "documents/" + document["_key"]

    # document metadata stays as they are (e.g. full CrossRef record)
    metadata = document['metadata']
    local_doc['metadata'] = metadata

    if "DOI" in metadata:
        local_doc['index_doi'] = metadata['DOI'].lower()

    # unfortunately the casing of the key DOI field is unreliable
    if "doi" in metadata:
        local_doc['index_doi'] = metadata['doi'].lower()

    # title/first author index, only when both fields are present and not empty
    if metadata.get("title") and metadata.get("author"):
        local_title_author_key = stagingArea.title_author_key(metadata['title'], metadata['author'])
        if local_title_author_key:
            local_doc['index_title_author'] = local_title_author_key

    return local_doc


def _make_qualifier(property_name, value):
    '''
    Build a qualifier holding a single string-typed property value.
    '''
    return {property_name: {"value": value, "datatype": "string"}}


def _make_claim(value, datatype, source_ref, holder=None):
    '''
    Build a claim value with its source reference. If holder (the annotation part the value
    comes from) has a "boundingBoxes" field, it is attached as a P2677 qualifier.
    '''
    claim = {"value": value, "datatype": datatype, "references": [source_ref]}
    if holder is not None and "boundingBoxes" in holder:
        claim["qualifiers"] = [_make_qualifier(_BOUNDING_BOX_PROPERTY, holder["boundingBoxes"])]
    return claim


def _add_claim_if_new(claims, property_name, claim):
    '''
    Append the claim under the given property unless a claim with the same value is already
    present. Return True if the claim has been added.
    '''
    if check_value_exists(claims, property_name, claim["value"]):
        return False
    claims.setdefault(property_name, []).append(claim)
    return True


def _update_software_claims(software, annotation, source_ref):
    '''
    Aggregate in the software entity the attributes extracted in the annotation, skipping the
    values already present. Return True if at least one claim has been added.

    The existence check is made on the value actually stored in the claim (normalized form,
    cleaned URL, Wikipedia page URL), otherwise identical values would never be detected.
    '''
    claims = software["claims"]
    changed = False

    # version info (P348)
    if "version" in annotation:
        claim = _make_claim(annotation["version"]["normalizedForm"], "string", source_ref)
        changed = _add_claim_if_new(claims, "P348", claim) or changed

    # publisher (P123)
    if "publisher" in annotation:
        claim = _make_claim(annotation["publisher"]["normalizedForm"], "string", source_ref)
        changed = _add_claim_if_new(claims, "P123", claim) or changed

    # reference URL (P854), extracted URL can contain spurious spaces
    if "url" in annotation:
        claim = _make_claim(annotation["url"]["normalizedForm"].replace(" ", ""), "url", source_ref)
        changed = _add_claim_if_new(claims, "P854", claim) or changed

    # the predicted wikidata entity and Wikipedia english page for the software are represented with
    # property "said to be the same" (P460), which is defined as "said to be the same as that item,
    # but it's uncertain or disputed"
    if "wikipediaExternalRef" in annotation:
        page_url = "https://en.wikipedia.org/?curid=" + str(annotation["wikipediaExternalRef"])
        claim = _make_claim(page_url, "url", source_ref)
        changed = _add_claim_if_new(claims, "P460", claim) or changed

    if "wikidataId" in annotation:
        claim = _make_claim(annotation["wikidataId"], "wikibase-item", source_ref)
        if _add_claim_if_new(claims, "P460", claim):
            software["index_entity"] = annotation["wikidataId"]
            changed = True

    return changed


def _build_citation_relation(stagingArea, annotation, source_ref):
    '''
    Build the "citations" relation for one annotation, storing all the original attributes as
    they are in the annotation, with bounding boxes as qualifiers when available. The edge
    identifiers (_from, _to, _key, _id) are left to the caller.
    '''
    relation = stagingArea.init_entity_from_template("citation", source=source_ref)
    if relation is None:
        raise RuntimeError("cannot init citation relation from default template")

    claims = relation["claims"]

    # store original software name string - always present normally
    # we use property P6166 ("quote work", here the work is the mentioned software)
    if "software-name" in annotation:
        name_part = annotation["software-name"]
        claims["P6166"] = [_make_claim(name_part["normalizedForm"], "string", source_ref, holder=name_part)]

    # version info (P348)
    if "version" in annotation:
        version_part = annotation["version"]
        claims["P348"] = [_make_claim(version_part["normalizedForm"], "string", source_ref, holder=version_part)]

    # publisher (P123)
    if "publisher" in annotation:
        publisher_part = annotation["publisher"]
        claims["P123"] = [_make_claim(publisher_part["normalizedForm"], "string", source_ref, holder=publisher_part)]

    # reference URL (P854)
    if "url" in annotation:
        url_part = annotation["url"]
        claims["P854"] = [_make_claim(url_part["normalizedForm"], "url", source_ref, holder=url_part)]

    # "said to be the same" (P460): the Wikipedia page identifier and the Wikidata identifier are
    # both kept when both are present (the value of the first is stored as it is in the annotation)
    same_as_claims = []
    if "wikipediaExternalRef" in annotation:
        same_as_claims.append(_make_claim(annotation["wikipediaExternalRef"], "url", source_ref))
    if "wikidataId" in annotation:
        same_as_claims.append(_make_claim(annotation["wikidataId"], "wikibase-item", source_ref))
    if same_as_claims:
        claims["P460"] = same_as_claims

    # quotation or excerpt (P7081)
    # note: currently bounding box for the context sentence not outputted by the software-mention
    # module, but we can load it if present for the future
    if "context" in annotation:
        claims["P7081"] = [_make_claim(annotation["context"], "string", source_ref, holder=annotation)]

    return relation


def _get_or_create_referenced_document(stagingArea, reference_key, source_ref):
    '''
    Return the staging document vertex for a cited reference, creating it (and inserting it if
    not already in the graph) from the extracted bibliographical reference metadata when it is
    not found. Return None if the reference object does not exist in the mentions database.
    '''
    cursor_ref = stagingArea.documents.find({'_key': reference_key}, skip=0, limit=1)
    if cursor_ref.count() > 0:
        return cursor_ref.next()

    # this is the usual case. we create a new document entity from the extracted bibliographical
    # reference metadata (it will be deduplicated in a further stage)
    referenced_document = stagingArea.init_entity_from_template("document", source=source_ref)
    if referenced_document is None:
        raise RuntimeError("cannot init document entity from default template")

    referenced_document['_key'] = reference_key
    referenced_document['_id'] = "documents/" + reference_key

    # get the metadata from the mentions database
    mention_reference = stagingArea.db.collection('references').get({'_key': reference_key})
    if mention_reference is None:
        logging.warning("warning: reference object indicated in an annotation does not exist, _key: " + reference_key)
        return None

    metadata = stagingArea.tei2json(mention_reference['tei'])
    referenced_document['metadata'] = metadata

    # DOI index
    if "DOI" in metadata:
        referenced_document["index_doi"] = metadata['DOI'].lower()

    # title/first author last name index
    if "title" in metadata and "author" in metadata:
        local_key = stagingArea.title_author_key(metadata['title'], metadata['author'])
        if local_key is not None:
            referenced_document["index_title_author"] = local_key

    if not stagingArea.staging_graph.has_vertex(referenced_document["_id"]):
        stagingArea.staging_graph.insert_vertex("documents", referenced_document)

    return referenced_document


def _process_reference(stagingArea, reference, relation, local_doc, software, index_annot, source_ref):
    '''
    Register one bibliographical reference attached to a citation context: add a "cites work"
    (P2860) claim to the citation relation (modified in place) and insert a "references" edge
    from the citing document to the cited document. The reference is skipped when its object
    does not exist in the mentions database.
    '''
    reference_key = reference["reference_id"]["$oid"]

    referenced_document = _get_or_create_referenced_document(stagingArea, reference_key, source_ref)
    if referenced_document is None:
        return

    # property is "cites work" (P2860) that we can include in the citation edge
    # note: currently bounding box not outputted by the software-mention module for references,
    # but we can load it if present for the future
    cited_claim = _make_claim(reference_key, "external-id", source_ref, holder=reference)

    # refkey (PA02) is the number of the reference in the full bibliographical section of the citing
    # paper, label (PA03) is the reference marker used by the citing paper as call-out to the full
    # reference entry
    for field_name, property_name in (("refkey", "PA02"), ("label", "PA03")):
        if field_name in reference:
            cited_claim.setdefault("qualifiers", []).append(_make_qualifier(property_name, reference[field_name]))

    relation["claims"].setdefault("P2860", []).append(cited_claim)

    # reference relation with specific edge
    software_id = "software/" + software['_key']
    relation_ref = {}
    # "P2860" property "cites work", add software associated to the citation context
    relation_ref["claims"] = {"P2860": [_make_claim(software_id, "external-id", source_ref)]}

    # we add an index to the software identifier, which will be useful when filtering the
    # references related to a given software
    relation_ref["index_software"] = software_id
    relation_ref["_from"] = local_doc['_id']
    relation_ref["_to"] = referenced_document['_id']
    relation_ref["_key"] = local_doc["_key"] + "_" + referenced_document['_key'] + "_" + str(index_annot)
    relation_ref["_id"] = "references/" + relation_ref["_key"]
    if not stagingArea.staging_graph.has_edge(relation_ref["_id"]):
        stagingArea.staging_graph.insert_edge("references", edge=relation_ref)


def check_value_exists(claim, property_name, value):
    '''
    Check in the claim if a property is present and if the property has the given value

    claim: the claims dictionary of an entity (property name -> list of claim values)
    property_name: the property to look up
    value: the value to search, to be compared with the "value" field of each claim value

    Return True if a claim value with this value exists for the property, False otherwise.
    '''
    if property_name in claim:
        for claim_value in claim[property_name]:
            if claim_value["value"] == value:
                return True
    return False
Source:populate_from_r.py  
'''
Populate the staging area graph from CRAN and rOpenSci imported documents
'''
import os
import json
import pybtex
from arango import ArangoClient
from populate_staging_area import StagingArea
import logging
import logging.handlers
from tqdm import tqdm


def _make_claim(value, datatype, source_ref):
    '''
    Build one claim value (value + datatype) referencing the given source.
    '''
    return {"value": value, "datatype": datatype, "references": [source_ref]}


def _open_import_db(stagingArea, database_name):
    '''
    Point stagingArea.db at the given import database.
    Return False, after logging an error, when the database does not exist.
    '''
    if not stagingArea.sys_db.has_database(database_name):
        logging.error(database_name + " import database does not exist: you need to first import " +
                      database_name + " resources")
        return False
    arango_config = stagingArea.config['arangodb']
    stagingArea.db = stagingArea.client.db(
        database_name, username=arango_config['arango_user'], password=arango_config['arango_pwd'])
    return True


def _find_software_by_label(stagingArea, label):
    '''
    Return the first software vertex whose "labels" equals the given label, or None.
    '''
    matches = stagingArea.software.find({'labels': label}, skip=0, limit=1)
    if matches.count() > 0:
        return matches.next()
    return None


def populate(stagingArea):
    '''
    Load the rOpenSci and then the CRAN import databases into the staging area graph.

    A first pass creates the software entities (and their authors / references),
    a second pass sets the dependency relations, once all packages exist.
    An import database that does not exist is reported and skipped instead of
    being queried anyway.
    '''
    available_databases = []
    for database_name in ("rOpenSci", "CRAN"):
        print("Populate staging area from " + database_name)
        if not _open_import_db(stagingArea, database_name):
            continue
        available_databases.append(database_name)
        packages = stagingArea.db.collection('packages')
        populate_r(stagingArea, packages, stagingArea.get_source(database_name))

    # we set the dependencies in a second pass, having all the packages entities put in relation now set
    for database_name in available_databases:
        print("dependencies " + database_name + "...")
        if not _open_import_db(stagingArea, database_name):
            continue
        packages = stagingArea.db.collection('packages')
        set_dependencies(stagingArea, packages, stagingArea.get_source(database_name))


def populate_r(stagingArea, collection, source_ref):
    '''
    Create one software vertex per document of the "packages" collection of the
    database currently selected in stagingArea.db, then process its authors and
    references.

    The collection argument is not used: packages are read with an AQL query on
    stagingArea.db. Returns None; nothing is done when the relator code file is missing.
    Raises RuntimeError when the software template cannot be instantiated.
    '''
    relator_file = os.path.join("data", "resources", "relator_code_cran.json")
    if not os.path.isfile(relator_file):
        logging.error("Error when loading relator code: " + relator_file)
        return None

    with open(relator_file, encoding="utf-8") as relator_f:
        relator_code_cran = json.load(relator_f)

    package_cursor = stagingArea.db.aql.execute(
        'FOR doc IN packages RETURN doc', ttl=3600, full_count=True
    )
    stats = package_cursor.statistics() or {}
    total_packages = stats.get('fullCount', 0)

    # the context manager closes the progress bar even if a package fails
    with tqdm(total=total_packages) as pbar:
        for package in package_cursor:
            is_cran = stagingArea.db.name == "CRAN"

            # package as software vertex collection
            software = stagingArea.init_entity_from_template("software", source=source_ref)
            if software is None:
                raise RuntimeError("cannot init software entity from default template")

            software['labels'] = package['Package']
            # wikidata description are short phrase, so it correspond to R package title
            if "Title" in package:
                software['descriptions'] = package['Title']
            # for the actual package description, there is no "content summary" property,
            # so we introduce a field "summary"
            if 'Description' in package:
                software['summary'] = package['Description']

            if is_cran:
                # for CRAN we don't have random ID, so we have to create one - to be consistent
                # with MongoDB ones used for most of the others sources, we use an hexa
                # identifier of length 24
                local_id = stagingArea.get_uid()
            else:
                local_id = package['_key']
            software['_key'] = local_id
            software['_id'] = "software/" + local_id

            claims = software["claims"]
            if "git_repository" in package:
                claims["P1324"] = [_make_claim(package["git_repository"], "url", source_ref)]

            # programming language (P277) is always R (Q206904) here
            claims["P277"] = [_make_claim("Q206904", "wikibase-item", source_ref)]

            # copyright license is P275
            if "License" in package:
                # this will be the object of a further disambiguation to match the license entity
                claims["P275"] = [_make_claim(package["License"], "string", source_ref)]

            # version info (P348)
            if "Version" in package:
                claims["P348"] = [_make_claim(package["Version"], "string", source_ref)]

            # official website url is P856 and user manual/documentation url is P2078
            # for rOpenSci manual/doc is always with https://docs.ropensci.org/ prefix
            # for CRAN we have a distinct manual field usually pointing to a PDF,
            # URL being for the official website (or git repo)
            if "Manual" in package:
                claims["P2078"] = [_make_claim(package["Manual"], "url", source_ref)]

            # NOTE(review): URL values are stored under P2078 for both sources although the
            # comment above says the CRAN URL is the official website (P856) - confirm intent
            if "URL" in package and len(package["URL"]) > 0:
                for url in package["URL"]:
                    claims.setdefault("P2078", []).append(_make_claim(url, "url", source_ref))

            # original identifier (this claim carries no reference)
            identifier_claim = {"value": package["_key"], "datatype": "external-id"}
            claims["P5565" if is_cran else "PA1"] = [identifier_claim]

            replaced = False
            if is_cran:
                # we check if the package is not already in the KB, and aggregate/merge with
                # this existing one if yes
                existing_software = _find_software_by_label(stagingArea, package["Package"])
                if existing_software is not None:
                    existing_software = stagingArea.aggregate_with_merge(existing_software, software)
                    stagingArea.staging_graph.update_vertex(existing_software)
                    software = existing_software
                    replaced = True

            if not replaced:
                stagingArea.staging_graph.insert_vertex("software", software)

                maintainer = package["Maintainer"] if "Maintainer" in package else None

                # authors: Authors@R has priority, the other author fields are relevant
                # only if the previous ones are absent
                for author_field in ("Authors@R", "Authors", "Author"):
                    if author_field not in package:
                        continue
                    for author in package[author_field]:
                        maintainer_consumed = process_author(
                            stagingArea, author, software['_key'], relator_code_cran, source_ref, maintainer)
                        if maintainer_consumed:
                            maintainer = None
                    break

            if "References" in package and len(package["References"]) > 0:
                # this will add "references" relation between the software and the referenced
                # documents; the whole block is passed, so a single call is enough (it was
                # previously repeated once per reference with the same arguments)
                stagingArea.process_reference_block(package["References"], software, source_ref)

            pbar.update(1)


def set_dependencies(stagingArea, collection, source_ref):
    '''
    Second pass: add the "dependencies" edges between software entities for every
    package document that has a "_hard_deps" and/or "_soft_deps" field.

    The collection argument is not used: packages are read with an AQL query on
    stagingArea.db. Packages or dependencies without a matching software entity
    (looked up by label) are skipped.
    '''
    # we use an AQL query to avoid limited life of cursor that cannot be changed otherwise
    package_cursor = stagingArea.db.aql.execute(
        'FOR doc IN packages RETURN doc', ttl=600
    )

    # this pass will set the dependencies
    for package in package_cursor:
        if "_hard_deps" not in package and "_soft_deps" not in package:
            continue

        # get the first software entry
        software1 = _find_software_by_label(stagingArea, package["Package"])
        if software1 is None:
            continue

        # hard dependencies first, then soft dependencies
        for deps_field, the_type in (("_hard_deps", "hard"), ("_soft_deps", "soft")):
            if deps_field not in package:
                continue
            for dependency in package[deps_field]:
                # relation are via the dependencies edge collection, they relate two software
                # (or packages or libraries), property is "depends on software" (P1547)
                software2 = _find_software_by_label(stagingArea, dependency["package"])
                if software2 is not None:
                    add_dependency(stagingArea, dependency, software1, software2, source_ref, the_type=the_type)


def add_dependency(stagingArea, dependency, software1, software2, source_ref, the_type=None):
    '''
    Insert a "dependencies" edge from software1 to software2, unless an edge with
    the same identifier already exists.

    The edge key is "<key1>_<key2>_<the_type>", so that a hard and a soft
    dependency between the same two packages do not collide (the suffix was
    previously always "_hard"). When the_type is None the "hard" suffix is kept.
    '''
    # relation are via the dependencies edge collection, they relate two software (or packages or libraries)
    # property is "depends on software" (P1547)
    relation = {"claims": {"P1547": []}}

    # add version (P348) if present
    if "version" in dependency:
        relation["claims"]["P348"] = [_make_claim(dependency["version"], "string", source_ref)]

    # indicate hard or soft dependencies, we express it as qualifier of the P1547 property,
    # with the property "has quality" (P1552) with string value (normally it's an item value,
    # but we have to relax somewhere to express that easily)
    quality = _make_claim(the_type, "string", source_ref)
    relation["claims"]["P1547"].append({"qualifiers": {"P1552": [quality]}})

    relation["_from"] = software1['_id']
    # find the target dependency software with its unique package name
    relation["_to"] = software2['_id']

    key_suffix = the_type if the_type is not None else "hard"
    relation["_key"] = software1["_key"] + "_" + software2['_key'] + "_" + key_suffix
    relation["_id"] = "dependencies/" + relation["_key"]

    if not stagingArea.staging_graph.has_edge(relation["_id"]):
        stagingArea.staging_graph.insert_edge("dependencies", edge=relation)


def process_author(stagingArea, author, software_key, relator_code_cran, source_ref, maintainer=None):
    '''
    Process an author in the Author or Author@R fields
    If the role is funder (fnd), we normally don't have a person but an organization and the relation
    should be an edge "funding".
    If the role is "copyright holder" (cph), the relation is the edge "copyrights". In this case, and
    also observed for authorship, we could have an organization and not always a person.

    Return True when the maintainer information is consumed (the maintainer is None
    or is matched with this author), False otherwise.
    Raises RuntimeError when an entity template cannot be instantiated.
    '''
    # check role case funder
    if "roles" in author:
        if isinstance(author["roles"], str):
            author["roles"] = [author["roles"]]

        if "fnd" in author["roles"]:
            # this is an organization
            org_name = None
            if 'given' in author:
                org_name = author['given']
            elif 'full_name' in author:
                org_name = author['full_name']

            if org_name is None:
                return False

            organization = stagingArea.init_entity_from_template("organization", source=source_ref)
            if organization is None:
                raise RuntimeError("cannot init organization entity from default template")

            organization["labels"] = org_name
            organization["_key"] = stagingArea.get_uid()
            organization["_id"] = "organizations/" + organization["_key"]
            stagingArea.staging_graph.insert_vertex("organizations", organization)

            # funding relation, plus a copyrights relation when the organization is also
            # a copyright holder
            # NOTE(review): the copyrights edge carries the same P8324 claim as the funding
            # edge - confirm this is the intended property
            edge_collections = ["funding"]
            if "cph" in author["roles"]:
                edge_collections.append("copyrights")
            for edge_collection in edge_collections:
                relation = {}
                relation["claims"] = {}
                relation["claims"]['P8324'] = [{"references": [source_ref]}]
                relation["_from"] = organization["_id"]
                relation["_to"] = "software/" + software_key
                relation["_id"] = edge_collection + "/" + organization["_key"] + "_" + software_key
                stagingArea.staging_graph.insert_edge(edge_collection, edge=relation)

            return False

    person = stagingArea.init_entity_from_template("person", source=source_ref)
    if person is None:
        raise RuntimeError("cannot init person entity from default template")

    if "full_name" in author:
        person["labels"] = author['full_name']
    elif 'given' in author and 'family' in author:
        if isinstance(author['given'], str):
            person["labels"] = author['given'] + " " + author['family']
        else:
            # several given names: join them, without depending on the template's initial label
            person["labels"] = " ".join(list(author['given']) + [author['family']])
    elif 'given' in author:
        person["labels"] = author['given']
    else:
        # there is no name part available
        return False

    if 'given' in author:
        # "given name" -> P735
        person["claims"]["P735"] = [_make_claim(author['given'], "string", source_ref)]

    if 'family' in author:
        # "family name" -> P734
        person["claims"]["P734"] = [_make_claim(author['family'], "string", source_ref)]

    if 'orcid' in author:
        # P496
        person["claims"]["P496"] = [_make_claim(author['orcid'], "external-id", source_ref)]
        person["index_orcid"] = author['orcid']

    if 'email' in author:
        # P968
        person["claims"]["P968"] = [_make_claim(author['email'], "url", source_ref)]

    # github identifier P2037
    # Google Scholar author ID P1960
    # if only full_name is available, we would need grobid to further parse the name

    # check orcid duplicate
    matched_person = None
    if 'orcid' in author:
        orcid_matches = stagingArea.persons.find({'index_orcid': author['orcid']}, skip=0, limit=1)
        if orcid_matches.count() > 0:
            matched_person = orcid_matches.next()

    if matched_person is not None:
        person = stagingArea.aggregate_with_merge(matched_person, person)
        stagingArea.staging_graph.update_vertex(person)
    else:
        person["_key"] = stagingArea.get_uid()
        person["_id"] = "persons/" + person["_key"]
        stagingArea.staging_graph.insert_vertex("persons", person)

    if 'roles' in author:
        for role in author["roles"]:
            # relation based on role, via the actor edge collection
            if role not in relator_code_cran:
                # try some cleaning
                role = role.replace(")", "")
                role = role.strip("\"")
                if role not in relator_code_cran:
                    logging.warning("Error unknown role " + role + " defaulting to Contributor")
                    role = "ctb"
            wikidata_property = relator_code_cran[role]["wikidata"]
            role_term = relator_code_cran[role]["marc_term"].replace(" ", "_")
            set_role(stagingArea, wikidata_property, person, software_key, role_term, source_ref)
    else:
        # role is undefined, we default to contributor (maybe not be the best choice?)
        set_role(stagingArea, relator_code_cran['ctb']["wikidata"], person, software_key, "Contributor", source_ref)

    if maintainer is None:
        return True

    # NOTE(review): the keys "full_name", "given" and "family" are read on the person
    # entity, but this function never sets them on it (it sets "labels" and claims);
    # presumably the author record was intended - confirm before relying on this match
    match = False
    if "full_name" in maintainer:
        if "full_name" in person:
            match = maintainer["full_name"] == person["full_name"]
        elif "given" in person:
            if maintainer["full_name"].find(person["given"]) != -1:
                if "family" in person:
                    # the family name must be found too (the given name was tested twice before)
                    match = maintainer["full_name"].find(person["family"]) != -1
                else:
                    match = True

    if not match:
        return False

    # if email not present add it
    if "email" in maintainer and "email" not in person:
        person["email"] = maintainer["email"]
    if "full_name" in maintainer and "full_name" not in person:
        person["full_name"] = maintainer["full_name"]

    # add maintainer role, maintained by (P126)
    relation = {}
    relation["claims"] = {}
    relation["claims"]["P126"] = [{"references": [source_ref]}]
    # same vertex identifier ("persons/...") and same edge collection name as in set_role
    relation["_from"] = person["_id"]
    relation["_to"] = "software/" + software_key
    relation["_key"] = person["_key"] + "_" + software_key + "maintainer"
    stagingArea.staging_graph.insert_edge("actors", edge=relation)
    return True


def set_role(stagingArea, wikidata_property, person, software_key, role_term, source_ref):
    '''
    Prepare the "actors" edge relating a person to a software for the given role property.
    '''
    relation = {}
    relation["claims"] = {}
    relation["claims"][wikidata_property] = [{"references": [source_ref]}]
    relation["_from"] = person["_id"]
    relation["_to"] = "software/" + software_key
    relation["_id"] = "actors/" + person["_key"] + "_" + software_key + "_" + role_term
    # check if not already there (conservative check ;)
    if not stagingArea.staging_graph.has_edge(relation["_id"]):
        # NOTE(review): the listing is truncated here ("..."); presumably the edge is
        # inserted in "actors" as add_dependency does for "dependencies" - restore this
        # line from the complete file
        ...

# (next listing on the page: test_nonlocal_statement.py)
Source:test_nonlocal_statement.py  
from unittest import TestCase

__all__ = ['NonlocalStatement']

GLOBAL_VALUE: int = 0


class NonlocalStatement(TestCase):
    """Executable notes on free variables and the ``nonlocal`` statement."""

    def setUp(self) -> None:
        """Reset the module-level value before each test."""
        # pylint: disable=global-statement
        global GLOBAL_VALUE
        GLOBAL_VALUE = 0

    def test_access_nonlocal_without_nonlocal_statement(self) -> None:
        """Reading an enclosing variable needs no declaration."""
        local_value: int = 0

        def read() -> int:
            return local_value

        self.assertEqual(local_value, read(), 'We can access nonlocal variable without "nonlocal" statement usage. '
                                              'In this case "local_value" is a free variable')

    def test_redefine_free_variable(self) -> None:
        """Assigning a name anywhere in a function makes it local to the whole function."""
        # pylint: disable=unused-variable
        free_value: int = 0

        def read() -> int:
            # pylint: disable=used-before-assignment
            value: int = free_value  # local variable 'free_value' referenced before assignment
            free_value: int = value
            return free_value

        with self.assertRaises(UnboundLocalError):
            read()

    def test_modify_nonlocal_without_nonlocal_statement(self) -> None:
        """A plain assignment creates a shadowing local; the outer value is untouched."""
        local_value: int = 0

        def modify() -> None:
            # pylint: disable=unused-variable
            local_value = 42

        modify()
        # the two literals are concatenated: the first one needs its trailing space
        self.assertEqual(local_value, 0, 'We cannot modify nonlocal variable without "nonlocal" statement usage. '
                                         'Shadowing local variable will be just declared.')

    def test_modify_nonlocal_with_nonlocal_statement(self) -> None:
        """``nonlocal`` rebinds the variable of the enclosing function."""
        local_value: int = 0

        # def modify(local_value)  # Names listed in a nonlocal statement must not be defined as formal parameters
        def modify() -> None:
            # v = local_value  # SyntaxError: name 'local_value' is used prior to nonlocal declaration
            # local_value = -1 # SyntaxError: name 'local_value' is assigned to before nonlocal declaration
            # nonlocal GLOBAL_VALUE  # SyntaxError: no binding for nonlocal 'GLOBAL_VALUE' found
            nonlocal local_value
            local_value = 42

        modify()
        self.assertEqual(local_value, 42, 'We can modify nonlocal variable with "nonlocal" statement usage.')

    def test_modify_multiple_scopes_single_nonlocal(self) -> None:
        """``nonlocal`` reaches through an intermediate scope that does not bind the name."""
        local_value: int = 0

        def modify_lvl1() -> None:
            def modify_lvl2() -> None:
                nonlocal local_value
                local_value = 42

            modify_lvl2()

        modify_lvl1()
        self.assertEqual(local_value, 42, 'We can modify nonlocal variable with "nonlocal" statement usage '
                                          'even a few level above.')

    def test_modify_multiple_scopes_multiple_nonlocal(self) -> None:
        """``nonlocal`` binds to the nearest enclosing scope that defines the name."""
        local_value: int = 0

        def modify_lvl1() -> None:
            local_value: int = -1

            def modify_lvl2() -> None:
                nonlocal local_value
                local_value = 42

            modify_lvl2()

        modify_lvl1()
        self.assertEqual(local_value, 0, 'We can modify nonlocal variable with "nonlocal" statement usage '
                                         'only from the nearest enclosing scope')

    def test_create_nonlocal_with_nonlocal_statement(self) -> None:
        """``nonlocal`` cannot create a new binding (kept as a comment: it is a SyntaxError)."""
        # def create():
        #     nonlocal local_value    # SyntaxError: no binding for nonlocal 'local_value' found (the scope in which a
        #                             # new binding should be created
        #                             # cannot be determined unambiguously)
        #     local_value = 42
        #
        # create()
        pass

    def test_read_free_variable_in_exec_without_context(self) -> None:
        """Code run by ``exec`` does not see an enclosing variable the function never references."""
        # pylint: disable=unused-variable
        local_variable: int = 0

        def read() -> None:
            # pylint: disable=exec-used
            exec("value = local_variable")

        with self.assertRaises(NameError):
            read()

    def test_read_free_variable_in_exec_with_context(self) -> None:
        """Passing the value through the ``exec`` globals mapping makes it visible."""
        local_variable: int = 0

        def read() -> None:
            # pylint: disable=exec-used
            exec("value = local_variable", {'local_variable': local_variable})

        # NOTE(review): the listing is truncated here ("..."); presumably read() is
        # called next - restore the end of this test from the complete file
        ...


# Page footer text that followed the listing:
# Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up
# the prerequisites to run your first automation test, to following best practices and diving deeper
# into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help
# you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!
