Best Python code snippet using slash
experimental_data.py
Source:experimental_data.py  
import pickle
import os
from tqdm import tqdm
import random
from utils import data_process
from knowledge_graph import Construct_KG


def _count_lines(path):
    """Return the number of lines in the file at *path* (used as a tqdm total)."""
    # Binary mode: counting must never fail on bytes the text reader would reject.
    with open(path, "rb") as handle:
        return sum(1 for _ in handle)


def _dump_pickle(obj, path):
    """Pickle *obj* to *path*, closing the file deterministically."""
    with open(path, "wb+") as handle:
        pickle.dump(obj, handle)


def _load_pickle(path):
    """Unpickle and return the object stored at *path*."""
    # NOTE: pickle is unsafe on untrusted data; only load files written by this pipeline.
    with open(path, "rb") as handle:
        return pickle.load(handle)


class Extract_Data:
    r"""
    Extract_Data is used to construct training, validation and test data by extracting TTD related relation paths from knowledge graph.
    In this work, 7,144 $drug-target-disease$ are extracted from Therapeutic Target Database (TTD) as true cases (The details could be found
    in Supplementary Data 1 of our published paper "SemaTyP: A Knowledge Graph Based Literature Mining Method for Drug Discovery").
    The $\ell$ is set to 4, $K$ is 133 and $M$ is 52. Based on the aforementioned construction of training data, 19,230 positive data are
    obtained. Each data is a length of 873 (133*5+52*4) vector. On the other side, for each $drug-target-disease$, we randomly replaced the
    drug, target and disease with other drug, target and disease. If the new triplet doesn't exist in TTD, then it is considered as a false
    example, which is denoted as $drug^{'}-target^{'}-disease^{'}$. Similarly, 19,230 negative training data is obtained from false cases.
    """

    def __init__(self, predication_dir, TTD_dir, processed_dir):
        """
        predication_dir: directory that contains predications.txt
        TTD_dir: directory that contains the TTD input files
        processed_dir: directory where every generated file is written
        """
        self.predication_dir = predication_dir
        self.TTD_dir = TTD_dir
        self.output_dir = processed_dir

    def UMLS_type_vector(self):
        """
        input: <predication_dir>/predications.txt (tab separated)
        output: <output_dir>/entity_vector and <output_dir>/predicate_vector (pickled dictionaries)

        Every distinct predicate (4th column) and every distinct subject/object type (5th and 6th columns)
        gets the next free integer index, in first-seen order.
        Returns (entity_vector, predication_vector).
        """
        print("Construct the UMLS type vector...")
        predications_path = self.predication_dir+"/predications.txt"
        entity_vector = {}
        predication_vector = {}
        with open(predications_path, 'r') as f:
            for line in tqdm(f, total=_count_lines(predications_path)):
                sline = line.split("\t")
                if sline[0] == "" or sline[1] == "":
                    continue
                predicate = sline[3]
                subject_type = sline[4]
                object_type = sline[5].strip("\n")

                # len() is evaluated before the insert, so a new key gets the next free index.
                predication_vector.setdefault(predicate, len(predication_vector))
                entity_vector.setdefault(subject_type, len(entity_vector))
                entity_vector.setdefault(object_type, len(entity_vector))

        _dump_pickle(entity_vector, self.output_dir+"/entity_vector")
        _dump_pickle(predication_vector, self.output_dir+"/predicate_vector")
        return entity_vector, predication_vector

    def drug_syndroms(self):
        """
        input: <TTD_dir>/Synonyms.txt, which is downloaded from Therapeutic Target Database
        output: <output_dir>/drug_synonyms, a pickled dictionary keyed by every (processed) drug name or synonym:
        1. drug_synonyms[name]["drug_id"] = drug_id
        2. drug_synonyms[name]["synonyms"][other_name] = drug_id, for every other name found on the same line
        The dictionary is also returned.
        """
        print("Extracting the drug-syndroms information ...")
        synonyms_path = self.TTD_dir+"/Synonyms.txt"
        drug_synonyms = {}
        with open(synonyms_path, 'r') as f:
            for line in tqdm(f, total=_count_lines(synonyms_path)):
                sline = line.split("\t")
                drug_id = sline[0].lower()
                # All names on this line (main name + synonyms), each mapped to the drug id.
                drug_names = {data_process.process_en(sline[1].lower()): drug_id}
                for s in sline[2].lower().strip("\n").split(";"):
                    drug_names[data_process.process_en(s)] = drug_id

                for drug_name in drug_names:
                    # An already known name keeps its first drug_id and only gains synonyms.
                    entry = drug_synonyms.setdefault(drug_name, {"drug_id": drug_id, "synonyms": {}})
                    entry["synonyms"].update(drug_names)
                    # A name is never listed as a synonym of itself.
                    entry["synonyms"].pop(drug_name, None)

        _dump_pickle(drug_synonyms, self.output_dir+"/drug_synonyms")
        return drug_synonyms

    def disease_target(self):
        """
        input: <TTD_dir>/target-disease_TTD2016.txt file which is downloaded from Therapeutic Target Database
        output: <output_dir>/disease_targets. The format of disease_targets:
        1. disease_targets[Indication]={}
        2. disease_targets[Indication][target_1]=target_1_ID
        Indication is the disease name.
        The dictionary is also returned.
        """
        print("Extracting the disease-target relations ...")
        source_path = self.TTD_dir+"/target-disease_TTD2016.txt"
        disease_targets = {}
        with open(source_path, "r") as f:
            # Skip the header row; the default keeps an empty file from raising StopIteration.
            next(f, None)
            for line in tqdm(f, total=max(_count_lines(source_path) - 1, 0)):
                sline = line.split("\t")
                TTDTargetID = sline[0].lower()
                Target_Name = sline[1].lower()
                Indications = sline[2].lower().split(";")
                new_Target_Name = data_process.process_en(Target_Name)
                for Indication in Indications:
                    new_Indication = data_process.process_en(Indication)
                    disease_targets.setdefault(new_Indication, {})[new_Target_Name] = TTDTargetID

        _dump_pickle(disease_targets, self.output_dir+"/disease_targets")
        return disease_targets

    def drug_disease(self):
        """
        input: <TTD_dir>/drug-disease_TTD2016.txt file which is downloaded from Therapeutic Target Database
        output: 1) <output_dir>/disease_drug; 2) <output_dir>/drug_disease

        1) disease_drug is a dictionary, the format is:
           disease_drug[disease_name]={} contains all drugs and corresponding ids which could treat the disease
           disease_drug[disease][drug]=drug_id

        2) drug_disease is a dictionary, the format is:
           drug_disease[drug]={} contains all diseases which could be treated by this drug,
           drug_disease[drug][disease]=drug_id
           The reason why we built the drug_disease dictionary is because there is no disease ID in the initial TTD drug-disease_TTD2016.txt file

        Returns (drug_disease, disease_drug).

        This Therapeutic Target Database currently contains 2,589 targets (including 397 successful, 723 clinical trial, and 1,469 research targets), and 31,614 drugs (including 2,071 approved, 9,528 clinical trial, 17,803 experimental drugs). 20,278 small molecules and 653 antisense drugs are with available structure or oligonucleotide sequence.
        """
        print("Extracting the drug-disease relations ...")
        source_path = self.TTD_dir + "/drug-disease_TTD2016.txt"
        disease_drug = {}
        drug_disease = {}
        with open(source_path, "r") as f:
            # Skip the header row; the default keeps an empty file from raising StopIteration.
            next(f, None)
            for line in tqdm(f, total=max(_count_lines(source_path) - 1, 0)):
                sline = line.split("\t")
                drug_id = sline[0].lower()
                drug = data_process.process_en(sline[1].lower().strip(" "))
                diseases = sline[2].lower().split(";")

                treated_by_drug = drug_disease.setdefault(drug, {})
                for disease in diseases:
                    disease_processed = data_process.process_en(disease)
                    disease_drug.setdefault(disease_processed, {})[drug] = drug_id
                    treated_by_drug[disease_processed] = drug_id

        _dump_pickle(disease_drug, self.output_dir+"/disease_drug")
        _dump_pickle(drug_disease, self.output_dir+"/drug_disease")
        return drug_disease, disease_drug

    def positive_dtd_cases(self):
        """
        positive_dtd_cases is used to obtain all TTD provided golden standard disease-target-drug relations which also exist in our constructed knowledge graph.

        input: <predication_dir>/predications.txt and <TTD_dir>/disease_target_drug_cases.txt
        output: <output_dir>/experimental_disease_target_drug, one "Disease:..\tTarget:..\tDrug:.." line for every
        case whose disease, target and drug all appear as an entity in predications.txt
        """
        entities_and_type = {}
        print("Collecting all semantic types of each entity ...")
        predications_path = self.predication_dir + "/predications.txt"
        with open(predications_path, "r") as f:
            for line in tqdm(f, total=_count_lines(predications_path)):
                sline = line.split("\t")
                if sline[0] == "" or sline[1] == "":
                    continue
                entity_1 = data_process.process_en(sline[0])
                entity_2 = data_process.process_en(sline[1])
                entity_1_type = sline[4]
                entity_2_type = sline[5].strip("\n")
                entities_and_type.setdefault(entity_1, {})[entity_1_type] = 1
                entities_and_type.setdefault(entity_2, {})[entity_2_type] = 1

        cases_path = self.TTD_dir + "/disease_target_drug_cases.txt"
        with open(self.output_dir+"/experimental_disease_target_drug", "w+") as output:
            with open(cases_path, "r", encoding='utf-8', errors='replace') as f:
                # The line count is taken in binary mode so it cannot fail on the undecodable
                # bytes that errors='replace' tolerates in the real read.
                for line in tqdm(f, total=_count_lines(cases_path)):
                    # Disease:sickle-cell_disease	Target:humanized_igg2	Drug:selg2
                    sline = line.split("\t")
                    disease = data_process.process_en(sline[0].split(":")[1])
                    target = data_process.process_en(sline[1].split(":")[1])
                    drug = data_process.process_en(sline[2].strip("\n").split(":")[1])
                    if disease in entities_and_type and target in entities_and_type and drug in entities_and_type:
                        output.write("Disease:" + disease + "\tTarget:" + target + "\tDrug:" + drug + "\n")

    def positive_training_data(self):
        """
        positive_training_data is used to construct positive training data from all drug-target-disease cases in "experimental_disease_target_drug"

        The knowledge graph, the entity/predicate index dictionaries and the case file are loaded from
        <output_dir> when they exist and are built otherwise. The result is written to <output_dir>/all_positive_data.
        """
        kg_path = self.output_dir+"/KnowledgeGraph"
        if os.path.exists(kg_path):
            KG = _load_pickle(kg_path)
        else:
            constuct_KG = Construct_KG(self.predication_dir+"/predications.txt", kg_path)
            KG = constuct_KG.construct_KnowledgeGraph()

        if os.path.exists(self.output_dir+"/predicate_vector") and os.path.exists(self.output_dir+"/entity_vector"):
            entity_vector = _load_pickle(self.output_dir+"/entity_vector")
            predicate_vector = _load_pickle(self.output_dir+"/predicate_vector")
        else:
            entity_vector, predicate_vector = self.UMLS_type_vector()

        cases_path = self.output_dir + "/experimental_disease_target_drug"
        if not os.path.exists(cases_path):
            # NOTE(review): initial_disease_target_drug is not defined in the visible part of this
            # class. positive_dtd_cases() is the method that writes this file, so it is used as
            # the fallback when the former does not exist.
            build_cases = getattr(self, "initial_disease_target_drug", self.positive_dtd_cases)
            build_cases()

        with open(self.output_dir+"/all_positive_data", "w+") as output:
            print("Constructing the positive training data ...")
            with open(cases_path, "r") as f:
                for line in tqdm(f, total=_count_lines(cases_path)):
                    sline = line.split("\t")
                    drug = data_process.process_en(sline[2].split(":")[1].strip("\n"))
                    disease = data_process.process_en(sline[0].split(":")[1].strip("\n"))
                    target = data_process.process_en(sline[1].split(":")[1].strip("\n"))
                    print("Constructing the %s\t%s\t%s\t relations ..." %(drug,target,disease))
                    self.construct_training_positive_data_based_one_dtd(KG, entity_vector, predicate_vector, drug, target, disease, output)


    def construct_training_positive_data_based_one_dtd(self, KG, entity_vector, predicate_vector, drug, target,
                                                       disease, output):
        """
        Write to `output` one tab-separated vector of length 873, followed by the label 1, for each path in KG
        that links `drug` to `disease` through `target`. The path shapes handled are described in the comments below.
        """

        ##
        # The structure of KG: KG is a dictionary, and the following shows the format of the KG:
        # KG={
        #     subject:
        #            {"TYPES":{sysn:2,horm:1,htrf:3}
        #             "OBJECTS":
        #                       object_1:{
        #                                 "TYPES":{}
        #                                 "PREDICATES":{
        #                                              predicate_1:3,
        #                                              predicate_2:4
        #                      }}}}}
        ##
        ##
        # For one specific drug-target-disease example, there are 4 potential possible positive cases could be constructed, and each of the 4 is a vector of length 873:
        # In the KG,
        # case 1: drug - PREDICATE_1 - target - PREDICATE_2 - disease  Case 1 indicates the drug, target and disease are directly connected.
For case 1, we construct a vector, the order of each eneity in the vector is drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - target - PREDICATE_2 - disease258        # case 2: drug - PREDICATE_1 - entity - PREDICATE_2 - target - PREDICATE_3 - disease Case 2 indicates that the target and disease are directly connected, while the drug and target are indirectly connected.259        # case 3: drug - PREDICATE_1 - target - PREDICATE_2 - entity - PREDICATE_3 - disease Case 3 and Case 4 are the other 2 situations.260        # case 4: drug - PREDICATE_1 - entity_1 - PREDICATE_2 - target- PREDICATE_3 - entity_2 - PREDICATE_4 - disease261        # drug->target->262        if drug in KG:263            # For case 1: drug - PREDICATE_1 - target - PREDICATE_2 - disease264            # We used a vector of lenght 873, the format is:265            # drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - target - PREDICATE_2 - disease266            if target in KG[drug]["OBJECTS"]:267                if target in KG:268                    if disease in KG[target]["OBJECTS"]:269                        ## construct vector for drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - target - PREDICATE_2 - disease270                        #  drug part of vector271                        vector = [0] * 873272                        for umls_type in KG[drug]["TYPES"]:273                            vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]274                        #  PREDICATE_1 part of vector275                        for predicate in KG[drug]["OBJECTS"][target]["PREDICATES"]:276                            vector[133 + predicate_vector[predicate]] += KG[drug]["OBJECTS"][target]["PREDICATES"][277                                predicate]278                        #  the REAL target part of vector: in this part, the REAL target is both object (for drug) and subject ï¼for diseaseï¼,so all the umls typs of target(as subject and 
object) should be collected in vector279                        #  -- 1 the REAL target part of vector: target as object280                        for umls_type in KG[drug]["OBJECTS"][target]["TYPES"]:281                            vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \282                            KG[drug]["OBJECTS"][target]["TYPES"][umls_type]283                        #  -- 2 the REAL target part of vector: target as subject284                        for umls_type in KG[target]["TYPES"]:285                            vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += KG[target]["TYPES"][umls_type]286                        # target part of vector: The target of the first PREDICATE_1 - target - PREDICATE_1, the value if copied from the REAL target287                        vector[133 + 52:133 + 52 + 133] = vector[133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133]288                        # PREDICATE_1 part of vector: the valued of the second PREDICATE_1 is same as the first PREDICATE_1, which is copied from PREDICATE_1289                        vector[133 + 52 + 133:133 + 52 + 133 + 52] = vector[133:133 + 52]290                        # PREDICATE_2 part of vector291                        for predicate in KG[target]["OBJECTS"][disease]["PREDICATES"]:292                            vector[133 + 52 + 133 + 52 + 133 + predicate_vector[predicate]] += \293                            KG[target]["OBJECTS"][disease]["PREDICATES"][predicate]294                        # target part of vector:PREDICATE_2 - target - PREDICATE_2, the value of the second target is same as REAL target, which is copied from REAL target295                        vector[133 + 52 + 133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133 + 52 + 133] = vector[296                                                                                                      133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133]297                        # PREDICATE_2 part of vector:298                     
   vector[299                        133 + 52 + 133 + 52 + 133 + 52 + 133:133 + 52 + 133 + 52 + 133 + 52 + 133 + 52] = vector[300                                                                                                          133 + 52 + 133 + 52 + 133:133 + 52 + 133 + 52 + 133 + 52]301                        # disease part of vector302                        for umls_type in KG[target]["OBJECTS"][disease]["TYPES"]:303                            vector[133 + 52 + 133 + 52 + 133 + 52 + 133 + 52 + entity_vector[umls_type]] += \304                            KG[target]["OBJECTS"][disease]["TYPES"][umls_type]305                        for umls_number in vector:306                            output.write(str(umls_number) + "\t")307                        output.write("1\n")308                    # For case 3: drug - PREDICATE_1 - target - PREDICATE_2 - entity - PREDICATE_3 - disease309                    # The format of the result is drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - entity - PREDICATE_3 - disease310                    else:311                        for entity in KG[target]["OBJECTS"]:312                            if entity in KG:313                                if disease in KG[entity]["OBJECTS"]:314                                    vector = [0] * 873315                                    # drug part of vector316                                    for umls_type in KG[drug]["TYPES"]:317                                        vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]318                                    # PREDICATE_1 part of vector319                                    for predicate_1 in KG[drug]["OBJECTS"][target]["PREDICATES"]:320                                        vector[133 + predicate_vector[predicate_1]] += \321                                        KG[drug]["OBJECTS"][target]["PREDICATES"][predicate_1]322                                    # the REAL target of vector323                     
               # --1: The target is used as object324                                    for umls_type in KG[drug]["OBJECTS"][target]["TYPES"]:325                                        vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \326                                        KG[drug]["OBJECTS"][target]["TYPES"][umls_type]327                                    # --2: The target is the subject328                                    for umls_type in KG[target]["TYPES"]:329                                        vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \330                                        KG[target]["TYPES"][umls_type]331                                    # target of vector: PREDICATE_1 - target - PREDICATE_1, the value of the target if copied from REAL target332                                    vector[133 + 52:133 + 52 + 133] = vector[333                                                                      133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133]334                                    # PREDICATE_1 of vector: it's copied from the first PREDICATE_1335                                    vector[133 + 52 + 133:133 + 52 + 133 + 52] = vector[133:133 + 52]336                                    # PREDICATE_2 of vector337                                    for predicate_2 in KG[target]["OBJECTS"][entity]["PREDICATES"]:338                                        vector[133 + 52 + 133 + 52 + 133 + predicate_vector[predicate_2]] += \339                                        KG[target]["OBJECTS"][entity]["PREDICATES"][predicate_2]340                                    # entity of vector: PREDICATE_2 - entity - PREDICATE_3341                                    # -- 1 : entity is the object342                                    for umls_type in KG[target]["OBJECTS"][entity]["TYPES"]:343                                        vector[133 + 52 + 133 + 52 + 133 + 52 + entity_vector[umls_type]] += \344                                        
KG[target]["OBJECTS"][entity]["TYPES"][umls_type]345                                    # --2 : entity is the subject346                                    for umls_type in KG[entity]["TYPES"]:347                                        vector[133 + 52 + 133 + 52 + 133 + 52 + entity_vector[umls_type]] += \348                                        KG[entity]["TYPES"][umls_type]349                                    # PREDICATE_3 of vector350                                    for predicate_3 in KG[entity]["OBJECTS"][disease]["PREDICATES"]:351                                        vector[352                                            133 + 52 + 133 + 52 + 133 + 52 + 133 + predicate_vector[predicate_3]] += \353                                        KG[entity]["OBJECTS"][disease]["PREDICATES"][predicate_3]354                                    # disease of vector355                                    for umls_type in KG[entity]["OBJECTS"][disease]["TYPES"]:356                                        vector[357                                            133 + 52 + 133 + 52 + 133 + 52 + 133 + 52 + entity_vector[umls_type]] += \358                                        KG[entity]["OBJECTS"][disease]["TYPES"][umls_type]359                                    for umls_number in vector:360                                        output.write(str(umls_number) + "\t")361                                    output.write("1\n")362            # For case 2: drug - PREDICATE_1 - entity - PREDICATE_2 - target - PREDICATE_3 - disease363            # The result format: drug - PREDICATE_1 - entity - PREDICATE_2 - target - PREDICATE_3 - target - PREDICATE_3 - disease364            else:365                for entity_1 in KG[drug]["OBJECTS"]:366                    if entity_1 in KG:367                        if target in KG[entity_1]["OBJECTS"]:368                            if target in KG:369                                if disease in KG[target]["OBJECTS"]:370                           
         vector = [0] * 873371                                    # drug part of vector372                                    for umls_type in KG[drug]["TYPES"]:373                                        vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]374                                    # PREDICATE_1 part of vector375                                    for predicate_1 in KG[drug]["OBJECTS"][entity_1]["PREDICATES"]:376                                        vector[133 + predicate_vector[predicate_1]] += \377                                        KG[drug]["OBJECTS"][entity_1]["PREDICATES"][predicate_1]378                                    # entity part of vector: This entity could be used as both subject and object, then all the umls_typy should be collected.379                                    # --1 entity part of vector: entity is the object380                                    for umls_type in KG[drug]["OBJECTS"][entity_1]["TYPES"]:381                                        vector[133 + 52 + entity_vector[umls_type]] += \382                                        KG[drug]["OBJECTS"][entity_1]["TYPES"][umls_type]383                                    # --2 entity part of vector: entity is the subject384                                    for umls_type in KG[entity_1]["TYPES"]:385                                        vector[133 + 52 + entity_vector[umls_type]] += KG[entity_1]["TYPES"][386                                            umls_type]387                                    # PREDICATE_2 part of vector388                                    for predicate_2 in KG[entity_1]["OBJECTS"][target]["PREDICATES"]:389                                        vector[133 + 52 + 133 + predicate_vector[predicate_2]] += \390                                        KG[entity_1]["OBJECTS"][target]["PREDICATES"][predicate_2]391                                    # the REAL target part of vector: target could be subject or object392                           
         # --1 target part of vector: target is subject393                                    for umls_type in KG[entity_1]["OBJECTS"][target]["TYPES"]:394                                        vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \395                                        KG[entity_1]["OBJECTS"][target]["TYPES"][umls_type]396                                    # --2 target part of vector: target is object397                                    for umls_type in KG[target]["TYPES"]:398                                        vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \399                                        KG[target]["TYPES"][umls_type]400                                    # PREDICATE_3 part of vector401                                    for predicate_3 in KG[target]["OBJECTS"][disease]["PREDICATES"]:402                                        vector[133 + 52 + 133 + 52 + 133 + predicate_vector[predicate_3]] += \403                                        KG[target]["OBJECTS"][disease]["PREDICATES"][predicate_3]404                                    # target part of vector: PREDICATE_3 - target - PREDICATE_3, the target is same as REAL target405                                    vector[406                                    133 + 52 + 133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133 + 52 + 133] = vector[407                                                                                                           133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133]408                                    # PREDICATE_3 part of vector: the second PREDICATE_3 is same as the first PREDICATE_3409                                    vector[410                                    133 + 52 + 133 + 52 + 133 + 52 + 133:133 + 52 + 133 + 52 + 133 + 52 + 133 + 52] = vector[411                                                                                                                      133 + 52 + 133 + 52 + 133:133 + 52 + 133 + 52 + 133 + 52]412   
                                 # disease part of vector413                                    for umls_type in KG[target]["OBJECTS"][disease]["TYPES"]:414                                        vector[415                                            133 + 52 + 133 + 52 + 133 + 52 + 133 + 52 + entity_vector[umls_type]] += \416                                        KG[target]["OBJECTS"][disease]["TYPES"][umls_type]417                                    for umls_number in vector:418                                        output.write(str(umls_number) + "\t")419                                    output.write("1\n")420                                # For case 4: drug - PREDICATE_1 - entity_1 - PREDICATE_2 - target- PREDICATE_3 - entity_2 - PREDICATE_4 - disease421                                # The output format:drug - PREDICATE_1 - entity_1 - PREDICATE_2 - target- PREDICATE_3 - entity_2 - PREDICATE_4 - disease422                                else:423                                    for entity_2 in KG[target]["OBJECTS"]:424                                        if entity_2 in KG:425                                            if disease in KG[entity_2]["OBJECTS"]:426                                                vector = [0] * 873427                                                # drug part of vector428                                                for umls_type in KG[drug]["TYPES"]:429                                                    vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]430                                                # PREDICATE_1 part of vector431                                                for predicate_1 in KG[drug]["OBJECTS"][entity_1]["PREDICATES"]:432                                                    vector[133 + predicate_vector[predicate_1]] += \433                                                    KG[drug]["OBJECTS"][entity_1]["PREDICATES"][predicate_1]434                                                # 
entity_1 part of vector435                                                # --1 : entity_1 is object436                                                for umls_type in KG[drug]["OBJECTS"][entity_1]["TYPES"]:437                                                    vector[133 + 52 + entity_vector[umls_type]] += \438                                                    KG[drug]["OBJECTS"][entity_1]["TYPES"][umls_type]439                                                # --2 : entity_1 is subject440                                                for umls_type in KG[entity_1]["TYPES"]:441                                                    vector[133 + 52 + entity_vector[umls_type]] += \442                                                    KG[entity_1]["TYPES"][umls_type]443                                                # PREDICATE_2 part of vector444                                                for predicate_2 in KG[entity_1]["OBJECTS"][target]["PREDICATES"]:445                                                    vector[133 + 52 + 133 + predicate_vector[predicate_2]] += \446                                                    KG[entity_1]["OBJECTS"][target]["PREDICATES"][predicate_2]447                                                # target part of vector448                                                # --1 : target is object449                                                for umls_type in KG[entity_1]["OBJECTS"][target]["TYPES"]:450                                                    vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \451                                                    KG[entity_1]["OBJECTS"][target]["TYPES"][umls_type]452                                                # --2 : target is subject453                                                for umls_type in KG[target]["TYPES"]:454                                                    vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \455                                               
     KG[target]["TYPES"][umls_type]456                                                # PREDICATE_3 part of vector457                                                for predicate_3 in KG[target]["OBJECTS"][entity_2]["PREDICATES"]:458                                                    vector[133 + 52 + 133 + 52 + 133 + predicate_vector[459                                                        predicate_3]] += \460                                                    KG[target]["OBJECTS"][entity_2]["PREDICATES"][predicate_3]461                                                # entity_2 part of vector462                                                # --1 : entity_2 is object463                                                for umls_type in KG[target]["OBJECTS"][entity_2]["TYPES"]:464                                                    vector[133 + 52 + 133 + 52 + 133 + 52 + entity_vector[465                                                        umls_type]] += KG[target]["OBJECTS"][entity_2]["TYPES"][466                                                        umls_type]467                                                # --1 : entity_2 is subject468                                                for umls_type in KG[entity_2]["TYPES"]:469                                                    vector[133 + 52 + 133 + 52 + 133 + 52 + entity_vector[470                                                        umls_type]] += KG[entity_2]["TYPES"][umls_type]471                                                # PREDICATE_4 part of vector472                                                for predicate_4 in KG[entity_2]["OBJECTS"][disease]["PREDICATES"]:473                                                    vector[133 + 52 + 133 + 52 + 133 + 52 + 133 + predicate_vector[474                                                        predicate_4]] += \475                                                    KG[entity_2]["OBJECTS"][disease]["PREDICATES"][predicate_4]476                           
                     # disease part of vector477                                                for umls_type in KG[entity_2]["OBJECTS"][disease]["TYPES"]:478                                                    vector[479                                                        133 + 52 + 133 + 52 + 133 + 52 + 133 + 52 + entity_vector[480                                                            umls_type]] += \481                                                    KG[entity_2]["OBJECTS"][disease]["TYPES"][umls_type]482                                                for umls_number in vector:483                                                    output.write(str(umls_number) + "\t")484                                                output.write("1\n")485        else:486            print("NOT FOUND " + drug + "\t" + target + "\t" + disease)487488489    def negative_dtd_cases(self):490        """491        negative_dtd_cases is used to construct negative drug-target-disease associations.492        """493        if os.path.exists(self.output_dir + "/KnowledgeGraph"):494            KG = pickle.load(open(self.output_dir + "/KnowledgeGraph", "rb"))495        else:496            constuct_KG = Construct_KG(self.predication_dir + "/predications.txt", self.output_dir + "/KnowledgeGraph")497            KG = constuct_KG.construct_KnowledgeGraph()498499        entity_set={}500        drug_type_set={}501        target_type_set={}502        disease_type_set={}503        output = open(self.output_dir + "/experimental_disease_target_drug_negative", "w+")504        print("Constructing the negative drug-target-disease cases ...")505        with open(self.output_dir + "/experimental_disease_target_drug", "r") as f:506            for line in tqdm(f, total=sum(1 for _ in open(self.output_dir + "/experimental_disease_target_drug", "r"))):507                sline=line.split("\t")508                drug=data_process.process_en(sline[2].split(":")[1].strip("\n"))509                
disease=data_process.process_en(sline[0].split(":")[1].strip("\n"))510                target=data_process.process_en(sline[1].split(":")[1].strip("\n"))511                entity_set[drug]=1512                entity_set[target]=1513                entity_set[disease]=1514                if drug in KG:515                    for umls_type in KG[drug]["TYPES"]:516                        drug_type_set[umls_type]=1517                if target in KG:518                    for umls_type in KG[target]["TYPES"]:519                        target_type_set[umls_type]=1520                if disease in KG:521                    for umls_type in KG[disease]["TYPES"]:522                        disease_type_set[umls_type]=1523524            negative_examples_count=0525            entity_list=list(KG)526            selected_examples={}527            while negative_examples_count < 5000000:528                random_drug=random.choice(entity_list)529                random_target=random.choice(entity_list)530                random_disease=random.choice(entity_list)531                selected_example=random_drug+" "+random_target+" "+random_disease532                while selected_example in selected_examples:533                    random_drug=random.choice(entity_list)534                    random_target=random.choice(entity_list)535                    random_disease=random.choice(entity_list)536                    selected_example=random_drug+" "+random_target+" "+random_disease537538                if random_drug not in entity_set and random_target not in entity_set and random_disease not in entity_set:539                    drug_umls_type_exist=False540                    target_umls_type_exist=False541                    disease_umls_type_exist=False542                    for umls_type_of_drug in KG[random_drug]["TYPES"]:543                        if umls_type_of_drug in drug_type_set:544                            drug_umls_type_exist=True545                            break546     
               for umls_type_of_target in  KG[random_target]["TYPES"]:547                        if umls_type_of_target in target_type_set:548                            target_umls_type_exist=True549                            break550                    for umls_type_of_disease in KG[random_disease]["TYPES"]:551                        if umls_type_of_disease in disease_type_set:552                            disease_umls_type_exist=True553                    if drug_umls_type_exist == target_umls_type_exist == disease_umls_type_exist == True:554                        output.write("Disease:"+random_disease+"\tTarget:"+random_target+"\tDrug:"+random_drug+"\n")555                        negative_examples_count += 1556557558    def negative_training_data(self):559        if os.path.exists(self.output_dir+"/KnowledgeGraph"):560            KG = pickle.load(open(self.output_dir+"/KnowledgeGraph", "rb"))561        else:562            constuct_KG= Construct_KG(self.predication_dir+"/predications.txt",self.output_dir+"/KnowledgeGraph")563            KG = constuct_KG.construct_KnowledgeGraph()564565        if os.path.exists(self.output_dir+"/predicate_vector") and os.path.exists(self.output_dir+"/entity_vector"):566            entity_vector = pickle.load(open(self.output_dir+"/entity_vector", "rb"))567            predicate_vector = pickle.load(open(self.output_dir+"/predicate_vector", "rb"))568        else:569            entity_vector, predicate_vector = self.UMLS_type_vector()570571        if not os.path.exists(self.output_dir + "/experimental_disease_target_drug_negative"):572            self.negative_dtd_cases()573574        output = open(self.output_dir + "/all_negative_data", "w+")575        print("Constructing the negative training data ...")576        with open(self.output_dir + "/experimental_disease_target_drug_negative", "r") as f:577            for line in tqdm(f, total=sum(1 for _ in open(self.output_dir + "/experimental_disease_target_drug_negative", 
"r"))):578                sline=line.split("\t")579                drug=sline[2].split(":")[1].strip("\n")580                disease=sline[0].split(":")[1].strip("\n")581                target=sline[1].split(":")[1].strip("\n")582                self.construct_training_negative_data_based_one_dtd(KG,entity_vector,predicate_vector,drug,target,disease,output)583        output.close()584585    def construct_training_negative_data_based_one_dtd(self,KG,entity_vector,predicate_vector,drug,target,disease,output):586        ##587        # The function is similar to construct_training_positive_data_based_one_dtd function.588        # The struction of KGï¼589        # KG={590        #     subject:591        #            {"TYPES":{sysn:2,horm:1,htrf:3}592        #             object_1:{593        #                      "TYPES":{}594        #                      predicate_1:3,595        #                      predicate_2:4596        #                      }}}}}597        ##598        ##599        if drug in KG:600            # For case 1: drug - PREDICATE_1 - target - PREDICATE_2 - disease601            #  drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - target - PREDICATE_2 - disease602            if target in KG[drug]["OBJECTS"]:603                if target in KG:604                    if disease in KG[target]["OBJECTS"]:605                        ## construct vector for drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - target - PREDICATE_2 - disease606                        #  drug part of vector607                        vector=[0]*873608                        for umls_type in KG[drug]["TYPES"]:609                            vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]610                        #  PREDICATE_1 part of vector611                        for predicate in KG[drug]["OBJECTS"][target]["PREDICATES"]:612                            vector[133+predicate_vector[predicate]] += 
KG[drug]["OBJECTS"][target]["PREDICATES"][predicate]613                        #  the REAL target part of vector: in this part, the REAL target is both object (for drug) and subject ï¼for diseaseï¼,so all the umls typs of target(as subject and object) should be collected in vector614                        #  -- 1 the REAL target part of vector: target as object615                        for umls_type in KG[drug]["OBJECTS"][target]["TYPES"]:616                            vector[133+52+133+52+entity_vector[umls_type]] += KG[drug]["OBJECTS"][target]["TYPES"][umls_type]617                        #  -- 2 the REAL target part of vector: target as subject618                        for umls_type in KG[target]["TYPES"]:619                            vector[133+52+133+52+entity_vector[umls_type]] += KG[target]["TYPES"][umls_type]620                        # target part of vector621                        vector[133+52:133+52+133] = vector[133+52+133+52:133+52+133+52+133]622                        # PREDICATE_1 part of vector623                        vector[133+52+133:133+52+133+52] = vector[133:133+52]624                        # PREDICATE_2 part of vector625                        for predicate in KG[target]["OBJECTS"][disease]["PREDICATES"]:626                            vector[133+52+133+52+133+predicate_vector[predicate]] += KG[target]["OBJECTS"][disease]["PREDICATES"][predicate]627                        # target part of vector:PREDICATE_2 - target - PREDICATE_2628                        vector[133+52+133+52+133+52:133+52+133+52+133+52+133] = vector[133+52+133+52:133+52+133+52+133]629                        # PREDICATE_2 part of vector630                        vector[133+52+133+52+133+52+133:133+52+133+52+133+52+133+52] = vector[133+52+133+52+133:133+52+133+52+133+52]631                        # disease part of vector632                        for umls_type in KG[target]["OBJECTS"][disease]["TYPES"]:633                            
vector[133+52+133+52+133+52+133+52+entity_vector[umls_type]] += KG[target]["OBJECTS"][disease]["TYPES"][umls_type]634                        for umls_number in vector:635                            output.write(str(umls_number)+"\t")636                        output.write("0\n")637638                    # For case 3: drug - PREDICATE_1 - target - PREDICATE_2 - entity - PREDICATE_3 - disease639                    # drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - entity - PREDICATE_3 - disease640                    else:641                        for entity in KG[target]["OBJECTS"]:642                            if entity in KG:643                                if disease in KG[entity]["OBJECTS"]:644                                    vector=[0]*873645                                    # drug part of vector646                                    for umls_type in KG[drug]["TYPES"]:647                                        vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]648                                    # PREDICATE_1 part of vector649                                    for predicate_1 in KG[drug]["OBJECTS"][target]["PREDICATES"]:650                                        vector[133+predicate_vector[predicate_1]] += KG[drug]["OBJECTS"][target]["PREDICATES"][predicate_1]651                                    # the REAL target of vector652                                    # --1: target is object653                                    for umls_type in KG[drug]["OBJECTS"][target]["TYPES"]:654                                        vector[133+52+133+52+entity_vector[umls_type]] += KG[drug]["OBJECTS"][target]["TYPES"][umls_type]655                                    # --2: target is subject656                                    for umls_type in KG[target]["TYPES"]:657                                        vector[133+52+133+52+entity_vector[umls_type]] += KG[target]["TYPES"][umls_type]658                                    # target 
of vector: PREDICATE_1 - target - PREDICATE_1659                                    vector[133+52:133+52+133] = vector[133+52+133+52:133+52+133+52+133]660                                    # PREDICATE_1 of vector661                                    vector[133+52+133:133+52+133+52] = vector[133:133+52]662                                    # PREDICATE_2 of vector663                                    for predicate_2 in KG[target]["OBJECTS"][entity]["PREDICATES"]:664                                        vector[133+52+133+52+133+predicate_vector[predicate_2]] += KG[target]["OBJECTS"][entity]["PREDICATES"][predicate_2]665                                    # entity of vector: PREDICATE_2 - entity - PREDICATE_3666                                    # -- 1 : entity is object667                                    for umls_type in KG[target]["OBJECTS"][entity]["TYPES"]:668                                        vector[133+52+133+52+133+52+entity_vector[umls_type]] += KG[target]["OBJECTS"][entity]["TYPES"][umls_type]669                                    # --2 : entity is subject670                                    for umls_type in KG[entity]["TYPES"]:671                                        vector[133+52+133+52+133+52+entity_vector[umls_type]] += KG[entity]["TYPES"][umls_type]672                                    # PREDICATE_3 of vector673                                    for predicate_3 in KG[entity]["OBJECTS"][disease]["PREDICATES"]:674                                        vector[133+52+133+52+133+52+133+predicate_vector[predicate_3]] += KG[entity]["OBJECTS"][disease]["PREDICATES"][predicate_3]675                                    # disease of vector676                                    for umls_type in KG[entity]["OBJECTS"][disease]["TYPES"]:677                                        vector[133+52+133+52+133+52+133+52+entity_vector[umls_type]] += KG[entity]["OBJECTS"][disease]["TYPES"][umls_type]678                                    for umls_number in 
vector:679                                        output.write(str(umls_number)+"\t")680                                    output.write("0\n")681                                    if len(vector) > 874:682                                        print("case 3\t"+str(len(vector)))683            # case 2: drug - PREDICATE_1 - entity - PREDICATE_2 - target - PREDICATE_3 - disease684            else:685                for entity_1 in KG[drug]["OBJECTS"]:686                    if entity_1 in KG:687                        if target in KG[entity_1]["OBJECTS"]:688                            if target in KG:689                                if disease in KG[target]["OBJECTS"]:690                                    vector=[0]*873691                                    # drug part of vector692                                    for umls_type in KG[drug]["TYPES"]:693                                        vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]694                                    # PREDICATE_1 part of vector695                                    for predicate_1 in KG[drug]["OBJECTS"][entity_1]["PREDICATES"]:696                                        vector[133+predicate_vector[predicate_1]] += KG[drug]["OBJECTS"][entity_1]["PREDICATES"][predicate_1]697                                    # entity part of vector698                                    # --1 entity part of vector: entity is object699                                    for umls_type in KG[drug]["OBJECTS"][entity_1]["TYPES"]:700                                        vector[133+52+entity_vector[umls_type]] += KG[drug]["OBJECTS"][entity_1]["TYPES"][umls_type]701                                    # --2 entity part of vector: entity is subject702                                    for umls_type in KG[entity_1]["TYPES"]:703                                        vector[133+52+entity_vector[umls_type]] += KG[entity_1]["TYPES"][umls_type]704                                    # PREDICATE_2 part 
of vector705                                    for predicate_2 in KG[entity_1]["OBJECTS"][target]["PREDICATES"]:706                                        vector[133+52+133+predicate_vector[predicate_2]] += KG[entity_1]["OBJECTS"][target]["PREDICATES"][predicate_2]707                                    # the REAL target part of vector708                                    # --1 target part of vector: target is subject709                                    for umls_type in KG[entity_1]["OBJECTS"][target]["TYPES"]:710                                        vector[133+52+133+52+entity_vector[umls_type]] += KG[entity_1]["OBJECTS"][target]["TYPES"][umls_type]711                                    # --2 target part of vector: target is object712                                    for umls_type in KG[target]["TYPES"]:713                                        vector[133+52+133+52+entity_vector[umls_type]] += KG[target]["TYPES"][umls_type]714                                    # PREDICATE_3 part of vector715                                    for predicate_3 in KG[target]["OBJECTS"][disease]["PREDICATES"]:716                                        vector[133+52+133+52+133+predicate_vector[predicate_3]] += KG[target]["OBJECTS"][disease]["PREDICATES"][predicate_3]717                                    # target part of vector: PREDICATE_3 - target - PREDICATE_3718                                    vector[133+52+133+52+133+52:133+52+133+52+133+52+133]=vector[133+52+133+52:133+52+133+52+133]719                                    # PREDICATE_3 part of vector:è¿æ¯ç¬¬äºä¸ªPREDICATE_3720                                    vector[133+52+133+52+133+52+133:133+52+133+52+133+52+133+52] = vector[133+52+133+52+133:133+52+133+52+133+52]721                                    # disease part of vector722                                    for umls_type in KG[target]["OBJECTS"][disease]["TYPES"]:723                                        
vector[133+52+133+52+133+52+133+52+entity_vector[umls_type]] += KG[target]["OBJECTS"][disease]["TYPES"][umls_type]724                                    for umls_number in vector:725                                        output.write(str(umls_number)+"\t")726                                    output.write("0\n")727                                    if len(vector) > 874:728                                        print("case 2\t"+str(len(vector)))729                                # For case 4: drug - PREDICATE_1 - entity_1 - PREDICATE_2 - target- PREDICATE_3 - entity_2 - PREDICATE_4 - disease730                                # Example: drug - PREDICATE_1 - entity_1 - PREDICATE_2 - target- PREDICATE_3 - entity_2 - PREDICATE_4 - disease731                                else:732                                    for entity_2 in KG[target]["OBJECTS"]:733                                        if entity_2 in KG:734                                            if disease in KG[entity_2]["OBJECTS"]:735                                                vector=[0]*873736                                                # drug part of vector737                                                for umls_type in KG[drug]["TYPES"]:738                                                    vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]739                                                # PREDICATE_1 part of vector740                                                for predicate_1 in KG[drug]["OBJECTS"][entity_1]["PREDICATES"]:741                                                    vector[133+predicate_vector[predicate_1]] += KG[drug]["OBJECTS"][entity_1]["PREDICATES"][predicate_1]742                                                # entity_1 part of vector743                                                # --1 : entity_1 is object744                                                for umls_type in KG[drug]["OBJECTS"][entity_1]["TYPES"]:745                                    
                vector[133+52+entity_vector[umls_type]] += KG[drug]["OBJECTS"][entity_1]["TYPES"][umls_type]746                                                # --2 : entity_1 is subject747                                                for umls_type in KG[entity_1]["TYPES"]:748                                                    vector[133+52+entity_vector[umls_type]] += KG[entity_1]["TYPES"][umls_type]749                                                # PREDICATE_2 part of vector750                                                for predicate_2 in KG[entity_1]["OBJECTS"][target]["PREDICATES"]:751                                                    vector[133+52+133+predicate_vector[predicate_2]] += KG[entity_1]["OBJECTS"][target]["PREDICATES"][predicate_2]752                                                # target part of vector753                                                # --1 : target is object754                                                for umls_type in KG[entity_1]["OBJECTS"][target]["TYPES"]:755                                                    vector[133+52+133+52+entity_vector[umls_type]] += KG[entity_1]["OBJECTS"][target]["TYPES"][umls_type]756                                                # --2 : target is subject757                                                for umls_type in KG[target]["TYPES"]:758                                                    vector[133+52+133+52+entity_vector[umls_type]] += KG[target]["TYPES"][umls_type]759                                                # PREDICATE_3 part of vector760                                                for predicate_3 in KG[target]["OBJECTS"][entity_2]["PREDICATES"]:761                                                    vector[133+52+133+52+133+predicate_vector[predicate_3]] += KG[target]["OBJECTS"][entity_2]["PREDICATES"][predicate_3]762                                                # entity_2 part of vector763                                                # --1 : entity_2 is 
object764                                                for umls_type in KG[target]["OBJECTS"][entity_2]["TYPES"]:765                                                    vector[133+52+133+52+133+52+entity_vector[umls_type]] += KG[target]["OBJECTS"][entity_2]["TYPES"][umls_type]766                                                # --1 : entity_2 is subject767                                                for umls_type in KG[entity_2]["TYPES"]:768                                                    vector[133+52+133+52+133+52+entity_vector[umls_type]] += KG[entity_2]["TYPES"][umls_type]769                                                # PREDICATE_4 part of vector770                                                for predicate_4 in KG[entity_2]["OBJECTS"][disease]["PREDICATES"]:771                                                    vector[133+52+133+52+133+52+133+predicate_vector[predicate_4]] += KG[entity_2]["OBJECTS"][disease]["PREDICATES"][predicate_4]772                                                # disease part of vector773                                                for umls_type in KG[entity_2]["OBJECTS"][disease]["TYPES"]:774                                                    vector[133+52+133+52+133+52+133+52+entity_vector[umls_type]] += KG[entity_2]["OBJECTS"][disease]["TYPES"][umls_type]775                                                for umls_number in vector:776                                                    output.write(str(umls_number)+"\t")777                                                output.write("0\n")778                                                if len(vector) > 874:779                                                    print("case 4\t"+str(len(vector)))780781    def construct_all_data(self):782        if not os.path.exists(self.output_dir + "/predicate_vector"):783            self.UMLS_type_vector()784785        if not os.path.exists(self.output_dir + "/drug_synonyms"):786            self.drug_syndroms()787788        if not 
os.path.exists(self.output_dir + "/disease_targets"):789            self.disease_target()790791        if not os.path.exists(self.output_dir + "/drug_disease"):792            self.drug_disease()793794        if not os.path.exists(self.output_dir + "/experimental_disease_target_drug"):795            self.positive_dtd_cases()796797        if not os.path.exists(self.output_dir + "/all_positive_data"):798            self.positive_training_data()799800        if not os.path.exists(self.output_dir + "/experimental_disease_target_drug_negative"):801            self.negative_dtd_cases()802803        if not os.path.exists(self.output_dir + "/all_negative_data"):804            self.negative_training_data()805806if __name__ == "__main__":807    predication_dir = "./data/SemmedDB"808    TTD_dir = "./data/TTD"809    processed_dir = "./data/processed"810    s=Extract_Data(predication_dir,TTD_dir,processed_dir)811    s.construct_all_data()
...Statement.py
Source:Statement.py  
from Predicate import *
import copy


class Statement():
    """
    defines one FOL statement and the operations allowed on them
    member variables include:
    predicate_set : set of 'Predicate' objects which are
    connected via OR operator in a statement
    statement_string : string representation of statement
    """
    def __init__(self, statement_string=None):
        # A missing or empty string leaves the statement uninitialised;
        # init_from_string / init_from_predicate_set can fill it in later.
        self.statement_string = None
        self.predicate_set = None
        if statement_string:
            self.init_from_string(statement_string)

    def init_from_string(self, statement_string):
        """
        initializes a Statement object from statement string
        ('|' separates the predicates of the clause)
        """
        self.init_from_predicate_set(
            set(Predicate(text) for text in statement_string.split('|')))

    def init_from_predicate_set(self, predicate_set):
        """
        initializes Statement object from a predicate set
        """
        self.predicate_set = predicate_set
        self.statement_string = '|'.join(
            predicate.predicate_string for predicate in predicate_set)

    def __str__(self):
        return self.statement_string

    def __eq__(self, statement):
        if not isinstance(statement, Statement):
            return NotImplemented
        return self.predicate_set==statement.predicate_set

    def __hash__(self):
        # Sorting the characters makes the hash independent of the order in
        # which the set yielded the predicates; equal statements hash equally.
        # "or ''" keeps an uninitialised statement hashable.
        return hash((''.join(sorted(self.statement_string or ''))))

    def exists_in_KB(self, KB):
        '''
        returns true if cnf_statement already exists
        in the KNOWLEDGE_BASE else False
        '''
        return self in KB

    def add_statement_to_KB(self, KB, KB_HASH):
        """
        adds a statement in a knowledge base and updates the Hash
        (KB_HASH maps a predicate name to the set of statements using it)
        """
        KB.add(self)
        for predicate in self.predicate_set:
            KB_HASH.setdefault(predicate.name, set()).add(self)

    def resolve(self, statement):
        '''
        Resolves two statements
        returns False if a contradiction is encountered when resolved otherwise,
        returns set of new infered statements(empty if no statements infered)
        '''
        infered_statements = set()
        for predicate_1 in self.predicate_set:
            for predicate_2 in statement.predicate_set:
                # only complementary literals of the same predicate can resolve
                if not ((predicate_1.negative ^ predicate_2.negative) and predicate_1.name==predicate_2.name):
                    continue
                unification = predicate_1.unify_with_predicate(predicate_2) # returns substitution if statements can unify else false
                if unification is False:
                    continue
                # Lists (not lazy filter/map objects) so that the emptiness test
                # and the concatenation below also work on Python 3.
                rest_statement_1 = [x for x in copy.deepcopy(self.predicate_set) if not (x == predicate_1)]
                rest_statement_2 = [x for x in copy.deepcopy(statement.predicate_set) if not (x == predicate_2)]
                if not rest_statement_1 and not rest_statement_2:           # contradiction found
                    return False
                rest_statement_1 = [x.substitute(unification) for x in rest_statement_1]
                rest_statement_2 = [x.substitute(unification) for x in rest_statement_2]
                new_statement = Statement()
                new_statement.init_from_predicate_set(set(rest_statement_1+rest_statement_2))
                infered_statements.add(new_statement)
        return infered_statements

    def get_resolving_clauses(self, KB_HASH):
        """
        returns a set of possible statements
        the self statement object can resolve with
        """
        resolving_clauses = set()
        for predicate in self.predicate_set:
            if predicate.name in KB_HASH:
                resolving_clauses = resolving_clauses.union(KB_HASH[predicate.name])
        # NOTE(review): the excerpt is cut off after the loop; returning the
        # accumulated set is what the docstring specifies - confirm against the full file.
        return resolving_clauses
...test_multisplitby.py
Source:test_multisplitby.py  
...9    return list(yield_all())10def predicate_1(x: int) -> bool:11    """Return true if the integer is 3."""12    return x == 313def predicate_2(x: int) -> bool:14    """Return true if the integer is 6."""15    return x == 616def predicate_3(x: int) -> bool:17    """Return true if the integer is 8."""18    return x == 819class TestIter(unittest.TestCase):20    """Test :mod:`multisplitby`."""21    def test_split_by_iterable_is_empty(self):22        """Test when an empty iterable is given."""23        integers = []24        predicates = [predicate_1, predicate_2]25        r = list(multi_split_by(integers, predicates))26        self.assertEqual(1 + len(predicates), len(r))27        a, b, c = r...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing for FREE!
