diff --git a/atomsci/ddm/docs/PARAMETERS.md b/atomsci/ddm/docs/PARAMETERS.md index 135b332b..611ae5c6 100644 --- a/atomsci/ddm/docs/PARAMETERS.md +++ b/atomsci/ddm/docs/PARAMETERS.md @@ -276,6 +276,14 @@ The AMPL pipeline contains many parameters and options to fit models and make pr |*Description:*|True/False flag for setting verbosity| |*Default:*|FALSE| |*Type:*|Bool| + +- **seed** + +||| +|-|-| +|*Description:*|Seed used for initializing a random number generator to ensure results are reproducible. Default is None and a random seed will be generated.| +|*Default:*|None| +|*Type:*|int| - **production** @@ -529,6 +537,30 @@ the model will train for max_epochs regardless of validation error.| |*Default:*|scaffold| |*Type:*|str| +- **sampling_method** + +||| +|-|-| +|*Description:*|The sampling method for addressing class imbalance in classification datasets. Options include 'undersampling' and 'SMOTE'.| +|*Default:*|None| +|*Type:*|str| + +- **sampling_ratio** + +||| +|-|-| +|*Description:*|The desired ratio of the minority class to the majority class after sampling (e.g., if str, 'minority', 'not minority'; if float, '0.2', '1.0'). | +|*Default:*|auto| +|*Type:*|str| + +- **sampling_k_neighbors** + +||| +|-|-| +|*Description:*|The number of nearest neighbors to consider when generating synthetic samples (e.g., 5, 7, 9). 
Specifically used for SMOTE sampling method.| +|*Default:*|5| +|*Type:*|int| + - **mtss\_num\_super\_scaffolds** ||| diff --git a/atomsci/ddm/pipeline/GeneticAlgorithm.py b/atomsci/ddm/pipeline/GeneticAlgorithm.py index 0f6eb11a..53e3d967 100644 --- a/atomsci/ddm/pipeline/GeneticAlgorithm.py +++ b/atomsci/ddm/pipeline/GeneticAlgorithm.py @@ -1,10 +1,10 @@ import numpy as np +import uuid import scipy.spatial.distance as scipy_distance import multiprocessing -import random from tqdm import tqdm import timeit -from typing import Any, Callable, List, Tuple +from typing import Any, Callable, List, Tuple, Optional N_PROCS = multiprocessing.cpu_count() @@ -22,7 +22,8 @@ def __init__(self, init_pop: List[List[Any]], fitness_func: Callable, crossover_func: Callable, - mutate_func: Callable): + mutate_func: Callable, + seed: Optional[int]): """ Creates a GeneticAlgorithm object @@ -40,8 +41,14 @@ def __init__(self, mutate_func: Callable A callable that takes a list of chromosomes and returns another list of mutated chromosomes + seed: Optional[int] + Seed for random number generator """ + if seed is None: + seed = uuid.uuid4().int % (2**32) + self.random_state = np.random.default_rng(seed) + self.pop = init_pop self.pop_scores = None self.num_pop = len(init_pop) @@ -177,13 +184,13 @@ def step(self, print_timings: bool = False): # select parents using rank selection i = timeit.default_timer() - new_pop = self.crossover_func(parents, self.num_pop) + new_pop = self.crossover_func(parents, self.num_pop, random_state=self.random_state) if print_timings: print('\tcrossover %0.2f min'%((timeit.default_timer()-i)/60)) # mutate population i = timeit.default_timer() - self.pop = self.mutate_func(new_pop) + self.pop = self.mutate_func(new_pop, random_state=self.random_state) if print_timings: print('\tmutate %0.2f min'%((timeit.default_timer()-i)/60)) print('total %0.2f min'%((timeit.default_timer()-start)/60)) @@ -199,23 +206,23 @@ def step(self, print_timings: bool = False): def 
fitness_func(chromosome): return 1 - scipy_distance.rogerstanimoto(chromosome, target_chromosome) - def crossover_func(parents, pop_size): + def crossover_func(parents, pop_size, random_state): new_pop = [] for i in range(num_pop): parent1 = parents[i%len(parents)] parent2 = parents[(i+1)%len(parents)] - crossover_point = random.randint(0, len(parents[0])-1) + crossover_point = random_state.integers(0, len(parents[0]), 1)[0] new_pop.append(parent1[:crossover_point]+parent2[crossover_point:]) return new_pop - def mutate_func(pop, mutate_chance=0.01): + def mutate_func(pop, random_state, mutate_chance=0.01): new_pop = [] for chromosome in pop: new_chromosome = list(chromosome) for i, g in enumerate(new_chromosome): - if random.random() < mutate_chance: + if random_state.random() < mutate_chance: if new_chromosome[i] == 0: new_chromosome[i] = 1 else: diff --git a/atomsci/ddm/pipeline/MultitaskScaffoldSplit.py b/atomsci/ddm/pipeline/MultitaskScaffoldSplit.py index e3432360..37dd7a34 100644 --- a/atomsci/ddm/pipeline/MultitaskScaffoldSplit.py +++ b/atomsci/ddm/pipeline/MultitaskScaffoldSplit.py @@ -1,6 +1,5 @@ import argparse import logging -import random import timeit import tempfile from typing import List, Optional, Set, Tuple @@ -636,8 +635,8 @@ def split(self, A tuple with 3 elements that are training, validation, and test compound indices into dataset, respectively """ - if seed is not None: - np.random.seed(seed) + self.seed = seed + self.dataset = dataset self.diff_fitness_weight_tvt = diff_fitness_weight_tvt self.diff_fitness_weight_tvv = diff_fitness_weight_tvv @@ -674,7 +673,7 @@ def split(self, population.append(split_chromosome) gene_alg = ga.GeneticAlgorithm(population, self.grade, ga_crossover, - ga_mutate) + ga_mutate, self.seed) #gene_alg.iterate(num_generations) for i in range(self.num_generations): gene_alg.step(print_timings=print_timings) @@ -859,7 +858,8 @@ def train_valid_test_split(self, return train_dataset, valid_dataset, test_dataset def
ga_crossover(parents: List[List[str]], - num_pop: int) -> List[List[str]]: + num_pop: int, + random_state: np.random.Generator) -> List[List[str]]: """Create the next generation from parents A random index is chosen and genes up to that index from @@ -872,6 +872,8 @@ def ga_crossover(parents: List[List[str]], A list of chromosomes. num_pop: int The number of new chromosomes to make + random_state: np.random.Generator + Random number generator Returns ------- List[List[str]] @@ -883,13 +885,14 @@ def ga_crossover(parents: List[List[str]], parent1 = parents[i%len(parents)] parent2 = parents[(i+1)%len(parents)] - crossover_point = random.randint(0, len(parents[0])-1) + crossover_point = random_state.integers(low=0, high=len(parents[0]), size=1)[0] new_pop.append(parent1[:crossover_point]+parent2[crossover_point:]) return new_pop def ga_mutate(new_pop: List[List[str]], - mutation_rate: float = .02) -> List[List[str]]: + random_state: np.random.Generator, + mutation_rate: float = .02,) -> List[List[str]]: """Mutate the population Each chromosome is copied and mutated at mutation_rate. @@ -900,6 +903,8 @@ def ga_mutate(new_pop: List[List[str]], ---------- new_pop: List[List[str]] A list of chromosomes. + random_state: np.random.Generator + Random number generator mutation_rate: float How often a mutation occurs. 0.02 is a good rate for my test sets.
@@ -912,8 +917,8 @@ def ga_mutate(new_pop: List[List[str]], for solution in new_pop: new_solution = list(solution) for i, gene in enumerate(new_solution): - if random.random() < mutation_rate: - new_solution[i] = ['train', 'valid', 'test'][random.randint(0,2)] + if random_state.random() < mutation_rate: + new_solution[i] = ['train', 'valid', 'test'][random_state.integers(low=0, high=3, size=1)[0]] mutated.append(new_solution) return mutated @@ -1039,6 +1044,7 @@ def parse_args(): parser.add_argument('id_col', type=str, help='the column containing ids') parser.add_argument('response_cols', type=str, help='comma seperated string of response columns') parser.add_argument('output', type=str, help='name of the split file') + parser.add_argument('seed', type=int, nargs='?', default=0, help='Random seed used in random number generators.') return parser.parse_args() @@ -1054,5 +1060,6 @@ def parse_args(): mss = MultitaskScaffoldSplitter() mss_split_df = split_with(total_df, mss, smiles_col=args.smiles_col, id_col=args.id_col, response_cols=response_cols, - diff_fitness_weight=dfw, ratio_fitness_weight=rfw, num_generations=args.num_gens) + diff_fitness_weight=dfw, ratio_fitness_weight=rfw, num_generations=args.num_gens, + seed=args.seed) mss_split_df.to_csv(args.output, index=False) diff --git a/atomsci/ddm/pipeline/model_datasets.py b/atomsci/ddm/pipeline/model_datasets.py index 8ebd7aa8..baf0406d 100644 --- a/atomsci/ddm/pipeline/model_datasets.py +++ b/atomsci/ddm/pipeline/model_datasets.py @@ -395,7 +395,7 @@ def get_featurized_data(self, params=None): if params.prediction_type=='classification': w = w.astype(np.float32) - self.untransformed_dataset = NumpyDataset(features, self.vals, ids=ids) + self.update_untransformed_responses(ids, self.vals) self.dataset = NumpyDataset(features, self.vals, ids=ids, w=w) self.log.info("Using prefeaturized data; number of features = " + str(self.n_features)) return @@ -421,7 +421,7 @@ def get_featurized_data(self, params=None):
self.log.debug("Number of features: " + str(self.n_features)) # Create the DeepChem dataset - self.untransformed_dataset = NumpyDataset(features, self.vals, ids=ids) + self.update_untransformed_responses(ids, self.vals) self.dataset = NumpyDataset(features, self.vals, ids=ids, w=w) # Checking for minimum number of rows if len(self.dataset) < params.min_compound_number: @@ -451,7 +451,7 @@ def get_dataset_tasks(self, dset_df): return self.tasks is not None # **************************************************************************************** - def split_dataset(self): + def split_dataset(self, random_state=None, seed=None): """Splits the dataset into paired training/validation and test subsets, according to the split strategy selected by the model params. For traditional train/valid/test splits, there is only one training/validation pair. For k-fold cross-validation splits, there are k different train/valid pairs; the validation sets are @@ -470,7 +470,7 @@ def split_dataset(self): # Create object to delegate splitting to. if self.splitting is None: - self.splitting = split.create_splitting(self.params) + self.splitting = split.create_splitting(self.params, random_state=random_state, seed=seed) self.train_valid_dsets, self.test_dset, self.train_valid_attr, self.test_attr = \ self.splitting.split_dataset(self.dataset, self.attr, self.params.smiles_col) if self.train_valid_dsets is None: @@ -497,6 +497,12 @@ def _check_classes(self): (Boolean): boolean specifying if all classes are specified in all splits """ ref_class_set = get_classes(self.train_valid_dsets[0][0].y) + num_classes = len(ref_class_set) + if num_classes != self.params.class_number: + logger = logging.getLogger('ATOM') + logger.warning(f"Expected class_number:{self.params.class_number} " + f"classes but got {num_classes} instead. 
Double check " + "response columns or class_number parameter.") for train, valid in self.train_valid_dsets: if not ref_class_set == get_classes(train.y): return False @@ -581,7 +587,7 @@ def create_dataset_split_table(self): return split_df # **************************************************************************************** - def load_presplit_dataset(self, directory=None): + def load_presplit_dataset(self, directory=None, random_state=None, seed=None): """Loads a table of compound IDs assigned to split subsets, and uses them to split the currently loaded featurized dataset. @@ -608,7 +614,7 @@ def load_presplit_dataset(self, directory=None): """ # Load the split table from the datastore or filesystem - self.splitting = split.create_splitting(self.params) + self.splitting = split.create_splitting(self.params, random_state=random_state, seed=seed) try: split_df, split_kv = self.load_dataset_split_table(directory) @@ -673,11 +679,31 @@ def combined_training_data(self): # All of the splits have the same combined train/valid data, regardless of whether we're using # k-fold or train/valid/test splitting. 
if self.combined_train_valid_data is None: + # normally combining one fold is sufficient, but if SMOTE or undersampling is being used + # just combining the first fold isn't enough (train, valid) = self.train_valid_dsets[0] combined_X = np.concatenate((train.X, valid.X), axis=0) combined_y = np.concatenate((train.y, valid.y), axis=0) combined_w = np.concatenate((train.w, valid.w), axis=0) combined_ids = np.concatenate((train.ids, valid.ids)) + + if self.params.sampling_method=='SMOTE' or self.params.sampling_method=='undersampling': + # for each successive fold, merge in any new compounds + # this loop just won't run if there are no additional folds + for train, valid in self.train_valid_dsets[1:]: + fold_ids = np.concatenate((train.ids, valid.ids)) + new_id_indexes = [i for i in range(len(fold_ids)) if fold_ids[i] not in combined_ids] + + fold_ids = fold_ids[new_id_indexes] + fold_X = np.concatenate((train.X, valid.X), axis=0)[new_id_indexes] + fold_y = np.concatenate((train.y, valid.y), axis=0)[new_id_indexes] + fold_w = np.concatenate((train.w, valid.w), axis=0)[new_id_indexes] + + combined_X = np.concatenate((combined_X, fold_X), axis=0) + combined_y = np.concatenate((combined_y, fold_y), axis=0) + combined_w = np.concatenate((combined_w, fold_w), axis=0) + combined_ids = np.concatenate((combined_ids, fold_ids)) + self.combined_train_valid_data = NumpyDataset(combined_X, combined_y, w=combined_w, ids=combined_ids) return self.combined_train_valid_data @@ -729,6 +755,24 @@ def get_subset_responses_and_weights(self, subset): # ************************************************************************************* + def update_untransformed_responses(self, ids, y): + """ + Updates self.untransformed_response_dict with the given ids and y + + Parameters: + ids (list or np.ndarray): List or array of IDs whose untransformed response values are being stored. + + y (list or np.ndarray): List or array of response values.
+ + Returns: + None + """ + self.untransformed_response_dict.update( + dict(zip(ids, y)) + ) + + # ************************************************************************************* + def get_untransformed_responses(self, ids): """ Returns a numpy array of untransformed response values for the given IDs. @@ -740,9 +784,8 @@ def get_untransformed_responses(self, ids): np.ndarray: A numpy array of untransformed response values corresponding to the given IDs. """ - response_vals = np.zeros((len(ids), self.untransformed_dataset.y.shape[1])) - if len(self.untransformed_response_dict) == 0: - self.untransformed_response_dict = dict(zip(self.untransformed_dataset.ids, self.untransformed_dataset.y)) + num_tasks = len(self.untransformed_response_dict[ids[0]]) + response_vals = np.zeros((len(ids), num_tasks)) for i, id in enumerate(ids): response_vals[i] = self.untransformed_response_dict[id] @@ -803,6 +846,7 @@ def __init__(self, params, featurization, contains_responses=False): self.tasks = None self.attr = None self.contains_responses = contains_responses + self.untransformed_response_dict = {} # **************************************************************************************** def get_dataset_tasks(self, dset_df): @@ -867,7 +911,7 @@ def get_featurized_data(self, dset_df, is_featurized=False): self.log.warning("Done") self.n_features = self.featurization.get_feature_count() - self.untransformed_dataset= NumpyDataset(features, self.vals, ids=ids) + self.update_untransformed_responses(ids, self.vals) self.dataset = NumpyDataset(features, self.vals, ids=ids) # **************************************************************************************** @@ -944,6 +988,7 @@ def __init__(self, params, featurization=None, ds_client=None): super().__init__(params, featurization) self.dataset_oid = None + self.untransformed_response_dict = {} if params.dataset_name: self.dataset_name = params.dataset_name else: diff --git a/atomsci/ddm/pipeline/model_pipeline.py 
b/atomsci/ddm/pipeline/model_pipeline.py index 9ac2aa33..e6c5e4e2 100644 --- a/atomsci/ddm/pipeline/model_pipeline.py +++ b/atomsci/ddm/pipeline/model_pipeline.py @@ -30,6 +30,8 @@ from atomsci.ddm.pipeline import parameter_parser as parse from atomsci.ddm.pipeline import model_tracker as trkr from atomsci.ddm.pipeline import transformations as trans +from atomsci.ddm.pipeline import random_seed as rs +from atomsci.ddm.pipeline import sampling as sample logging.basicConfig(format='%(asctime)-15s %(message)s') @@ -179,7 +181,7 @@ class ModelPipeline: data (ModelDataset object): A data object that featurizes and splits the dataset """ - def __init__(self, params, ds_client=None, mlmt_client=None): + def __init__(self, params, ds_client=None, mlmt_client=None, random_state=None, seed=None): """Initializes ModelPipeline object. Args: @@ -213,6 +215,23 @@ def __init__(self, params, ds_client=None, mlmt_client=None): self.log = logging.getLogger('ATOM') self.run_mode = 'training' # default, can be overridden later self.start_time = time.time() + + # initialize seed + if seed is None: + seed = getattr(params, 'seed', None) + self.random_gen = rs.RandomStateGenerator(params, seed) + self.seed = self.random_gen.get_seed() + else: + # pass the seed into the RandomStateGenerator + self.random_gen = rs.RandomStateGenerator(params, seed) + self.seed = self.random_gen.get_seed() + + if random_state is None: + self.random_state = self.random_gen.get_random_state() + else: + self.random_state = random_state + # log the seed used + self.log.info('Initiating ModelPipeline with seed {}'.format(self.seed)) # Default dataset_name parameter from dataset_key if params.dataset_name is None: @@ -273,6 +292,7 @@ def load_featurize_data(self, params=None): Args: params (Namespace): Optional set of parameters to be used for featurization; by default this function uses the parameters used when the pipeline was created.
+ seed (int): Optional seed for reproducibility Side effects: Sets the following attributes of the ModelPipeline @@ -291,12 +311,24 @@ def load_featurize_data(self, params=None): self.log.info('Training in production mode. Ignoring ' 'previous split and creating production split. ' 'Production split will not be saved.') - self.data.split_dataset() - elif not (params.previously_split and self.data.load_presplit_dataset()): - self.data.split_dataset() + self.data.split_dataset(random_state=self.random_state, seed=self.seed) + elif not (params.previously_split and self.data.load_presplit_dataset(random_state=self.random_state, seed=self.seed)): + self.data.split_dataset(random_state=self.random_state, seed=self.seed) self.data.save_split_dataset() + # write split metadata + self.create_split_metadata() + self.save_split_metadata() if self.data.params.prediction_type == 'classification': self.data._validate_classification_dataset() + + # apply sampling before fitting transformers + if self.run_mode == 'training': + for i, (train, valid) in enumerate(self.data.train_valid_dsets): + if self.data.params.prediction_type == 'classification' and self.params.sampling_method is not None: + train = sample.apply_sampling_method(train, params, random_state=self.random_state, seed=self.seed) + self.data.update_untransformed_responses(train.ids, train.y) + self.data.train_valid_dsets[i] = (train, valid) + # We now create transformers after splitting, to allow for the case where the transformer # is fitted to the training data only. The transformers are then applied to the training, # validation and test sets separately. 
@@ -360,6 +392,13 @@ def create_model_metadata(self): hyperparam_uuid=self.params.hyperparam_uuid, ampl_version=mu.get_ampl_version() ) + # add in sampling method parameters for documentation/reproducibility + if self.params.sampling_method is not None: + model_params['sampling_method'] = self.params.sampling_method + if self.params.sampling_ratio is not None: + model_params['sampling_ratio'] = self.params.sampling_ratio + if self.params.sampling_k_neighbors is not None: + model_params['sampling_k_neighbors'] = self.params.sampling_k_neighbors splitting_metadata = self.data.get_split_metadata() model_metadata = dict( @@ -378,6 +417,8 @@ def create_model_metadata(self): model_metadata[key] = data for key, data in trans.get_transformer_specific_metadata(self.params).items(): model_metadata[key] = data + + model_metadata['seed'] = self.seed self.model_metadata = model_metadata @@ -431,6 +472,28 @@ def save_model_metadata(self, retries=5, sleep_sec=60): trkr.save_model_tarball(self.output_dir, self.params.model_tarball_path) self.model_wrapper._clean_up_excess_files(self.model_wrapper.model_dir) + # **************************************************************************************** + def create_split_metadata(self): + """Creates metadata for each split dataset. 
+ It will save the seed used to create the split dataset and relevant parameters.""" + self.split_data = dict( + dataset_key = self.params.dataset_key, + id_col = self.params.id_col, + smiles_col = self.params.smiles_col, + response_cols = self.params.response_cols, + seed = self.seed + ) + self.splitting_metadata = self.data.get_split_metadata() + self.split_data['splitting_metadata'] = self.splitting_metadata + + # **************************************************************************************** + def save_split_metadata(self): + out_file = os.path.join(self.output_dir, 'split_metadata.json') + + with open(out_file, 'w') as out: + json.dump(self.split_data, out, sort_keys=True, indent=4, separators=(',', ': ')) + out.write("\n") + # **************************************************************************************** def create_prediction_metadata(self, prediction_results): """Initializes a data structure to hold performance metrics from a model run on a new dataset, @@ -592,7 +655,7 @@ def train_model(self, featurization=None): ## create model wrapper if not split_only if not self.params.split_only: - self.model_wrapper = model_wrapper.create_model_wrapper(self.params, self.featurization, self.ds_client) + self.model_wrapper = model_wrapper.create_model_wrapper(self.params, self.featurization, self.ds_client, random_state=self.random_state, seed=self.seed) self.model_wrapper.setup_model_dirs() self.load_featurize_data() @@ -1089,7 +1152,9 @@ def run_models(params, shared_featurization=None, generator=False): # Create the ModelWrapper object. pipeline.model_wrapper = model_wrapper.create_model_wrapper(pipeline.params, featurization, - pipeline.ds_client) + pipeline.ds_client, + random_state=pipeline.random_state, + seed=pipeline.seed) # Get the tarball containing the saved model from the datastore, and extract it into model_dir. 
model_dataset_oid = metadata_dict['model_parameters']['model_dataset_oid'] @@ -1181,7 +1246,9 @@ def regenerate_results(result_dir, params=None, metadata_dict=None, shared_featu # Create the ModelWrapper object. pipeline.model_wrapper = model_wrapper.create_model_wrapper(pipeline.params, featurization, - pipeline.ds_client) + pipeline.ds_client, + random_state=pipeline.random_state, + seed=pipeline.seed) # Get the tarball containing the saved model from the datastore, and extract it into model_dir (old format) # or output_dir (new format) according to the format of the tarball contents. @@ -1409,7 +1476,9 @@ def create_prediction_pipeline_from_file(params, reload_dir, model_path=None, mo pipeline.orig_params = orig_params # Create the ModelWrapper object. - pipeline.model_wrapper = model_wrapper.create_model_wrapper(pipeline.params, featurization) + pipeline.model_wrapper = model_wrapper.create_model_wrapper(pipeline.params, featurization, + random_state=pipeline.random_state, + seed=pipeline.seed) orig_log_level = pipeline.log.getEffectiveLevel() if verbose: diff --git a/atomsci/ddm/pipeline/model_wrapper.py b/atomsci/ddm/pipeline/model_wrapper.py index 59e515b6..2c068f81 100644 --- a/atomsci/ddm/pipeline/model_wrapper.py +++ b/atomsci/ddm/pipeline/model_wrapper.py @@ -168,7 +168,7 @@ def all_bases(model): return result # **************************************************************************************** -def create_model_wrapper(params, featurizer, ds_client=None): +def create_model_wrapper(params, featurizer, ds_client=None, random_state=None, seed=None): """Factory function for creating Model objects of the correct subclass for params.model_type. 
Args: @@ -186,11 +186,11 @@ def create_model_wrapper(params, featurizer, ds_client=None): """ if params.model_type == 'NN': if params.featurizer == 'graphconv': - return GraphConvDCModelWrapper(params, featurizer, ds_client) + return GraphConvDCModelWrapper(params, featurizer, ds_client, random_state=random_state, seed=seed) else: - return MultitaskDCModelWrapper(params, featurizer, ds_client) + return MultitaskDCModelWrapper(params, featurizer, ds_client, random_state=random_state, seed=seed) elif params.model_type == 'RF': - return DCRFModelWrapper(params, featurizer, ds_client) + return DCRFModelWrapper(params, featurizer, ds_client, random_state=random_state, seed=seed) elif params.model_type == 'xgboost': if not xgboost_supported: raise Exception("Unable to import xgboost. \ @@ -206,9 +206,9 @@ def create_model_wrapper(params, featurizer, ds_client=None): installation: \ from pip: pip install xgboost==0.90") else: - return DCxgboostModelWrapper(params, featurizer, ds_client) + return DCxgboostModelWrapper(params, featurizer, ds_client, random_state=random_state, seed=seed) elif params.model_type == 'hybrid': - return HybridModelWrapper(params, featurizer, ds_client) + return HybridModelWrapper(params, featurizer, ds_client, random_state=random_state, seed=seed) elif params.model_type in pp.model_wl: requested_model = pp.model_wl[params.model_type] bases = all_bases(requested_model) @@ -217,9 +217,9 @@ def create_model_wrapper(params, featurizer, ds_client=None): if any(['TorchModel' in str(b) for b in bases]): if not afp_supported: raise Exception("dgl and dgllife packages must be installed to use attentive_fp model.") - return PytorchDeepChemModelWrapper(params, featurizer, ds_client) + return PytorchDeepChemModelWrapper(params, featurizer, ds_client, random_state=random_state, seed=seed) elif any(['KerasModel' in str(b) for b in bases]): - return KerasDeepChemModelWrapper(params, featurizer, ds_client) + return KerasDeepChemModelWrapper(params, featurizer, 
ds_client, random_state=random_state, seed=seed) else: raise ValueError("Unknown model_type %s" % params.model_type) @@ -254,7 +254,7 @@ class ModelWrapper(object): best_model_dir (str): The subdirectory under output_dir that contains the best model. Created in setup_model_dirs """ - def __init__(self, params, featurizer, ds_client): + def __init__(self, params, featurizer, ds_client, random_state=None, seed=None): """Initializes ModelWrapper object. Args: @@ -298,6 +298,9 @@ def __init__(self, params, featurizer, ds_client): self.transformers_x = trans.get_blank_transformations() self.transformers_w = trans.get_blank_transformations() + self.random_state = random_state + self.seed = seed + # **************************************************************************************** def setup_model_dirs(self): @@ -1220,7 +1223,7 @@ class HybridModelWrapper(NNModelWrapper): """ - def __init__(self, params, featurizer, ds_client): + def __init__(self, params, featurizer, ds_client, random_state=None, seed=None): """Initializes HybridModelWrapper object. Args: @@ -1245,7 +1248,8 @@ def __init__(self, params, featurizer, ds_client): model: dc.models.TorchModel """ - super().__init__(params, featurizer, ds_client) + super().__init__(params, featurizer, ds_client, random_state=random_state, seed=seed) + if self.params.layer_sizes is None: if self.params.featurizer == 'ecfp': self.params.layer_sizes = [1000, 500] @@ -1650,7 +1654,7 @@ class ForestModelWrapper(ModelWrapper): contains code that is similar between the two tree based classes """ - def __init__(self, params, featurizer, ds_client): + def __init__(self, params, featurizer, ds_client, random_state=None, seed=None): """Initializes DCRFModelWrapper object. Args: @@ -1659,7 +1663,7 @@ def __init__(self, params, featurizer, ds_client): featurizer (Featurization): Object managing the featurization of compounds ds_client: datastore client. 
""" - super().__init__(params, featurizer, ds_client) + super().__init__(params, featurizer, ds_client, random_state=random_state, seed=seed) self.best_model_dir = os.path.join(self.output_dir, 'best_model') self.model_dir = self.best_model_dir os.makedirs(self.best_model_dir, exist_ok=True) @@ -1860,7 +1864,7 @@ class DCRFModelWrapper(ForestModelWrapper): """ - def __init__(self, params, featurizer, ds_client): + def __init__(self, params, featurizer, ds_client, random_state=None, seed=None): """Initializes DCRFModelWrapper object. Args: @@ -1869,7 +1873,7 @@ def __init__(self, params, featurizer, ds_client): featurizer (Featurization): Object managing the featurization of compounds ds_client: datastore client. """ - super().__init__(params, featurizer, ds_client) + super().__init__(params, featurizer, ds_client, random_state=random_state, seed=seed) # **************************************************************************************** def make_dc_model(self, model_dir): @@ -1887,12 +1891,14 @@ def make_dc_model(self, model_dir): rf_model = RandomForestRegressor(n_estimators=self.params.rf_estimators, max_features=self.params.rf_max_features, max_depth=self.params.rf_max_depth, - n_jobs=-1) + n_jobs=-1, + random_state=self.seed) else: rf_model = RandomForestClassifier(n_estimators=self.params.rf_estimators, max_features=self.params.rf_max_features, max_depth=self.params.rf_max_depth, - n_jobs=-1) + n_jobs=-1, + random_state=self.seed) return dc.models.sklearn_models.SklearnModel(rf_model, model_dir=model_dir) @@ -2012,7 +2018,7 @@ class DCxgboostModelWrapper(ForestModelWrapper): """ - def __init__(self, params, featurizer, ds_client): + def __init__(self, params, featurizer, ds_client, random_state=None, seed=None): """Initializes RunModel object. Args: @@ -2021,7 +2027,7 @@ def __init__(self, params, featurizer, ds_client): featurizer (Featurization): Object managing the featurization of compounds ds_client: datastore client. 
""" - super().__init__(params, featurizer, ds_client) + super().__init__(params, featurizer, ds_client, random_state=random_state, seed=seed) # **************************************************************************************** def make_dc_model(self, model_dir): @@ -2052,7 +2058,7 @@ def make_dc_model(self, model_dir): reg_lambda=1, scale_pos_weight=1, base_score=0.5, - random_state=0, + random_state= self.seed, missing=np.nan, importance_type='gain', n_jobs=-1, @@ -2077,7 +2083,7 @@ def make_dc_model(self, model_dir): reg_lambda=1, scale_pos_weight=1, base_score=0.5, - random_state=0, + random_state=self.seed, importance_type='gain', missing=np.nan, gpu_id = -1, @@ -2190,7 +2196,7 @@ def reload_model(self, reload_dir): reg_lambda=1, scale_pos_weight=1, base_score=0.5, - random_state=0, + random_state=self.seed, missing=np.nan, importance_type='gain', n_jobs=-1, @@ -2215,7 +2221,7 @@ def reload_model(self, reload_dir): reg_lambda=1, scale_pos_weight=1, base_score=0.5, - random_state=0, + random_state=self.seed, importance_type='gain', missing=np.nan, gpu_id = -1, @@ -2368,7 +2374,7 @@ class PytorchDeepChemModelWrapper(NNModelWrapper): valid_perfs (dict): A dictionary of predicted values and metrics on the validation dataset """ - def __init__(self, params, featurizer, ds_client): + def __init__(self, params, featurizer, ds_client, random_state=None, seed=None): """Initializes AttentiveFPModelWrapper object. Creates the underlying DeepChem AttentiveFPModel instance. Args: @@ -2378,9 +2384,8 @@ def __init__(self, params, featurizer, ds_client): ds_client: datastore client. """ # use NNModelWrapper init. 
- super().__init__(params, featurizer, ds_client) + super().__init__(params, featurizer, ds_client, random_state=random_state, seed=seed) self.num_epochs_trained = 0 - self.model = self.recreate_model() # **************************************************************************************** @@ -2406,6 +2411,7 @@ def recreate_model(self, **kwargs): # build the model model = chosen_model( + seed = self.seed, **extracted_features ) @@ -2736,7 +2742,7 @@ class GraphConvDCModelWrapper(KerasDeepChemModelWrapper): """ - def __init__(self, params, featurizer, ds_client): + def __init__(self, params, featurizer, ds_client, random_state=None, seed=None): """Initializes GraphConvDCModelWrapper object. Args: @@ -2764,12 +2770,11 @@ def __init__(self, params, featurizer, ds_client): model: The dc.models.GraphConvModel, MultitaskRegressor, or MultitaskClassifier object, as specified by the params attribute """ - super().__init__(params, featurizer, ds_client) + super().__init__(params, featurizer, ds_client, random_state=random_state, seed=seed) # TODO (ksm): The next two attributes aren't used; suggest we drop them.
self.g = tf.Graph() self.sess = tf.compat.v1.Session(graph=self.g) self.num_epochs_trained = 0 - self.model = self.recreate_model(model_dir=self.model_dir) # **************************************************************************************** @@ -2807,7 +2812,8 @@ def recreate_model(self, model_dir=None): dense_layer_size=self.params.layer_sizes[-1], dropout=self.params.dropouts, penalty=self.params.weight_decay_penalty, - penalty_type=self.params.weight_decay_penalty_type) + penalty_type=self.params.weight_decay_penalty_type, + seed=self.seed) return model # **************************************************************************************** diff --git a/atomsci/ddm/pipeline/parameter_parser.py b/atomsci/ddm/pipeline/parameter_parser.py index 7e3b4e0c..c7b1a3e4 100644 --- a/atomsci/ddm/pipeline/parameter_parser.py +++ b/atomsci/ddm/pipeline/parameter_parser.py @@ -537,7 +537,7 @@ def get_list_args(self): } convert_to_int_list = {'layer_sizes','rf_max_features','rf_estimators', 'rf_max_depth', 'umap_dim', 'umap_neighbors', 'layer_nums', 'node_nums', - 'xgb_max_depth', 'xgb_n_estimators'}.union(all_auto_int_lists()) + 'xgb_max_depth', 'xgb_n_estimators', 'seed'}.union(all_auto_int_lists()) convert_to_numeric_list = convert_to_float_list | convert_to_int_list keep_as_list = {'dropouts','weight_init_stddevs','bias_init_consts', 'layer_sizes','dropout_list','layer_nums'}.union(all_auto_lists()) @@ -1038,6 +1038,10 @@ def get_parser(): '--verbose', dest='verbose', action='store_true', help='True/False flag for setting verbosity') parser.set_defaults(verbose=False) + parser.add_argument( + '--seed', dest='seed', default=None, + help='Random seed used for initializing the random number generator to ensure results are reproducible.' 
+ 'Default is None and a random seed will be generated.') # ********************************************************************************************************** # model_building_parameters: graphconv @@ -1224,6 +1228,19 @@ def get_parser(): help='Type of splitter to use: index, random, scaffold, butina, ave_min, temporal, fingerprint, multitaskscaffold or stratified.' ' Used to set the splitting.py subclass. Can be input as a comma separated list for hyperparameter search' ' (e.g. \'scaffold\',\'random\')') + # sampling specific parameters (imbalance-learn) + parser.add_argument( + '--sampling_method', dest='sampling_method', type=str, default=None, + help='Method for sampling to address class imbalance (e.g., \'undersampling\', \'SMOTE\')') + + parser.add_argument( + '--sampling_ratio', dest='sampling_ratio', type=str, default='auto', + help='The "sampling_ratio" parameter of SMOTE must be a float in the range (0.0, 1.0], a str ' + 'among {"auto", "not majority", "minority", "all", "not minority"}') + parser.add_argument( + '--sampling_k_neighbors', dest='sampling_k_neighbors', type=int, default=5, + help='The nearest neighbors used to define the neighborhood of samples to use to generate the synthetic samples. 
Specifically used for SMOTE.') + parser.add_argument( '--mtss_num_super_scaffolds', default=40, type=int, @@ -1731,8 +1748,37 @@ def postprocess_args(parsed_args): if vars(parsed_args).get('dataset_key') and os.path.exists(parsed_args.dataset_key): _ = mto.many_to_one(fn=parsed_args.dataset_key, smiles_col=parsed_args.smiles_col, id_col=parsed_args.id_col) + # Validates the sampling_ratio argument for SMOTE and undersampling + parsed_args.sampling_ratio = validate_sampling_strategy_argument(parsed_args.sampling_ratio) + return parsed_args +#*********************************************************************************************************** +def validate_sampling_strategy_argument(value): + """Validates sampling_strategy parameter for SMOTE and undersampling. + Validates that the input value is either a float in the range (0.0, 1.0] or a string among + {'auto', 'not majority', 'minority', 'all', 'not minority'}. Raises a ValueError if the validation fails. + + Args: + value (str): The input value to validate. + + Raises: + ValueError: If the value is not a float in the range (0.0, 1.0] or a valid string. 
+ """ + valid_strings = {"auto", "not majority", "minority", "all", "not minority"} + + try: + float_value = float(value) + if float_value <= 0.0 or float_value > 1.0: + raise ValueError(f"Value '{value}' is not a float in the range (0.0, 1.0].") + else: + return float_value + except ValueError: + if value not in valid_strings: + raise ValueError(f"Value '{value}' is not a valid string among {valid_strings}.") + else: + return value + #*********************************************************************************************************** def make_dataset_key_absolute(parsed_args): diff --git a/atomsci/ddm/pipeline/perf_data.py b/atomsci/ddm/pipeline/perf_data.py index 727958af..a8931c5a 100644 --- a/atomsci/ddm/pipeline/perf_data.py +++ b/atomsci/ddm/pipeline/perf_data.py @@ -210,28 +210,6 @@ def __init__(self, model_dataset, subset): self.model_score = None self.weights = None - # **************************************************************************************** - def accumulate_preds(self, predicted_vals, ids, pred_stds=None): - """Raises: - NotImplementedError: The method is implemented by subclasses - """ - raise NotImplementedError - - # **************************************************************************************** - def get_pred_values(self): - """Raises: - NotImplementedError: The method is implemented by subclasses - """ - raise NotImplementedError - - # **************************************************************************************** - def compute_perf_metrics(self, per_task=False): - """Raises: - NotImplementedError: The method is implemented by subclasses - """ - raise NotImplementedError - - # **************************************************************************************** # class RegressionPerfData def model_choice_score(self, score_type='r2'): @@ -399,28 +377,6 @@ def __init__(self, model_dataset, subset): self.model_score = None self.weights = None - # 
**************************************************************************************** - def accumulate_preds(self, predicted_vals, ids, pred_stds=None): - """Raises: - NotImplementedError: The method is implemented by subclasses - """ - raise NotImplementedError - - # **************************************************************************************** - def get_pred_values(self): - """Raises: - NotImplementedError: The method is implemented by subclasses - """ - raise NotImplementedError - - # **************************************************************************************** - def compute_perf_metrics(self, per_task=False): - """Raises: - NotImplementedError: The method is implemented by subclasses - """ - raise NotImplementedError - - # **************************************************************************************** # class HybridPerfData def model_choice_score(self, score_type='r2'): @@ -623,22 +579,7 @@ def __init__(self, model_dataset, subset): self.perf_metrics = [] self.model_score = None self.weights = None - - # **************************************************************************************** - def accumulate_preds(self, predicted_vals, ids, pred_stds=None): - """Raises: - NotImplementedError: The method is implemented by subclasses - """ - raise NotImplementedError - - # **************************************************************************************** - def get_pred_values(self): - """Raises: - NotImplementedError: The method is implemented by subclasses - """ - raise NotImplementedError - - + # **************************************************************************************** # class ClassificationPerfData def model_choice_score(self, score_type='roc_auc'): @@ -658,8 +599,8 @@ def model_choice_score(self, score_type='roc_auc'): """ ids, pred_classes, class_probs, prob_stds = self.get_pred_values() - real_vals = self.get_real_values() - weights = self.get_weights() + real_vals = self.get_real_values(ids=ids) + 
weights = self.get_weights(ids=ids) scores = [] for i in range(self.num_tasks): @@ -997,7 +938,11 @@ def get_pred_values(self): otherwise. """ - ids = sorted(self.pred_vals.keys()) + #ids = sorted(self.pred_vals.keys()) + all_ids = sorted(self.pred_vals.keys()) + # with kfold + SMOTE, not all ids have predictions + ids = [id for id in all_ids if not (self.pred_vals[id].size == 0)] + if self.subset in ['train', 'test', 'train_valid']: vals = np.concatenate([self.pred_vals[id].mean(axis=0, keepdims=True).reshape((1,-1)) for id in ids]) if self.folds > 1: @@ -1153,6 +1098,8 @@ def __init__(self, model_dataset, subset, predict_probs=True): self.num_cmpds = dataset.y.shape[0] self.num_tasks = dataset.y.shape[1] self.num_classes = len(set(model_dataset.dataset.y.flatten())) + # pred vals maps compound ids to a matrix of predictions. + # predictions will be concatenated one by one as they come in via accumulate_preds self.pred_vals = dict([(id, np.empty((0, self.num_tasks, self.num_classes), dtype=np.float32)) for id in dataset.ids]) real_vals, self.weights = model_dataset.get_subset_responses_and_weights(self.subset) @@ -1195,6 +1142,7 @@ def accumulate_preds(self, predicted_vals, ids, pred_stds=None): """ class_probs = self._reshape_preds(predicted_vals) for i, id in enumerate(ids): + # Record predictions for each compound. self.pred_vals[id] = np.concatenate([self.pred_vals[id], class_probs[i,:,:].reshape((1,self.num_tasks,-1))], axis=0) self.folds += 1 real_vals = self.get_real_values(ids=ids) @@ -1236,7 +1184,10 @@ def get_pred_values(self): probability estimates (only available for the 'train' and 'test' subsets; None otherwise). 
""" - ids = sorted(self.pred_vals.keys()) + all_ids = sorted(self.pred_vals.keys()) + # with kfold + SMOTE, not all ids have predictions + ids = [id for id in all_ids if not (self.pred_vals[id].size == 0)] + if self.subset in ['train', 'test', 'train_valid']: class_probs = np.concatenate([self.pred_vals[id].mean(axis=0, keepdims=True) for id in ids], axis=0) @@ -1392,7 +1343,7 @@ def __init__(self, model_dataset, subset): # **************************************************************************************** # class SimpleRegressionPerfData - def accumulate_preds(self, predicted_vals, ids, pred_stds=None): + def accumulate_preds(self, predicted_vals, ids=None, pred_stds=None): """Add training, validation or test set predictions to the data structure where we keep track of them. diff --git a/atomsci/ddm/pipeline/random_seed.py b/atomsci/ddm/pipeline/random_seed.py new file mode 100644 index 00000000..847a62ae --- /dev/null +++ b/atomsci/ddm/pipeline/random_seed.py @@ -0,0 +1,66 @@ +""" Used to set random seed from parameter_parser for reproducibility. """ +import numpy as np +import uuid +import random +import torch +import tensorflow as tf +import logging +logging.basicConfig(format='%(asctime)-15s %(message)s') +#---------------------------------------------------------------------------------- +class RandomStateGenerator: + """ + A class to manage random state and seed generation for reproducible randomness. + + Attributes: + params: Additional parameters. + seed: The seed for the random state. + random_state: The random state generator. + """ + def __init__(self, params=None, seed=None): + self.params = params + if seed is not None: + self.seed = seed + elif self.params.seed is not None: + self.seed = self.params.seed + else: + self.seed = uuid.uuid4().int % (2**32) + self.set_seed(self.seed) + + def set_seed(self, seed): + log = logging.getLogger('ATOM') + log.warning("The global seed is being set to %d, for reproducibility. 
Note that this action will synchronize the randonmess across all libraries which may impact the randomness of other parts of the pipeline.", seed) + """Set the seed for all relevant libraries.""" + + global _seed, _random_state + _seed = seed + + _random_state = np.random.default_rng(_seed) + + # set seed for numpy + np.random.default_rng(_seed) + + # needed for deepchem + np.random.seed(_seed) + + # set seed for random + random.seed(_seed) + + # set seed for PyTorch + torch.manual_seed(_seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(_seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + # set seed for tensorflow + tf.random.set_seed(_seed) + + self.random_state = _random_state + + def get_seed(self): + """Returns the seed when called""" + return self.seed + + def get_random_state(self): + """Returns the random state when called""" + return self.random_state \ No newline at end of file diff --git a/atomsci/ddm/pipeline/sampling.py b/atomsci/ddm/pipeline/sampling.py new file mode 100644 index 00000000..c1fb28b1 --- /dev/null +++ b/atomsci/ddm/pipeline/sampling.py @@ -0,0 +1,55 @@ +"""Module used to perform sampling on classification datasets.""" +import numpy as np +# sampling specific libraries +from imblearn.over_sampling import SMOTE +from imblearn.under_sampling import RandomUnderSampler +# deepchem for dataset +import deepchem as dc +# ===================================================================================================== +def apply_sampling_method(train, params, random_state=None, seed=None): + """ + Apply a sampling method to a classification dataset when split_strategy=='train_valid_test' + + Inputs: + - train: DeepChem NumpyDataset with train.X, train.y, train.w, and train.ids + - params (NameSpace object): contains all the parameter information. 
+ Returns: + - train_resampled: a DeepChem NumpyDataset with train.X, train.y, train.w, and train.ids + """ + sampling_ratio = params.sampling_ratio + + if params.sampling_method=='SMOTE': + sampling_k_neighbors = params.sampling_k_neighbors # smote specific parameter + smote=SMOTE(sampling_strategy=sampling_ratio, k_neighbors=sampling_k_neighbors, random_state=seed) + X_resampled, y_resampled = smote.fit_resample(train.X, train.y.ravel()) + y_resampled=y_resampled.reshape(-1, 1) + + # calculate synthetic weights + num_original = len(train.X) + num_synthetic = len(X_resampled)-num_original + + # set the new weights equal to 1 + average_weight = 1 #np.mean(train.w) + synthetic_weights=np.full((num_synthetic,1), average_weight, dtype=np.float64) + resampled_weights=np.concatenate([train.w, synthetic_weights]) + + # update the id length with synthetic ids for any newly introduced data + synthetic_ids = [f"synthetic_{i}" for i in range(num_synthetic)] + new_ids = np.concatenate([train.ids, synthetic_ids]) + + elif params.sampling_method == 'undersampling': + undersampler = RandomUnderSampler(sampling_strategy=sampling_ratio, random_state=seed) + X_resampled, y_resampled = undersampler.fit_resample(train.X, train.y.ravel()) + y_resampled=y_resampled.reshape(-1, 1) + + #adjust weights and ids + resampled_indices = undersampler.sample_indices_ + resampled_weights = train.w[resampled_indices] + new_ids = train.ids[resampled_indices] + + else: + raise ValueError(f"Unknown sampling method: {params.sampling_method}. 
Supported methods are 'SMOTE' and 'undersampling'.") + # return a new dc.data.NumpyDataset with the resampled data and the matching resampled weights and ids + train_resampled= dc.data.NumpyDataset(X_resampled, y_resampled, resampled_weights, new_ids) + + return train_resampled \ No newline at end of file diff --git a/atomsci/ddm/pipeline/splitting.py b/atomsci/ddm/pipeline/splitting.py index d79a9c71..1b8ca52a 100644 --- a/atomsci/ddm/pipeline/splitting.py +++ b/atomsci/ddm/pipeline/splitting.py @@ -27,7 +27,7 @@ 'mtss_train_valid_dist_weight', 'mtss_split_fraction_weight', 'mtss_num_pop', 'mtss_response_distr_weight'] -def create_splitting(params): +def create_splitting(params, random_state=None, seed=None): """Factory function to create appropriate type of Splitting object, based on dataset parameters Args: @@ -43,11 +43,11 @@ def create_splitting(params): """ if params.production: - return ProductionSplitting(params) + return ProductionSplitting(params, random_state=random_state, seed=seed) elif params.split_strategy == 'train_valid_test': - return TrainValidTestSplitting(params) + return TrainValidTestSplitting(params, random_state=random_state, seed=seed) elif params.split_strategy == 'k_fold_cv': - return KFoldSplitting(params) + return KFoldSplitting(params, random_state=random_state, seed=seed) else: raise Exception("Unknown split strategy %s" % params.split_strategy) @@ -172,7 +172,7 @@ class Splitting(object): """ - def __init__(self, params): + def __init__(self, params, random_state=None, seed=None): """Constructor, also serves as a factory method for creating the associated DeepChem splitter object Args: @@ -193,9 +193,14 @@ def __init__(self, params): splitter (Deepchem split object): A splitting object of the subtype specified by split """ + self.random_state = random_state + self.seed = seed + self.params = params self.split = params.splitter - if params.splitter == 'index': + if params.production: + self.splitter = ProductionSplitter() + elif params.splitter 
== 'index': self.splitter = dc.splits.IndexSplitter() elif params.splitter == 'random': self.splitter = dc.splits.RandomSplitter() @@ -280,7 +285,7 @@ class KFoldSplitting(Splitting): """ - def __init__(self, params): + def __init__(self, params, random_state=None, seed=None): """Initialization method for KFoldSplitting. Sets the following attributes for KFoldSplitting: @@ -293,9 +298,10 @@ def __init__(self, params): num_folds (int): The number of k-fold splits to perform """ - super().__init__(params) + super().__init__(params, random_state, seed) self.num_folds = params.num_folds + # **************************************************************************************** def get_split_prefix(self, parent=''): @@ -364,13 +370,13 @@ def split_dataset(self, dataset, attr_df, smiles_col): # Use DeepChem train_test_split() to select held-out test set; then use k_fold_split on the # training set to split it into training/validation folds. if self.split == 'butina': - train_cv, test, _ = self.splitter.train_valid_test_split(dataset) + train_cv, test, _ = self.splitter.train_valid_test_split(dataset, seed=self.seed) self.splitter = dc.splits.ScaffoldSplitter() - train_cv_pairs = self.splitter.k_fold_split(train_cv, self.num_folds) + train_cv_pairs = self.splitter.k_fold_split(train_cv, self.num_folds, seed=self.seed) else: # TODO: Add special handling for AVE splitter - train_cv, test = self.splitter.train_test_split(dataset, frac_train=train_frac) - train_cv_pairs = self.splitter.k_fold_split(train_cv, self.num_folds) + train_cv, test = self.splitter.train_test_split(dataset, frac_train=train_frac, seed=self.seed) + train_cv_pairs = self.splitter.k_fold_split(train_cv, self.num_folds, seed=self.seed) train_valid_dsets = [] train_valid_attr = [] @@ -403,7 +409,7 @@ class TrainValidTestSplitting(Splitting): """ - def __init__(self, params): + def __init__(self, params, random_state=None, seed=None): """Initialization method for TrainValidTestSplitting. 
Sets the following attributes for TrainValidTestSplitting: @@ -416,7 +422,7 @@ def __init__(self, params): num_folds (int): The number of k-fold splits to perform. In this case, it is always set to 1 """ - super().__init__(params) + super().__init__(params, random_state=random_state, seed=seed) self.num_folds = 1 # **************************************************************************************** @@ -490,11 +496,11 @@ def split_dataset(self, dataset, attr_df, smiles_col): if self.split == 'butina': # Can't use train_test_split with Butina because Butina splits into train and valid sets only. - train_valid, test, _ = self.splitter.train_valid_test_split(dataset) + train_valid, test, _ = self.splitter.train_valid_test_split(dataset, seed=self.seed) self.splitter = dc.splits.ScaffoldSplitter() # With Butina splitting, we don't have control over the size of the test set train_frac = 1.0 - self.params.split_valid_frac - train, valid = self.splitter.train_test_split(train_valid, frac_train=train_frac) + train, valid = self.splitter.train_test_split(train_valid, frac_train=train_frac, seed=self.seed) elif self.split == 'ave_min': # AVEMinSplitter also only does train-valid splits, but at least nested splits seem to work. # TODO: Change this if we modify AVE splitter to do 3-way splits internally. 
@@ -503,11 +509,11 @@ def split_dataset(self, dataset, attr_df, smiles_col): log.info("Performing split for test set") train_valid, test, _ = self.splitter.train_valid_test_split(dataset, frac_train=train_valid_frac, frac_valid=self.params.split_test_frac, - frac_test=0.0) + frac_test=0.0, seed=self.seed) log.info("Performing split of training and validation sets") train, valid, _ = self.splitter.train_valid_test_split(train_valid, frac_train=train_frac/train_valid_frac, frac_valid=self.params.split_valid_frac/train_valid_frac, - frac_test=0.0) + frac_test=0.0, seed=self.seed) log.info("Results of 3-way split: %d training, %d validation, %d test compounds" % ( train.X.shape[0], valid.X.shape[0], test.X.shape[0])) elif self.split == 'temporal': @@ -529,11 +535,12 @@ def split_dataset(self, dataset, attr_df, smiles_col): response_distr_fitness_weight=self.params.mtss_response_distr_weight, num_super_scaffolds=self.params.mtss_num_super_scaffolds, num_pop=self.params.mtss_num_pop, - num_generations=self.params.mtss_num_generations) + num_generations=self.params.mtss_num_generations, + seed=self.seed) else: train_frac = 1.0 - self.params.split_valid_frac - self.params.split_test_frac train, valid, test = self.splitter.train_valid_test_split(dataset, - frac_train=train_frac, frac_valid=self.params.split_valid_frac, frac_test=self.params.split_test_frac) + frac_train=train_frac, frac_valid=self.params.split_valid_frac, frac_test=self.params.split_test_frac, seed=self.seed) # After splitting unique compound_ids or SMILES are expanded train, train_attr = dm.expand_selection(train.ids) @@ -558,10 +565,11 @@ def split( # **************************************************************************************** class ProductionSplitting(Splitting): - def __init__(self, params): + def __init__(self, params, random_state=None, seed=None): """This Splitting only does one thing and ignores all splitter parameters""" - self.splitter = ProductionSplitter() + super().__init__(params, 
random_state=random_state, seed=seed) self.split = 'production' + self.num_folds = 1 # **************************************************************************************** def get_split_prefix(self, parent=''): @@ -621,7 +629,7 @@ def split_dataset(self, dataset, attr_df, smiles_col): dm = DatasetManager(dataset=dataset, attr_df=attr_df, smiles_col=smiles_col, needs_smiles=self.needs_smiles()) dataset = dm.compact_dataset() - train, valid, test = self.splitter.train_valid_test_split(dataset) + train, valid, test = self.splitter.train_valid_test_split(dataset, seed=self.seed) # After splitting unique compound_ids or SMILES are expanded train, train_attr = dm.expand_selection(train.ids) diff --git a/atomsci/ddm/test/integrative/balancing_trans/jsons/SMOTE_balancing_transformer.json b/atomsci/ddm/test/integrative/balancing_trans/jsons/SMOTE_balancing_transformer.json new file mode 100644 index 00000000..49d7f4ab --- /dev/null +++ b/atomsci/ddm/test/integrative/balancing_trans/jsons/SMOTE_balancing_transformer.json @@ -0,0 +1,31 @@ +{ + "dataset_key" : "replaced", + "datastore" : "False", + "uncertainty": "False", + "splitter": "scaffold", + "split_valid_frac": "0.20", + "split_test_frac": "0.20", + "split_strategy": "train_valid_test", + "prediction_type": "classification", + "model_choice_score_type": "roc_auc", + "response_cols" : "active", + "id_col": "compound_id", + "smiles_col" : "rdkit_smiles", + "result_dir": "replaced", + "system": "LC", + "transformers": "True", + "model_type": "NN", + "featurizer": "computed_descriptors", + "descriptor_type": "rdkit_raw", + "weight_transform_type": "balancing", + "learning_rate": ".0007", + "layer_sizes": "512,128", + "dropouts": "0.3,0.3", + "save_results": "False", + "max_epochs": "2", + "early_stopping_patience": "2", + "verbose": "False", + "sampling_method": "SMOTE", + "sampling_ratio": "0.5", + "seed":"0" + } \ No newline at end of file diff --git 
a/atomsci/ddm/test/integrative/balancing_trans/test_balancing_transformer.py b/atomsci/ddm/test/integrative/balancing_trans/test_balancing_transformer.py index f09369e7..b1b156c7 100644 --- a/atomsci/ddm/test/integrative/balancing_trans/test_balancing_transformer.py +++ b/atomsci/ddm/test/integrative/balancing_trans/test_balancing_transformer.py @@ -23,16 +23,49 @@ def test_balancing_transformer(): res_dir = tempfile.mkdtemp() + print('-=======normal balancing===================================') balanced_params = params_w_balan(dset_key, res_dir) balanced_weights = make_pipeline_and_get_weights(balanced_params) (major_weight, minor_weight), (major_count, minor_count) = np.unique(balanced_weights, return_counts=True) assert major_weight < minor_weight assert major_count > minor_count + print('-==========================================') + print('-=======no balancing===================================') nonbalanced_params = params_wo_balan(dset_key, res_dir) nonbalanced_weights = make_pipeline_and_get_weights(nonbalanced_params) (weight,), (count,) = np.unique(nonbalanced_weights, return_counts=True) assert weight == 1 + print('-==========================================') + + print('-=======SMOTE balancing===================================') + smote_balanced_params = params_w_SMOTE_balan(dset_key, res_dir) + smote_balanced_params['sampling_ratio'] = 1 + print('sampling_ratio: ', smote_balanced_params['sampling_ratio']) + smote_balanced_weights = make_pipeline_and_get_weights(smote_balanced_params) + # all weights should be the same + (weight1,), (count1,)= np.unique(smote_balanced_weights, return_counts=True) + print('-==========================================') + + print('-=======SMOTE 0.5 balancing===================================') + smote_balanced_params = params_w_SMOTE_balan(dset_key, res_dir) + smote_balanced_params['sampling_ratio'] = 0.5 + smote_balanced_weights = make_pipeline_and_get_weights(smote_balanced_params) + (major_weight, minor_weight), 
(major_count, minor_count) = np.unique(smote_balanced_weights, return_counts=True) + # there should be twice as many major class as minor class + assert abs((major_weight*2) - minor_weight) < .0001 + assert abs(major_count - (minor_count * 2)) < .0001 + print('-==========================================') + +def params_w_SMOTE_balan(dset_key, res_dir): + # Try with SMOTE with ratio set to .50 + params = read_params( + make_relative_to_file('jsons/SMOTE_balancing_transformer.json'), + dset_key, + res_dir + ) + + return params def test_all_transformers(): """ @@ -331,6 +364,6 @@ def test_kfold_regression_transformers(): if __name__ == '__main__': test_kfold_regression_transformers() - #test_kfold_transformers() - #test_all_transformers() - #test_balancing_transformer() \ No newline at end of file + test_kfold_transformers() + test_all_transformers() + test_balancing_transformer() \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/dc_models/reg_config_H1_fit_GCNModel.json b/atomsci/ddm/test/integrative/dc_models/reg_config_H1_fit_GCNModel.json index df299aee..f173fb3b 100644 --- a/atomsci/ddm/test/integrative/dc_models/reg_config_H1_fit_GCNModel.json +++ b/atomsci/ddm/test/integrative/dc_models/reg_config_H1_fit_GCNModel.json @@ -7,6 +7,8 @@ "data_owner": "username", "parser_version":"1.0", + "seed":"2015821819", + "comment": "Input file", "comment": "----------------------------------------", "comment": "Note: dataset_key must be a path/file name: E.G. 
./dataset.csv", @@ -44,5 +46,11 @@ "comment": "Results", "comment": "----------------------------------------", - "result_dir": "result" + "result_dir": "result", + + "comment": "Test", + "comment": "----------------------------------------", + "comment": "with the seed, the result should be 0.9871578924396036.", + "perf_threshold": "0.98" + } diff --git a/atomsci/ddm/test/integrative/dc_models/reg_config_H1_fit_GraphConvModel.json b/atomsci/ddm/test/integrative/dc_models/reg_config_H1_fit_GraphConvModel.json index 37af3d78..29277b7c 100644 --- a/atomsci/ddm/test/integrative/dc_models/reg_config_H1_fit_GraphConvModel.json +++ b/atomsci/ddm/test/integrative/dc_models/reg_config_H1_fit_GraphConvModel.json @@ -50,5 +50,6 @@ "comment": "Test", "comment": "just needs to run, doesn't need to pass", "comment": "----------------------------------------", - "perf_threshold": "0.3" + "perf_threshold": "0.3", + "seed": "0" } diff --git a/atomsci/ddm/test/integrative/dc_models/test_retrain_dc_models.py b/atomsci/ddm/test/integrative/dc_models/test_retrain_dc_models.py index 39d057a1..1013b484 100644 --- a/atomsci/ddm/test/integrative/dc_models/test_retrain_dc_models.py +++ b/atomsci/ddm/test/integrative/dc_models/test_retrain_dc_models.py @@ -153,7 +153,7 @@ def train_and_predict(train_json_f, prefix='delaney-processed'): return tar_f -def verify_saved_params(original_json_f, tar_f): +def verify_saved_params(original_json_f, tar_f, keep_seed=False): """compares saved params in a tar file with original json""" reload_dir = tempfile.mkdtemp() with tarfile.open(tar_f, mode='r:gz') as tar: @@ -187,9 +187,20 @@ def verify_saved_params(original_json_f, tar_f): print(tar_feat_params) assert original_feat_params == tar_feat_params -def retrain(tar_f, prefix='H1'): + print('-----------------------------------') + print('seeds') + print(original_pp.seed) + print(tar_pp.seed) + assert original_pp.seed is not None + assert tar_pp.seed is not None + if keep_seed: + assert original_pp.seed 
== tar_pp.seed + else: + assert original_pp.seed != tar_pp.seed + +def retrain(tar_f, prefix='H1', keep_seed=False): """retrain a model from tar_f""" - model = mr.train_model_from_tar(tar_f, 'result') + model = mr.train_model_from_tar(tar_f, 'result', keep_seed=keep_seed) uuid = model.params.model_uuid re_tar_f = f'result/{prefix}_curated_fit_model_{uuid}.tar.gz' @@ -212,7 +223,7 @@ def H1_init(): # Train and Predict # ----- -def test_reg_config_H1_fit_AttentiveFPModel(): +def run_test_reg_config_H1_fit_AttentiveFPModel(keep_seed): if not llnl_utils.is_lc_system(): assert True return @@ -223,12 +234,16 @@ def test_reg_config_H1_fit_AttentiveFPModel(): verify_saved_params(json_f, tar_f) - re_tar_f = retrain(tar_f, 'H1') + re_tar_f = retrain(tar_f, 'H1', keep_seed=keep_seed) - verify_saved_params(json_f, re_tar_f) + verify_saved_params(json_f, re_tar_f, keep_seed=keep_seed) + +def test_reg_config_H1_fit_AttentiveFPModel(): + run_test_reg_config_H1_fit_AttentiveFPModel(True) + run_test_reg_config_H1_fit_AttentiveFPModel(False) # ----- -def test_reg_config_H1_fit_GCNModel(): +def run_test_reg_config_H1_fit_GCNModel(keep_seed): if not llnl_utils.is_lc_system(): assert True return @@ -239,12 +254,16 @@ def test_reg_config_H1_fit_GCNModel(): verify_saved_params(json_f, tar_f) - re_tar_f = retrain(tar_f, 'H1') + re_tar_f = retrain(tar_f, 'H1', keep_seed=keep_seed) - verify_saved_params(json_f, re_tar_f) + verify_saved_params(json_f, re_tar_f, keep_seed=keep_seed) + +def test_reg_config_H1_fit_GCNModel(): + run_test_reg_config_H1_fit_GCNModel(True) + run_test_reg_config_H1_fit_GCNModel(False) # ----- -def test_reg_config_H1_fit_MPNNModel(): +def run_test_reg_config_H1_fit_MPNNModel(keep_seed): if not llnl_utils.is_lc_system(): assert True return @@ -255,11 +274,15 @@ def test_reg_config_H1_fit_MPNNModel(): verify_saved_params(json_f, tar_f) - re_tar_f = retrain(tar_f, 'H1') + re_tar_f = retrain(tar_f, 'H1', keep_seed=keep_seed) - verify_saved_params(json_f, re_tar_f) + 
verify_saved_params(json_f, re_tar_f, keep_seed=keep_seed) -def test_reg_config_H1_fit_GraphConvModel(): +def test_reg_config_H1_fit_MPNNModel(): + run_test_reg_config_H1_fit_MPNNModel(True) + run_test_reg_config_H1_fit_MPNNModel(False) + +def run_test_reg_config_H1_fit_GraphConvModel(keep_seed): if not llnl_utils.is_lc_system(): assert True return @@ -270,11 +293,15 @@ def test_reg_config_H1_fit_GraphConvModel(): verify_saved_params(json_f, tar_f) - re_tar_f = retrain(tar_f, 'H1') + re_tar_f = retrain(tar_f, 'H1', keep_seed=keep_seed) - verify_saved_params(json_f, re_tar_f) + verify_saved_params(json_f, re_tar_f, keep_seed=keep_seed) -def test_reg_config_H1_fit_PytorchMPNNModel(): +def test_reg_config_H1_fit_GraphConvModel(): + run_test_reg_config_H1_fit_GraphConvModel(True) + run_test_reg_config_H1_fit_GraphConvModel(False) + +def run_test_reg_config_H1_fit_PytorchMPNNModel(keep_seed): if not llnl_utils.is_lc_system(): assert True return @@ -285,9 +312,13 @@ def test_reg_config_H1_fit_PytorchMPNNModel(): verify_saved_params(json_f, tar_f) - re_tar_f = retrain(tar_f, 'H1') + re_tar_f = retrain(tar_f, 'H1', keep_seed=keep_seed) - verify_saved_params(json_f, re_tar_f) + verify_saved_params(json_f, re_tar_f, keep_seed) + +def test_reg_config_H1_fit_PytorchMPNNModel(): + run_test_reg_config_H1_fit_PytorchMPNNModel(True) + run_test_reg_config_H1_fit_PytorchMPNNModel(False) if __name__ == '__main__': test_reg_config_H1_fit_PytorchMPNNModel() # Pytorch implementation of MPNNModel diff --git a/atomsci/ddm/test/integrative/delaney_NN/test_delaney_NN.py b/atomsci/ddm/test/integrative/delaney_NN/test_delaney_NN.py index c88adfcd..1b9e874f 100644 --- a/atomsci/ddm/test/integrative/delaney_NN/test_delaney_NN.py +++ b/atomsci/ddm/test/integrative/delaney_NN/test_delaney_NN.py @@ -103,7 +103,7 @@ def test(): # Check training statistics # ------------------------- - integrative_utilities.training_statistics_file(reload_dir, 'test', 0.6) + 
integrative_utilities.training_statistics_file(reload_dir, 'test', 0.55) # Make prediction parameters # -------------------------- diff --git a/atomsci/ddm/test/integrative/delaney_Panel/jsons/class_config_delaney_fit_NN_ecfp.json b/atomsci/ddm/test/integrative/delaney_Panel/jsons/class_config_delaney_fit_NN_ecfp.json index 94683dcb..39a7cf99 100644 --- a/atomsci/ddm/test/integrative/delaney_Panel/jsons/class_config_delaney_fit_NN_ecfp.json +++ b/atomsci/ddm/test/integrative/delaney_Panel/jsons/class_config_delaney_fit_NN_ecfp.json @@ -37,5 +37,7 @@ "comment": "Results", "comment": "----------------------------------------", - "result_dir": "result" + "result_dir": "result", + + "seed":"3173915729" } diff --git a/atomsci/ddm/test/integrative/delaney_Panel/jsons/reg_config_delaney_fit_NN_graphconv.json b/atomsci/ddm/test/integrative/delaney_Panel/jsons/reg_config_delaney_fit_NN_graphconv.json index dd5d7fdf..aa8461da 100644 --- a/atomsci/ddm/test/integrative/delaney_Panel/jsons/reg_config_delaney_fit_NN_graphconv.json +++ b/atomsci/ddm/test/integrative/delaney_Panel/jsons/reg_config_delaney_fit_NN_graphconv.json @@ -40,6 +40,7 @@ "comment": "Test", "comment": "----------------------------------------", - "perf_threshold": "0.50" + "perf_threshold": "0.50", + "seed":0 } diff --git a/atomsci/ddm/test/integrative/delaney_RF/config_delaney_fit_RF.json b/atomsci/ddm/test/integrative/delaney_RF/config_delaney_fit_RF.json index 0aaf9a37..0941eb2e 100644 --- a/atomsci/ddm/test/integrative/delaney_RF/config_delaney_fit_RF.json +++ b/atomsci/ddm/test/integrative/delaney_RF/config_delaney_fit_RF.json @@ -25,6 +25,7 @@ "comment": "Model", "comment": "----------------------------------------", "model_type": "RF", + "seed": "0", "comment": "Results", "comment": "----------------------------------------", diff --git a/atomsci/ddm/test/integrative/integrative_utilities.py b/atomsci/ddm/test/integrative/integrative_utilities.py index 8db0c9f1..58afff4f 100644 --- 
a/atomsci/ddm/test/integrative/integrative_utilities.py +++ b/atomsci/ddm/test/integrative/integrative_utilities.py @@ -2,6 +2,7 @@ import json import os import shutil +import pandas as pd def clean_fit_predict(): @@ -85,3 +86,36 @@ def copy_delaney(dest='.'): '../test_datasets/delaney-processed.csv')) shutil.copy(delaney_source, dest) + +def extract_seed(metadata_path): + with open(metadata_path, 'r') as f: + metadata = json.load(f) + return metadata.get('seed') + +def modify_params_with_seed(pparams, seed): + pparams.seed = seed + return pparams + +def get_test_set(dataset_key, split_csv, id_col): + """ + Read the dataset key and split_uuid to split dataset into split components + + Parameters: + - dataset_key: path to csv file of dataset + - split_uuid: path to split csv file + - id_col: name of ID column + + Returns: + - train, valid, test dataframe + """ + df = pd.read_csv(dataset_key) + split_df=pd.read_csv(split_csv) + test_df = df[df[id_col].isin(split_df[split_df['subset']=='test']['cmpd_id'])] + + return test_df + +def find_best_test_metric(model_metrics): + for metric in model_metrics: + if metric['label'] == 'best' and metric['subset']=='test': + return metric + return None diff --git a/atomsci/ddm/test/integrative/model_retrain/config.json b/atomsci/ddm/test/integrative/model_retrain/config.json new file mode 100644 index 00000000..f962baf6 --- /dev/null +++ b/atomsci/ddm/test/integrative/model_retrain/config.json @@ -0,0 +1,35 @@ +{ + "comment": "Input file", + "comment": "----------------------------------------", + "comment": "Note: dataset_key must be a path/file name: E.G. 
./dataset.csv", + "id_col": "Id", + "smiles_col": "smiles", + "class_number": "3", + + "comment": "Split", + "comment": "----------------------------------------", + "splitter": "random", + + "comment": "Prediction Type", + "comment": "----------------------------------------", + "response_cols": "sol_category", + "prediction_type": "classification", + + "comment": "Features", + "comment": "----------------------------------------", + "featurizer": "ecfp", + + "comment": "Model", + "comment": "----------------------------------------", + "model_type": "NN", + "dropout": ".01,.01,.01", + "layer_sizes": "256,50,18", + "learning_rate": "0.00007", + "max_epochs": "25", + + "comment": "Training", + "comment": "----------------------------------------", + "comment": "This regulates how long to train the model", + "early_stopping_patience": "2" +} + diff --git a/atomsci/ddm/test/integrative/model_retrain/example.csv b/atomsci/ddm/test/integrative/model_retrain/example.csv new file mode 100644 index 00000000..92ef31ec --- /dev/null +++ b/atomsci/ddm/test/integrative/model_retrain/example.csv @@ -0,0 +1,751 @@ +Id,smiles,sol_category +EOS12286,Cc1nc(N2CCN(C(=O)Nc3ccc(F)cc3F)CC2)cc(-n2ccnc2)n1,0 +EOS85869,CCN(CC)[C@H]1CCN(C(=O)Cc2nc(C(C)C)c(C)s2)C1,0 +EOS85435,CNC(=O)CNC(=O)c1c(-n2cccc2)sc(C)c1C,0 +EOS102302,CC(C)(C)c1ccc(CSc2cnn(C(C)(C)C)c(=O)c2Cl)cc1,0 +EOS64213,CC[C@H](NC(=O)c1ccnc(-n2ccnc2)c1)c1ccccc1OC,0 +EOS68602,Cn1c(SCC(=O)Nc2ccccc2F)nnc1-c1ccncc1,0 +EOS90323,CCN(Cc1ccc2c(c1)OCO2)C(=O)C1=NN([C@H]2CCS(=O)(=O)C2)C(=O)CC1,0 +EOS4636,COc1ccc(CN2C[C@@H]3CC[C@H](C2)N(Cc2ccccc2)C3=O)cc1O,0 +EOS72860,C[C@@H]1Oc2ccc(NC(=O)COCc3nc4ccccc4s3)cc2NC1=O,0 +EOS18993,COc1cccc(-c2nc3n(c2C(=O)Nc2ccc4c(c2)OCO4)CCS3(=O)=O)c1,0 +EOS47744,C[C@H]1CCCN1S(=O)(=O)N(C)c1ccc(O)cc1,0 +EOS70302,O=C(Nc1ccccc1OCC1CC1)N1CCCC[C@@H]1CN1CCOCC1,0 +EOS84678,FC(F)(F)c1c[nH]c(NCc2ccccc2Cl)n1,0 +EOS67976,CCOc1cccc2c1OCC(C(=O)NS(=O)(=O)Cc1ccc(F)cc1)=C2,0 +EOS48390,O=C(Nc1ccnn1-c1cccc(F)c1)c1ccnc(-n2cncn2)c1,0 
+EOS84428,Cc1ccc2c(c1)NC(=O)[C@]2(O)CC(=O)c1cccs1,0 +EOS23838,O=C(CN1C(=O)CSc2ccc(S(=O)(=O)N3CCOCC3)cc21)NCc1ccccc1,0 +EOS23248,CCOc1ccc(NC(=O)c2ccc(-n3cnnn3)cc2)cc1,0 +EOS12014,COc1ccc(OC)c(N(CC(=O)Nc2cccnc2)S(=O)(=O)c2ccc(OC)c(OC)c2)c1,0 +EOS70996,COc1ccc(OC)c([C@@H]2CCCN2S(=O)(=O)c2ccccc2C#N)c1,0 +EOS86630,COCCn1cc(NC(=O)N2CCN(Cc3cc(C)no3)CC2)cn1,0 +EOS101681,COc1c(C)cc(NC(=O)c2nn(Cc3ccc(F)cc3)c3c2CN(C(=O)c2ccc[nH]2)C[C@]3(C)C(N)=O)cc1C,0 +EOS76444,CC(=O)c1ccc(CNC(=O)C#Cc2ccccc2)nc1C,0 +EOS317,O=C(c1cccc(N2CCCCS2(=O)=O)c1)N1CCc2ccccc21,0 +EOS19869,CCN(CC)C(=O)c1ccc2c(c1)CC(=O)N2Cc1cccc(F)c1,0 +EOS26424,CCCn1cc(C(=O)Nc2ccc3c(c2)OCCO3)c(=O)c2ccc(C)nc21,0 +EOS12820,COc1ccc2[nH]c(SCC(=O)N3CCOCC3)nc2c1,0 +EOS100258,CC1(C)CN(C(=O)c2ccc(-c3cccc4nc(NC(=O)C5CC5)nn34)cc2)C1,0 +EOS62095,O=C(Nc1ccc(OC2CCCC2)nc1)[C@@H]1CCCc2[nH]ncc21,0 +EOS49570,Cc1ccccc1[C@@H](NS(=O)(=O)C1CCS(=O)(=O)CC1)C1CC1,0 +EOS55935,COc1cc(OC)c(NC(=O)c2cc(=O)n(-c3ccc(C)cc3)[nH]2)cc1F,0 +EOS4688,COc1cccc(C(=O)N2CCN(C(=O)[C@@H]3CCCN3C(C)=O)CC2)c1,0 +EOS14843,CC(Cc1ccccc1)[n+]1[cH-]/c(=N\C(N)=O)on1,0 +EOS72732,CCCN(Cc1ccc(C#N)cc1)[C@H]1CCS(=O)(=O)C1,0 +EOS86828,C[C@@H](c1ccccc1)N1CCN(C(=O)Cn2cnn(C)c2=O)CC1,0 +EOS50535,CN(C)CCCOC1CCN(c2ccnc3c(F)cccc23)CC1,0 +EOS101685,O=C(NC1CCN(CCCCC2(C(=O)NCC(F)(F)F)c3ccccc3-c3ccccc32)CC1)c1ccccc1-c1ccc(C(F)(F)F)cc1,0 +EOS2424,COc1ccc(Cl)cc1NC(=O)N1CCN(c2cc(N(C)C)nc(C)n2)CC1,0 +EOS74181,CCn1cnnc1[C@H]1CCCN(C(=O)Nc2ccc(C)c(Cl)c2)C1,0 +EOS29352,Cc1cc(=O)n(CC(=O)N2CCN(c3ccccc3F)CC2)c(-c2ccc(F)cc2)n1,0 +EOS20523,CC(C)N1CCC(OCCCNC(=O)C23CCCC=C2N(C2CCCC2)C(=O)CC3)CC1,0 +EOS11203,CCCCNc1ccc(C(=O)N2CCC(Oc3cccnc3)CC2)cn1,0 +EOS69500,CNC(=O)C1CN(C(=O)COCC(F)(F)F)C1,0 +EOS54740,Cc1c(O)ccc(C(Cc2ccc3ccccc3n2)=NO)c1O,0 +EOS24531,CC(C)OCCCNC(=O)C1CCCN(c2ccc(-c3ccccc3)nn2)C1,0 +EOS85309,Cc1nnc([C@H](C)N(C)CCCNC(=O)C2CCC2)s1,0 +EOS71340,COc1ccc(CC(=O)N2CCCN(c3ccccc3C#N)CC2)cc1,0 +EOS73454,CCCc1nc(CN2CCC[C@@H]2Cn2cncn2)cs1,0 +EOS95892,O=C(Nc1ccc2c(c1)OC1(CCCC1)O2)N1CCN(C(=O)c2ccco2)CC1,0 
+EOS71024,COc1cccc(C(=O)N2CCCC[C@H]2c2nc(C)cs2)c1OC,0 +EOS20280,CC(C)C[C@H]1C(=O)N2C[C@@H](N(C)C)C[C@H]2CN1C(=O)CCc1nc(-c2ccccc2)no1,0 +EOS75115,Cc1noc(C)c1CN1CCC[C@H]1c1cccs1,0 +EOS4204,CC(=O)NCCC1CCCCN1S(=O)(=O)c1cc(C)sc1C,0 +EOS1080,Cc1ccc(-c2noc(C(=O)N3CCCCC3)n2)cc1S(=O)(=O)Nc1cccnc1,0 +EOS71657,O=C(Nc1cc(S(=O)(=O)N2CCOCC2)ccc1O)c1ccccc1OCc1ccccc1,0 +EOS30361,CCOCc1nc(C2CCCN(C(=O)c3ccccc3)C2)no1,0 +EOS86322,C[C@H]1CCCN(CC(=O)NCc2ccc3c(c2)OCO3)C1,0 +EOS47656,CO[C@@H]1C[C@@H](c2ncn[nH]2)N(C(=O)c2cc(C(C)(C)C)n[nH]2)C1,0 +EOS84451,CC(=O)N1N=C(c2ccc3c(c2)OCO3)C[C@H]1c1cccc(F)c1,0 +EOS68460,CC1CCN(c2nnc(S[C@@H](C(=O)N3CCOCC3)c3ccccc3)n2C2CC2)CC1,0 +EOS94000,O=C(Cn1[nH]c(=O)c2ccccc2c1=O)NCCCCOc1ccc(Cl)cc1,0 +EOS100419,COc1cc2c(cc1OC)CN(CCc1ccc(NC(=O)c3cc(OC)c(OC)cc3NC(=O)c3cnc4ccccc4c3)cc1)CC2,0 +EOS53202,CN(Cc1nc2ccccc2c(=O)[nH]1)Cc1ccccc1Br,0 +EOS72157,Cc1nc2cc(=O)[nH]n2c(C)c1CC(=O)Nc1cccc(F)c1,0 +EOS32717,COC[C@@H]1CC(F)(F)CN1C1CN(C(=O)c2ccc(OC)c(F)c2)C1,0 +EOS64116,Cc1ccc(N2C[C@@H](C(=O)N3CCN(S(=O)(=O)c4ccc5c(c4)OCCCO5)CC3)CC2=O)cc1,0 +EOS18710,CCCNC(=O)CSc1nc2c(C)nn(CC)c2c(=O)n1Cc1cccs1,0 +EOS75229,Cc1cc(=O)c(C(=O)N[C@@]2(CCO)CCOC2)nn1-c1cccc(C(F)(F)F)c1,0 +EOS101261,COc1cccc(CNc2ccc(S(=O)(=O)Nc3nc4ccccc4s3)cc2)c1O,0 +EOS63484,CCN(CC(=O)Nc1cccc(CN(C)C(C)=O)c1)c1ccccc1,0 +EOS52662,Cc1cn(CC(=O)N(C2CCCC2)[C@H]2CCS(=O)(=O)C2)c(=O)n1-c1ccc(C#N)cc1,0 +EOS75633,COCC(=O)N[C@@H](C)C(=O)N1CCN(S(=O)(=O)c2cccc3c2N=S=N3)CC1,0 +EOS48965,Cc1nc(NC(=O)N[C@@H](C)c2cnn(C)c2)sc1C,0 +EOS75232,O=C(NS(=O)(=O)CCCF)[C@@H]1CSc2ccccc21,0 +EOS101077,Cc1cc(NC(=O)Nc2ccc(N(C)C)cc2)c2cc(F)cc(F)c2n1.Cl,0 +EOS68367,O=C(Nc1[nH]c(=O)ncc1F)c1ccc(Cl)cc1Br,0 +EOS74964,CN(C)CCCNC(=O)c1cn(-c2ccccc2)nc1-c1cccnc1.Cl,0 +EOS73806,Cc1nc(C)c(C(=O)N2CCC[C@H]2c2cccnc2)s1,0 +EOS60795,CC(C)c1ccc(-c2nnc(NC(=O)C3=COCCO3)s2)cc1,0 +EOS85379,COc1ccc(OC)c(C(=O)Nc2cccc3c(OC)ccnc23)c1,0 +EOS51677,COc1ccc([C@@H]2CCCN2C(=O)c2ccc(S(=O)(=O)NC3CC3)cc2)c(OC)c1,0 +EOS69628,CCCCOc1ccc(S(=O)(=O)Nc2cc(OC)ccc2OC)cc1,0 
+EOS20313,O=C(c1cscn1)N1C2CCC1Cn1c(nnc1-c1cccnc1)C2,0 +EOS40006,CC#CCN(c1ccccc1F)S(=O)(=O)CC,0 +EOS62200,Cc1ccc(-n2[nH]c(C(=O)Nc3cccn(CC(F)(F)F)c3=O)cc2=O)cc1,0 +EOS4603,Cc1cc2c(cc1NC(=O)N1CCCC(c3ccncn3)C1)n(C)c(=O)n2C,0 +EOS84475,COCCN1C(=O)C(O)=C(C(=O)c2ccc(OC)cc2)[C@H]1c1ccc(OC)cc1,0 +EOS32515,CCOCc1cnc(C)nc1C1CCCN(C(=O)c2cc(C)n(C)n2)C1,0 +EOS83045,C[C@]1(NC(=O)CCOc2ccc(F)cc2)CCS(=O)(=O)C1,0 +EOS64178,O=c1[nH]c(CN2CCN(S(=O)(=O)c3ccc4ccccc4c3)CC2)nc2ccsc12,0 +EOS69525,CN(Cc1coc(-c2ccccc2)n1)S(=O)(=O)c1ccc(F)cc1,0 +EOS72007,C[C@H](NC(=O)NC1CCC2(CC1)COC2)c1cnn(CC2CCC2)c1,0 +EOS68620,C[C@@H](CNC(=O)c1c(F)cccc1Cl)N1CCCC1,0 +EOS34716,O=C(c1ccn2nnnc2c1)N1C[C@@H]2C[C@H](C1)Cn1c2cccc1=O,0 +EOS100506,Cc1c[nH]c2ncnc(N3CCC(CN)(C(=O)Nc4cccc(OC(=O)N(C)C)c4)CC3)c12,0 +EOS32572,COCc1nnc2n1CC1CCC(C2)N1C(=O)CCc1ccc(OC)cc1,0 +EOS42051,CCC[C@]1(CO)CCN(C(=O)c2cccnc2C2CC2)C1,0 +EOS102362,Cc1ccc(-n2ncc3c(=O)n(CC(=O)NCc4ccco4)cnc32)cc1,0 +EOS94889,CCCn1c(NC(=O)c2ccncc2)nc2ccccc21,0 +EOS85437,C#Cc1cccc(NC(=O)CN(C)c2ncc(Cl)cn2)c1,0 +EOS28121,O=C(Nc1ccccc1F)N1CCC(n2cnc3cc(F)ccc3c2=O)CC1,0 +EOS74746,C#CCSCCNC(=O)N(CC)c1ccccc1,0 +EOS83980,O=C(CCNC(=O)c1c[nH]c2ccccc12)N[C@H]1CCCCNC1=O,0 +EOS101619,Cc1ccc(S(=O)(=O)O)cc1.Cn1ncnc1[C@H]1c2n[nH]c(=O)c3cc(F)cc(c23)N[C@@H]1c1ccc(F)cc1,0 +EOS101380,N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)[O-].[Na+],0 +EOS74404,Cc1cc(C)n(C[C@H]2CCCN2Cc2ncc(C(C)(C)C)o2)n1,0 +EOS39466,O=C1Nc2ccc(F)cc2C2(CNC2)O1,0 +EOS100771,Cl.O=C1Nc2ccccc2C2(CCN(CCc3ccc(C(F)(F)F)cc3)CC2)O1,0 +EOS97473,COc1ccnc(NC[C@H](O)c2ccccc2C(F)(F)F)n1,0 +EOS101061,CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)[C@H](C(=O)C=C4[C@@]3(C)CC[C@H]3C(C)(C)C(=O)C(C#N)=C[C@]43C)[C@@H]2C1,0 +EOS70993,CCCCNc1ccc(S(C)(=O)=O)cc1S(C)(=O)=O,0 +EOS35211,Cc1ccc(C)n1CCC(=O)Nc1ccc2[nH]c(CO)nc2c1,0 +EOS30963,COCCOCc1nc(C2CCCCN2C(=O)Cc2cccc(Cl)c2)no1,0 +EOS6650,CN(CCCS(=O)(=O)N(C)C)Cc1n[nH]c2c1CCCCC2,0 +EOS84489,O=c1nc(C(F)(F)F)nc2ccccn12,0 +EOS85707,CCOCC(=O)N1c2ccccc2N(C)CC[C@@H]1C,0 
+EOS68429,COc1cc(C(=O)Nc2ccc(C#N)cc2)cc2c1OCCO2,0 +EOS75116,C[C@H](NCc1cccc(NC(=O)Cn2cccn2)c1)c1cccs1,0 +EOS101549,CCN1CCN(C(=O)Cc2ccc(Nc3ncc(F)c(Nc4ccc(C(=O)Nc5ccccc5Cl)cc4)n3)cc2)CC1,0 +EOS2236,COc1ccc(Nc2nc(N3CCN(S(C)(=O)=O)CC3)nc3ccccc23)cc1,0 +EOS38187,CC(=O)N1CCc2cc(NC(=O)c3cc4ccccc4cc3O)ccc21,0 +EOS60861,Cc1ccc(NC(=O)CCc2ccccc2)cc1S(=O)(=O)N1CCOCC1,0 +EOS71172,Cc1cnn(C[C@H]2CN(Cc3ccoc3C)CCO2)c1,0 +EOS101808,Cl.O=C(O)Cc1cccc(OCCCN(Cc2cccc(C(F)(F)F)c2Cl)CC(c2ccccc2)c2ccccc2)c1,0 +EOS40031,O=S1(=O)CCCN(Cc2cn3ccccc3n2)CC1,0 +EOS73643,COc1cc(CNC(=O)C(C)C)ccc1OC[C@H]1CCCO1,0 +EOS85429,Cl.Cn1cc(CN)c(-c2ccc(C#N)cc2)n1,0 +EOS93043,CNC(=O)c1ccc(CSC(C)(C)C)cc1,0 +EOS102237,Cc1coc2c1C(=O)C(=O)c1c-2ccc2c1CCCC2(C)C,0 +EOS72582,COc1ccc(OC)c([C@@H]2CCCN2C(=O)c2cn(C(C)C)cn2)c1,0 +EOS57426,CCN(CC)c1ccc(C(=O)N2CCC[C@H]2c2noc(C)n2)cn1,0 +EOS53552,CCOc1ccc(S(=O)(=O)Nc2ccc([C@@]3(C)NC(=O)NC3=O)cc2)cc1OCC,0 +EOS20732,CC(C)Cn1ccc2c(NC(=O)c3cnc4sccn4c3=O)cccc21,0 +EOS51286,Cc1cc(NC(=O)NC[C@H](O)COc2cccc3ccccc23)no1,0 +EOS60822,Cn1c(=O)c2c(ncn2CC(=O)Nc2nc(-c3ccc4c(c3)OCCO4)cs2)n(C)c1=O,0 +EOS19194,COc1ccnc2c1c(=O)n(CC(=O)Nc1ccc3c(c1)OCO3)c(=O)n2C,0 +EOS4613,CC1CCCN1C(=O)c1ccc2nc(-c3ccc(F)cc3)cn2c1,0 +EOS64357,CC(=O)N(C)C1CCN(C(=O)c2ccc3cc[nH]c3c2)CC1,0 +EOS73381,CN1C[C@@H]2[C@@H](CCCN2C(=O)NCc2cccc(COCC(F)(F)F)c2)C1=O,0 +EOS48062,C[C@@H](CC#N)NC(=O)c1ccnc(SC(C)(C)C)c1,0 +EOS101656,Cl.Clc1ccccc1CN1CCc2sccc2C1,0 +EOS101295,COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1NC(=O)/C=C/CN1CCCCC1,0 +EOS86660,N#CCCN(CCC(F)(F)F)C(=O)Cn1nc2n(c1=O)CCCCC2,0 +EOS74770,Cc1noc(C)c1C[C@H](C)C(=O)N1CC(n2cc(-c3ccccc3)nn2)C1,0 +EOS67998,Cc1ccc(C)c(S(=O)(=O)NC(=O)CC[C@H]2CCCO2)c1,0 +EOS968,CCCC(=O)Nc1nn2c(-c3ccc(C)c(F)c3)nnc2s1,0 +EOS38460,COc1cc(OC)cc(N2CC[C@H](NC(=O)N3Cc4cccc(OC)c4C3)C2)c1,0 +EOS97123,O=C(NCCOc1ccccc1F)[C@H]1CC(=O)N(Cc2ccccc2)C1,0 +EOS45917,C[C@H](NC(=O)c1ccoc1)C(=O)Nc1cccc(CN2C(=O)CNC2=O)c1,0 +EOS83217,CCCN(C(=O)c1nn(-c2ccccc2C(F)(F)F)c(C)cc1=O)[C@H]1CCS(=O)(=O)C1,0 
+EOS49461,CCN(CCN(C)CC(F)(F)F)C(=O)c1ccccc1-n1cccn1,0 +EOS50218,Cc1n[nH]cc1CN1C(=O)[C@@H]2CCCCN2C1=O,0 +EOS4606,COc1ccccc1-n1cc(C(=O)N2CCCCC2c2cc(C)on2)cn1,0 +EOS61531,CCc1ccc(NC(=O)NC[C@H]2CCS(=O)(=O)C2)cc1,0 +EOS20410,O=C(c1cn(C[C@@H]2CCCN2C2CCOCC2)nn1)N1CCc2ccccc2C1,0 +EOS84305,Cn1nc(N2CCN(c3ccc(F)cc3)CC2)c(=O)n(C)c1=O,0 +EOS41541,CCn1c(=O)n(CC(=O)N2CCCN3c4ccccc4C[C@H]3C2)c2ccccc21,0 +EOS100489,C[C@@H]1COCCN1c1cc(=O)n2c(n1)N(Cc1cncc(Cl)c1)[C@H](C(F)(F)F)CC2,0 +EOS57657,C[C@]1(C(=O)N2CCCc3occc3C2)CCC(=O)NC1,0 +EOS68318,N#C[C@@H](NC(=O)c1ccc2[nH]nnc2c1)C1CCCCC1,0 +EOS70350,CCn1cc(NC(=O)c2cc(-c3cccc(Cl)c3)no2)ccc1=O,0 +EOS101803,CN1CCc2cc3c(cc2[C@H]1[C@@H]1OC(=O)c2c1ccc1c2OCO1)OCO3,0 +EOS61452,O=S(=O)(c1ccc(N2CCN(c3ncnc4c3oc3ccccc34)CC2)nc1)N1CCOCC1,0 +EOS32960,CC1(c2noc(C3CCOCC3)n2)CCCN(C(=O)c2ccccn2)C1,0 +EOS101363,Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1,0 +EOS100836,CCC(C)n1ncn(-c2ccc(N3CCN(c4ccc(OC[C@H]5CO[C@](Cn6cncn6)(c6ccc(Cl)cc6Cl)O5)cc4)CC3)cc2)c1=O,0 +EOS40062,O=C(CN1CCC[C@H](O)C1)N1CCN(Cc2ccccc2)CC1,0 +EOS53751,CC(C)CCNC(=O)Cn1cnc2c(oc3ccccc32)c1=O,0 +EOS28747,CC(=O)Nc1ccc(S(=O)(=O)Nc2ccc3c(c2)CCC(=O)N3C)cc1,0 +EOS68356,C[C@@H](c1ccco1)N(C)C(=O)c1cc(O)nc(N(C)C)n1,0 +EOS61487,CCCNC(=O)[C@H](C)NC(=O)CCCn1cnc2c(C)cccc2c1=O,0 +EOS74344,O=C(Nc1ccncc1)N[C@H]1CCS(=O)(=O)C1,0 +EOS22069,CCn1nc(N2CCCC(NS(=O)(=O)c3ccc(F)c(C)c3)C2)ccc1=O,0 +EOS75113,COc1ccc(NC(=O)NC[C@H](C)Cn2nc(C)cc2C)cn1,0 +EOS14093,Cc1nc(-c2ccc(S(=O)(=O)N3CCN(c4cc(-n5ccnc5)ncn4)CC3)s2)cs1,0 +EOS84289,CN1CC(=O)N=C1NC(=O)Nc1ccc(O)cc1,0 +EOS12353,CC(C)c1ccccc1NC(=O)Cn1c(=O)oc2ccccc21,0 +EOS48318,CC(C)[C@@H](c1ccccc1)S(=O)(=O)NC(=O)c1cn2c(n1)CCC2,0 +EOS68001,CCONC(=O)c1csc(-c2ccc(OC(C)C)cc2)n1,0 +EOS95884,Cc1nccn1C[C@@H](C)CNC(=O)CCCn1c(=O)oc2ccccc21,0 +EOS18705,CCCN(CCC)CCCNC(=O)CCNC(=O)Cn1ccc2ccccc2c1=O,0 +EOS68358,O=C(Nc1ccccc1Cl)N1CCN(Cc2ccccn2)CC1,0 +EOS70986,O=C([C@@H]1CN(Cc2nnsc2Cl)c2ccccc2O1)N1CCCCC1,0 +EOS655,COc1ccc(OC)c(C2CC(c3c(O)[nH]c(=O)n(C4CCCCC4)c3=O)=NN2)c1,0 
+EOS25087,COc1ccc(NC(=O)N2CCN(c3nc4ccccc4n(Cc4ccccc4)c3=O)CC2)c(OC)c1,0 +EOS74565,CCn1nnc(C)c1CS(=O)(=O)CC1(C)CCCCC1,0 +EOS75894,Cc1ccc(C#N)cc1S(=O)(=O)N1CCN[C@@H](C)C1,0 +EOS73711,CNS(=O)(=O)c1ccc(C(=O)Nc2ccc(-n3ccnc3C)c(F)c2)o1,0 +EOS63830,CCc1ccc(C)cc1OCc1nnc(C)n1CC,0 +EOS75530,CCc1ccc(S(=O)(=O)N2CC[C@H](CNC)C2)cc1,0 +EOS102405,C[N+](C)(C)CC#CCN1CCCC1=O.[I-],0 +EOS73193,CCN(CC)S(=O)(=O)c1cccc(C(=O)Nc2cccc(N3CCCC3=O)c2)c1,0 +EOS64470,CCS(=O)(=O)N1CCC(=NO)CC1,0 +EOS100139,Cc1c(CN(C)C(=O)/C=C/C2=CNC3=NC(=O)CCC3=C2)oc2ccccc12,0 +EOS85018,COCc1cc(C(=O)N2Cc3ccccc3N(C)C[C@@H]2C)no1,0 +EOS79004,OC[C@H](Nc1cc(C(F)(F)F)ccn1)c1ccccc1,0 +EOS68364,CCc1ccc([C@H](C)NCC(C)(C)C(N)=O)s1,0 +EOS100502,COc1cc2c(cc1F)C(c1ccccc1Cl)=NC1=C(C)NNC1=N2,0 +EOS11756,Cc1ccc(OCC(=O)N2CCc3ccccc32)cc1C,0 +EOS38930,O=C(CCNC(=O)[C@@H]1COCCN1CC1CCC1)Nc1ccccc1,0 +EOS2381,Cc1c2c(=O)n(-c3nc4ccccc4s3)[nH]c2cc(=O)n1Cc1cccs1,0 +EOS75463,Cc1ccc(NC(=O)CN(C)S(C)(=O)=O)cc1S(=O)(=O)N1CCOCC1,0 +EOS68402,CC(=O)Nc1ccc(NC(=O)c2cccc(OCc3cscn3)c2)cc1,0 +EOS68555,Cc1cn2c(n1)CC[C@H](NC(=O)c1cc(C)oc1C)C2,0 +EOS30122,CCn1c(=O)c2c(cc(C)n2C)n(CC(=O)Nc2ccc(OC)c(Cl)c2)c1=O,0 +EOS84785,CCONC(=O)c1nn(-c2ccccc2F)cc1O,0 +EOS50836,c1ccc([C@H]2N(c3ncnc4[nH]ccc34)CC23CCOCC3)cc1,0 +EOS69603,O=C(Cn1cnc2scc(-c3cccs3)c2c1=O)Nc1cccnc1,0 +EOS72529,C[C@@H](CN1CCCCC1)NC(=O)Cc1ccn[nH]1,0 +EOS37268,COc1ccc(NC(=O)c2ccc(NC(=O)c3cnn(C)c3)cc2)cc1,0 +EOS100138,CNC(=O)O[C@H]1COc2ccc(N3CCN(C4COC4)CC3)cc2[C@@H]1NC(=O)c1ccc(F)cc1,0 +EOS38167,COc1ccc2nc(C(=O)Nc3ccc4[nH]c(=O)[nH]c4c3)ccc2c1,0 +EOS57195,O=C(Cn1ccc2ccc(Cl)cc21)NC[C@]1(O)CCS(=O)(=O)C1,0 +EOS53366,Clc1ccccc1-c1noc(CSc2nnnn2C[C@H]2CCCO2)n1,0 +EOS69457,Nc1nc(C2CC2)nc(N2CCN(C[C@H]3CCC4(CCC4)O3)CC2)n1,0 +EOS71052,Cc1cc(C)n(C[C@H]2CCCN2C(=O)c2ccoc2C)n1,0 +EOS13687,Cc1ccc2nc(Nc3ccccc3)c(/C=N/O)c(=O)n2c1,0 +EOS71163,Cc1nc(-c2cccc(NC(=O)CN[C@H]3CCC(=O)NC34CCC4)c2)cs1,0 +EOS71861,N#CCCN(CC1CC1)C(=O)[C@H](O)c1ccc(Br)cc1,0 +EOS10022,Cc1c(NC(=O)N[C@H]2COC[C@@H]2N2CCCC2)cccc1N1CCCC1,0 
+EOS55390,O=C(Nc1nc2ccccc2[nH]1)c1c[nH]n2c1nc(=O)c1ccccc12,0 +EOS72237,O=C1C[C@H](NC(=O)c2ocnc2C2CC2)CN1,0 +EOS21724,CC(=O)c1ccc(NC(=O)C(CC(C)C)NS(=O)(=O)c2ccc3c(c2)oc(=O)n3C)cc1,0 +EOS38052,CCN(C)C[C@H]1CCN(C(=O)CCNc2ncc(C(F)(F)F)cc2Cl)C1,0 +EOS19865,CCCNC(=O)c1cccc2c1CCN2C(=O)c1ccc(F)cc1,0 +EOS85253,COc1ccccc1O[C@H]1CCCN(C(=O)c2cnc3c(cnn3C)c2OC)C1,0 +EOS70677,CO[C@@](C)(CNC(=O)c1ccc(=O)[nH]n1)C1CC1,0 +EOS101606,CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21,0 +EOS57141,C[C@H](NCCN(C)Cc1ccccc1)c1cn(C2COC2)nn1,0 +EOS31524,Cc1cnc(C)n1C1CCCN(C(=O)c2cccs2)C1,0 +EOS68188,C[C@H]1CN(C(=O)c2cccnc2N2CCOCC2)CCO1,0 +EOS64373,CCCNC(=O)c1ccc(NC(=O)[C@@H]2COCCN2CC2CCC2)cc1Cl,0 +EOS21404,CC(C)Cn1c(=O)c2ccc(C(=O)NC3CCCC3)cc2n2c(=O)[nH]nc12,0 +EOS39492,COc1ccccc1[C@@H]1CC(c2ccco2)=NN1S(C)(=O)=O,0 +EOS39892,O=C(NCCCCc1nccs1)[C@@H]1OCCc2ccccc21,0 +EOS19808,O=c1c2c[nH]c3ccccc3c-2nn1-c1ccc(Cl)cc1,0 +EOS4028,CNCCN1C(=O)CC[C@H]2CN(C(=O)C3(COC)CCC3)CC[C@H]21.Cl,0 +EOS84786,Cc1nnc2ccc(N3CC[C@]4(CCOC4)C3)nn12,0 +EOS72231,COCCOc1cccc(C(=O)Nc2[nH]cc(C)c2C#N)c1,0 +EOS68564,Cc1nc([C@H]2CCCN(C(=O)COc3ccc(C#N)cc3)C2)no1,0 +EOS74273,O=C(CSc1cccs1)NC[C@H]1CCCO1,0 +EOS78190,CCCN1CCN(Cc2nnc(C)n2CC)c2cc(F)ccc21,0 +EOS11937,CCc1ccc(OCC(=O)Nc2ccc(S(=O)(=O)N(CC)CC)cc2)cc1,0 +EOS46373,CN(C)C(=O)c1cccc(S(=O)(=O)N2CCn3c(Br)cnc3C2)c1,0 +EOS28807,COc1ccc(NC(=O)Cn2nc3c(-c4nc(-c5ccccc5)no4)cccn3c2=O)cc1,0 +EOS92511,COc1ccc(Nc2nc(CN3CCN(S(=O)(=O)c4c(C)noc4C)CC3)cs2)cc1,0 +EOS75292,O=C(Cn1[nH]c(=O)ccc1=O)NC[C@@H](c1ccccc1)N1CCCC1,0 +EOS68622,C[C@H](NC(=O)COc1ccc(N2CCCC2=O)cc1)C(N)=O,0 +EOS93210,Cc1cc(-n2c(C)cc(C(=O)Cn3cnc4c(cnn4C)c3=O)c2C)no1,0 +EOS29869,O=C(Nc1ccc(F)cc1)c1ccc(=O)n(CCN2CCOCC2)c1,0 +EOS64886,CS(=O)(=O)NC[C@H]1Cc2ccccc2O1,1 +EOS49657,CON(C)C(=O)Cn1nc(C(F)(F)F)cc1Br,1 +EOS2134,O=S(=O)(NCc1ccc2c(c1)OCO2)N1CCCC1,1 +EOS55726,N#Cc1c(F)cccc1N1CCN(c2ccc(N)nc2)CC1,1 +EOS94466,CC(=O)c1ccc(S(=O)(=O)Nc2nnc(-c3cccc(C#N)c3)o2)cc1,1 +EOS28957,CCc1ccc(NC(=O)Cn2nc3c(N4CCCCCC4)nccn3c2=O)cc1,1 
+EOS2355,Cc1nc(C)n(CC(C)C(=O)NCc2ccccc2)n1,1 +EOS31977,COC[C@@H]1CN(C(=O)c2ccc(C)cn2)C[C@H]1C(N)=O,1 +EOS12981,Cc1cn2c3c(=O)n(Cc4ccccc4)c(=O)n(C)c3nc2n1CCCN(C)C,1 +EOS47263,CCCc1cc(C(=O)N2CCC[C@H](Cn3cncn3)C2)cc(=O)[nH]1,1 +EOS97698,CC(C)n1ccnc1CSCCN(C)C,1 +EOS14145,CCCC(=O)N1CCN(c2ccc(Nc3cccc(C)n3)nn2)CC1,1 +EOS9133,O=C(c1ccc2c(c1)CCCC2)N1C[C@H](O)[C@@H](N2CCOCC2)C1,1 +EOS73842,CC(C)(C)NC(=O)CCN1CCc2[nH]nc(C(F)(F)F)c2C1,1 +EOS28946,CC(=O)c1ccc(NC(=O)Cn2nc3c(Oc4ccccc4C)nccn3c2=O)cc1,1 +EOS36400,Cc1cc(=O)oc2cc(O)c(-c3ccnc(N)n3)cc12,1 +EOS75052,Cc1ccnc(NC(=O)CCNS(=O)(=O)c2cn(C)c(C)n2)c1,1 +EOS101243,Cc1ccc(C(=O)c2cc(O)c(O)c([N+](=O)[O-])c2)cc1,1 +EOS91731,CC(C)O[C@@H]1CCN(C[C@H](O)c2ccc(C(F)(F)F)cc2)C1,1 +EOS20531,COCCCN(Cc1cccn1C)CC(O)COCC(C)C,1 +EOS73088,Cc1ncsc1CCN1CCNC(=O)[C@@H]1c1ccccc1,1 +EOS101591,COc1cccc2c1C(=O)c1c(O)c3c(c(O)c1C2=O)C[C@@](O)(C(C)=O)C[C@@H]3O[C@H]1C[C@H](N)[C@H](O)[C@H](C)O1.Cl,1 +EOS55126,COCCc1nnc(SCC(F)(F)F)n1N,1 +EOS23522,Cc1cccn2c(=O)c3cc(C(=O)N4CCCCCC4)n(C)c3nc12,1 +EOS66412,COc1ccc(Cl)cc1NC(=O)CSc1nc(O)cc(=O)n1CC(C)C,1 +EOS30366,O=C(c1cc(CN2CCOCC2)on1)N1CCCC(c2nc(C3CCOCC3)no2)C1,1 +EOS42829,O=C(Nc1cccc(CNC(=O)[C@H]2CCc3nccn3C2)c1)c1ccco1,1 +EOS19470,CCNC(=O)CCc1nc(CS(=O)(=O)c2ccccc2)no1,1 +EOS52119,Cc1nc(CN(C)C(=O)[C@H]2CC(=O)N(CC(F)(F)F)C2)no1,1 +EOS44623,COc1cc(NC(=O)c2csc(-c3ccco3)n2)c(C(N)=O)cc1OC,1 +EOS72244,O=C(CSc1ccncc1)N1CC[C@H](O)C12CCCC2,1 +EOS66281,COCc1ccc(CNC[C@H]2CNc3cc(C)nn3C2)cc1,1 +EOS82921,Cl.N[C@@H]1CCCC[C@H]1NC(=O)c1ccc2c(c1)NC(=O)CO2,1 +EOS11868,CCOc1ccc(Br)cc1S(=O)(=O)N1CCN(c2ccccc2OC)CC1,1 +EOS8750,Cc1cc(NC(=O)C2CCCN(CC(N)=O)C2)ccc1N1CCCC1=O,1 +EOS74107,CC(C)CC(=O)Nc1cc(C2CC2)nn1[C@H]1CCS(=O)(=O)C1,1 +EOS3220,CCN(C(=O)c1ccc(-c2c(C)[nH]c(=O)[nH]c2=O)cc1)[C@H]1COC[C@@H]1O,1 +EOS75496,CCOc1ccc2c(c1)C=C(C(=O)Nc1ccc3c(c1)CCN3C(C)=O)CO2,1 +EOS2177,CC(=O)c1cccc(NC(=O)c2cc(S(=O)(=O)N3CCCC3)cs2)c1,1 +EOS48254,Cc1ccc(C(=O)Nc2cnn(C[C@H]3CCCCN3C(=O)c3ccc(C)n3C)c2)n1C,1 +EOS60252,COc1ccc2oc(=O)c(C(=O)NCc3ccccn3)cc2c1,1 
+EOS88181,CC1(C)CCc2sc(NC(=O)N3CCc4ccnc(O)c4C3)nc21,1 +EOS1394,CCOc1ncccc1CN(C)C(=O)c1ccc(Br)o1,1 +EOS55066,Cc1cc(C)cc(N(C)Cc2cn(C)c(=O)n(C)c2=O)c1,1 +EOS101060,Cl.N=C(NCc1ccccc1)NC(=O)c1nc(Cl)c(N)nc1N,1 +EOS41021,O=C(Nc1ccc(OC2COC2)cc1)[C@]1(c2ccccc2)CCCO1,1 +EOS179,COCCCNC(=O)c1ccc(N2CCCCS2(=O)=O)cc1Cl,1 +EOS42600,CS(=O)(=O)c1ccc(C(=O)Nc2ccc(C(=O)NC3CC3)cc2)cc1,1 +EOS30056,CCCN(CC1CCOC1)S(=O)(=O)c1ccc(C(F)(F)F)cc1,1 +EOS71174,CC1(C)COC[C@@H]1NC(=O)c1oc2ccccc2c1Cn1cccn1,1 +EOS78531,Cc1ccc(CN(C)C(=O)c2cccc(C)n2)cc1,1 +EOS43509,O=C1OCCN1c1cccc(C(=O)N(c2cccc(F)c2)C2CCOCC2)c1,1 +EOS29806,COc1ccc(NC(=O)c2c(O)c3cccnc3[nH]c2=O)c(OC)c1,1 +EOS16178,CCC(=O)N1CCc2sc(S(=O)(=O)NCc3ccccc3OC)cc2C1,1 +EOS81233,O=C(c1cnccn1)N1CCC[C@H]1c1nnc2n1CCCCC2,1 +EOS47464,Cc1ccc(C(=O)N[C@@]2(C)CCS(=O)(=O)C2)cc1F,1 +EOS63147,CO[C@@H](CNc1ccccc1OCC(F)(F)F)C1CCOCC1,1 +EOS47476,CCCCNC(=O)NC(=O)CN1CCC(C(=O)c2ccc3c(c2)OCCO3)CC1,1 +EOS44914,Cc1n[nH]c2ncc(CNC[C@@H](c3cccs3)N3CCCC3)cc12.Cl,1 +EOS83454,CCN(Cc1ccc(Cl)s1)C(=O)Cn1c(=O)n(CC)c(=O)c2ccccc21,1 +EOS101236,CN(C)C[C@@H]1CCn2cc(c3ccccc32)C2=C(C(=O)NC2=O)c2cn(c3ccccc23)CCO1.Cl,1 +EOS30121,CCc1ccc(N2CC(c3noc(-c4ccccn4)n3)CC2=O)cc1,1 +EOS84491,CC(=O)Nc1cccc(NC(=O)c2ccc3nccnc3c2)c1,1 +EOS66886,Cc1noc(C)c1CC(=O)NC[C@@H](c1cccs1)N(C)C,1 +EOS67062,COC1(C(=O)N2CC[C@H](c3nc(C)cs3)C2)CS(=O)(=O)C1,1 +EOS69662,Cc1ccc(-c2nc(C(=O)NCCS(N)(=O)=O)cs2)c(C)c1,1 +EOS19323,CCCNC(=O)c1ccc(N2CCNCC2)c(NS(=O)(=O)c2cccs2)c1.O=C(O)C(F)(F)F,1 +EOS29690,CCc1nnc(-c2ccc(=O)n(CC(=O)Nc3ccc(OC)c(Cl)c3)c2)o1,1 +EOS37142,CN(C(=O)Cn1nnn(-c2cccs2)c1=O)c1nc2ccccc2s1,1 +EOS56625,Cc1ccc2nc(CN3C[C@H](C)OC(C)(C)C3)cc(=O)n2c1,1 +EOS48993,OCCCSc1nnc(COc2ccc(Cl)cc2)[nH]1,1 +EOS66335,O=C(Nc1ccc(-n2cncn2)nc1)c1cnn(-c2ccccc2F)c1,1 +EOS42224,COc1ccccc1-n1ncc(C(=O)Nc2cccc3nccn23)c1C,1 +EOS17576,COc1cc(NC(=O)Cn2cnc3c([nH]c4cc(OC)c(OC)cc43)c2=O)cc(OC)c1,1 +EOS35103,CS(=O)(=O)N(CC(=O)NC1CC2CCC1C2)c1cccc(F)c1,1 +EOS68306,Cn1ccnc1C[C@H]1CCCN(Cc2cc(C#N)cs2)C1,1 
+EOS75255,O=C(Nc1nccs1)c1cc2ccccc2[nH]c1=O,1 +EOS29172,CCC(=O)Nc1ccc(-n2cnc(C(=O)N3CCN(c4ccc(F)cc4)CC3)c2)nc1,1 +EOS24746,CC(Oc1ccccc1)C(=O)N1CCN(S(=O)(=O)c2cc3c(cc2Cl)NC(=O)CO3)CC1,1 +EOS60479,COc1ccc(S(=O)(=O)Nc2ccc(C)c(S(=O)(=O)N(C)C)c2)cc1,1 +EOS6670,CCc1oc(C(=O)N2CCOC(c3ccccc3)C2)cc1CN1CCCC1,1 +EOS60213,CC(=O)NCc1ccc(C(=O)CN2C(=O)N[C@@](C)(Cc3ccc4c(c3)OCO4)C2=O)s1,1 +EOS29060,CC(=O)N1CCc2cc(N3CC(C(=O)Nc4ccc(F)cc4)CC3=O)ccc21,1 +EOS20201,Cc1cnc(C2CC2)n1CCNC(=O)C(C)Oc1ccccc1,1 +EOS55933,COc1cc(NC(=O)c2ccc(C#N)c(C)n2)cc(-n2cccn2)c1,1 +EOS34900,COc1ccc(OCC(O)Cn2c(Nc3ccccc3)nc3c2c(=O)[nH]c(=O)n3C)cc1,1 +EOS49292,Cc1ccc(CC(=O)N2CCC[C@H](n3ccnc3C)C2)s1,1 +EOS83010,Cc1cccc(NC(=O)CCc2c(C)nc(C)[nH]c2=O)c1,1 +EOS92560,CC(=O)Nc1ccccc1OCC(=O)N1N=C(c2ccco2)C[C@H]1c1ccco1,1 +EOS69831,CC[C@H](Sc1ccccc1)C(=O)NCc1cc(=O)nc(SC)[nH]1,1 +EOS82679,N#Cc1ccnc(N2CCN(CC(=O)NC(N)=O)CC2)c1,1 +EOS32067,O=C(Cc1cccc(F)c1)N1CCC(n2cc(COCC3CC3)nn2)C1,1 +EOS53528,O=C1COc2cc(C(=O)Nc3ccc(Cl)cc3)ccc2N1,1 +EOS70163,CCCCN1CCCC[C@@H]1CNC(=O)Nc1ccc(-n2cncn2)nc1,1 +EOS97090,COc1ccccc1NC(=O)COc1ccc(C(=O)N(C)C[C@H]2COc3ccccc3O2)cc1OC,1 +EOS97137,COc1ccc(C(C)(C)C)cc1NC(=O)Cn1cnc2c1c(=O)n(C)c(=O)n2C,1 +EOS46355,Cc1ccc2c(c1)[C@H]1CN(C)CC[C@@H]1N2S(=O)(=O)c1ccc(C)c(N(C)C)c1,1 +EOS85878,Cc1cccc(N(C)C(=O)c2cc(Br)c(C)n(C)c2=O)c1,1 +EOS86566,Cc1cc(NC(=O)CNc2ccccc2C(=O)NCCc2ccccc2)no1,1 +EOS88849,CC(=O)Nc1ccccc1CNc1cc(C(F)(F)F)n(C)n1,1 +EOS58411,O=C([C@@H]1CCCCN1C(=O)c1ccco1)N1CC(O)(C(F)F)C1,1 +EOS68759,Cc1cc(C)n2nc(C(=O)Nc3ccc(Br)cc3)nc2n1,1 +EOS21738,CN(CC(=O)N1CCN(c2ccc(F)cc2)CC1)S(=O)(=O)c1ccc2c(c1)c(=O)n(C)c(=O)n2C,1 +EOS33555,O=C(NCCN1CCOCC1)C1CCc2sc(C(=O)N3CCCCC3)cc2C1,1 +EOS45003,CC(C)NC(=O)Cn1ncnc1-c1ccccc1Cl,1 +EOS81781,Cc1ccc(C(=O)C2CCN(c3cc(=O)n4ccccc4n3)CC2)cc1,1 +EOS58118,COc1cnc(C2(NC(=O)C[C@H](C)n3cccn3)CCCC2)[nH]c1=O,1 +EOS92643,COc1ccc(NC(=O)COc2ccc3c(C)cc(=O)oc3c2)c(OC)c1,1 +EOS20358,O=C(c1cncc(Br)c1)N1CCCC1CCc1noc(C2CC2)n1,1 +EOS1448,COc1cccc(C(=O)N2CCC(N(C(C)=O)C3CCOCC3)C2)c1C,1 
+EOS40322,Cc1cccc(-c2ccc(C(=O)N(C)Cc3cnn(C)c3)c(=O)[nH]2)c1,1 +EOS53159,CCn1c(SCC(=O)N2CCCCC2)nc2cc(S(=O)(=O)N3CCCCC3)ccc21,1 +EOS97722,Fc1cccc(Cn2ccnc2-c2cccnc2)c1,1 +EOS29173,CCC(=O)Nc1ccc(-n2cnc(C(=O)N3CCN(c4ccccc4OC)CC3)c2)nc1,1 +EOS12054,CC(C)N1CC(C(=O)Nc2ccc(Br)c(Cl)c2)CC1=O,1 +EOS85246,O=C(Cc1n[nH]c2ccccc12)N1CCOCC(F)(F)C1,1 +EOS55119,CCn1ncc2c1CCC[C@@H]2NCc1ccsc1,1 +EOS97541,Cc1c(C(=O)N2CCC3(C2)OCCO3)cnn1CCO[C@@H]1CCCCO1,1 +EOS32335,CC(=O)N1CCN(C(=O)c2cc(C)n(C)n2)CCC1c1nc(C)no1,1 +EOS100737,CC(C)C[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)CNC(=O)[C@@H](C)NC(=O)[C@@H](N)Cc1ccc(O)cc1)C(=O)O,1 +EOS97418,CC(C)Oc1cccc(N2CCN(C(C)C)CC2)n1,1 +EOS66465,CNC(=O)c1ccc(NC(=O)N2CCC[C@H](c3nc(C)no3)C2)cc1,1 +EOS30502,CN(CCNC(=O)C1CCCCC(=O)N1Cc1ccccc1)C1CCCCC1,1 +EOS50937,CCOc1ccccc1N1CCN(C(=O)c2ccc(OC)c(OC)c2OC)CC1,1 +EOS73036,CC[C@@H](CNC(=O)c1ccncc1)N1CCCC1,1 +EOS72932,CCN(C[C@H]1CCOC1)C(=O)NCc1ccnc(N2CCOCC2)c1,1 +EOS20126,O=C(CCc1ccc2c(c1)CN(S(=O)(=O)c1cccc(F)c1)CCO2)N1CCCC1,1 +EOS36759,CCOc1ccc(C2C(C(=O)c3ccc4c(c3)OCCO4)=C(O)C(=O)N2CCCN2CCOCC2)cc1,1 +EOS86605,Cc1ccccc1-c1cnc(NC(=O)c2cnn(C)c2)s1,1 +EOS17540,CCOc1ccc(N2CCn3c(SCC(=O)Nc4cccc(NC(C)=O)c4)nnc32)cc1,1 +EOS21408,CCCCn1c(=O)c2sccc2n2c(=O)n(CC(=O)Nc3cc(OC)c(OC)c(OC)c3)nc12,1 +EOS94425,Cn1nc(-c2ccco2)cc1NC(=O)c1ccc(OCC(F)F)nc1,1 +EOS44088,Cc1ccc2ncc(CN(CC(=O)N(C)C)CC(F)(F)F)n2c1,1 +EOS22776,CCC(=O)N1N=C(c2c(O)[nH]c(=O)n(C)c2=O)CC1c1ccc(OC)cc1,1 +EOS19299,O=C(Cn1nc2ccc(Sc3cccc(F)c3)nn2c1=O)NC1CCCC1,1 +EOS36645,Nn1c(Nc2ccc(Nc3ccccc3)cc2)nncc1=O,1 +EOS45188,CN1CCN(S(=O)(=O)c2ccc(NC(=O)c3ccccn3)cc2)CC1,1 +EOS50953,O=C(c1ccc(-c2ccco2)[nH]c1=O)N1CCCN(Cc2ccc(Cl)cc2)CC1,1 +EOS28966,CC(=O)c1cccc(NC(=O)Cn2nc3c(N4CCN(c5ccccc5)CC4)nccn3c2=O)c1,1 +EOS26888,CCOc1ccc(NC(=O)Cn2nc(-c3ccncc3)ccc2=O)cc1,1 +EOS61477,COc1cc(C(=O)N[C@H](C(=O)NCc2ccc3c(c2)OCO3)C(C)C)cc(OC)c1OC,1 +EOS97296,O=C(Cc1n[nH]c(=O)c2ccccc12)Nc1cccc(Br)c1,1 +EOS17737,COCc1nnc2n1CCC(NC(=O)CCc1ccc(OC)cc1)CC2,1 +EOS20359,Cc1noc(C)c1CCC(=O)N1CCCC1CCc1noc(-c2ccccn2)n1,1 
+EOS13504,COC(CNC(=O)Cn1cc(NC(=O)CCOc2ccccc2)cn1)c1cccs1,1 +EOS11683,CN(c1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCOCC3)cc2)cc1)S(=O)(=O)c1ccccc1,1 +EOS84909,CC[C@@](C)(CCCNC(=O)Nc1ccc(C(=O)N2CCCC2)cc1)C1OCCO1,1 +EOS35148,O=C(CCNC(=O)c1cnccn1)Nc1ncc[nH]1,1 +EOS29151,Cc1nc(-c2ccccn2)cc2nn(CC(=O)Nc3ccc(C#N)cc3)c(=O)n12,1 +EOS96021,CCn1c(=O)c(C)nc2cc(C(=O)N3CCN(C(=O)[C@@H]4COc5ccccc5O4)CC3)ccc21,1 +EOS29084,CCC(=O)N1CCc2cc(N3CC(C(=O)Nc4ccc(C)cc4)CC3=O)ccc21,1 +EOS90326,Cn1ccnc1C(=O)c1ccc(NC(=O)CCN2C(=O)COc3ccccc32)cc1,1 +EOS27652,CCC(C)NC(=O)CSc1nc2ccccc2c(=O)n1CCCC(=O)NCC1CCCO1,1 +EOS46701,CCn1nc(C)c(C(=O)NC[C@@H](CO)c2cccnc2)n1,1 +EOS54876,NC(=O)c1cc(-c2csc(Nc3ccccn3)n2)ccc1O,1 +EOS39531,CCC(CC)NC(=O)C1CCN(c2ccc3nnc(C(F)(F)F)n3n2)CC1,1 +EOS92675,O=C(CSCc1cc(=O)n2ccsc2n1)N1CCC[C@H]1c1ccc2c(c1)OCCCO2,1 +EOS55454,CC(C)c1nc([C@H](C)NC(=O)c2c[nH]c(=O)c3ccccc23)cs1,1 +EOS75192,Fc1ccc(Oc2ccc(CNC[C@H]3CNc4ccnn4C3)cn2)cc1,1 +EOS284,O=C(Nc1ccc2c(c1)N(C(=O)c1cccnc1)CCC2)C1CC1,1 +EOS95426,O=C(N[C@H]1CCc2[nH]ncc2C1)N1CCSC2(CCCCC2)C1,1 +EOS84730,CNC(=O)c1ccc(CNC(=O)Cc2cn(C)c3ccccc23)cc1,1 +EOS94432,O=C(COc1ccccc1Cc1ccccc1)N1CCCS1(=O)=O,1 +EOS28936,COc1cccc(Oc2nccn3c(=O)n(CC(=O)NCc4ccc5c(c4)OCO5)nc23)c1,1 +EOS12051,CC(=O)Nc1ccc(NC(=O)CSc2nnc(-c3ccc(N)cc3)n2C)cc1,1 +EOS37291,CNC(=O)[C@@H]1CN(CC(=O)Nc2ccc(C(C)=O)cc2)c2ccccc2O1,1 +EOS50339,Cc1nc([C@H]2CCCN(C(=O)C3=COCCC3)C2)no1,1 +EOS13320,CCNC(=O)c1ccc(C)c(NS(=O)(=O)c2ccccc2)c1,1 +EOS11266,Cc1ccc(NC(=O)CN2C(=O)c3ccccc3C2=O)c(C)c1,1 +EOS87753,CC(=O)c1ccc(NS(=O)(=O)c2ccc3c(c2)OCCCO3)cc1,1 +EOS85014,C[C@@H](CC(=O)Nc1cccc(N2CCCC2)c1)NC(=O)c1cnc2n1CCCC2,1 +EOS37592,O=C(Nc1cccc2ccncc12)c1cnn2ncccc12,1 +EOS19978,CCCc1ccc(S(=O)(=O)N2CCN(c3cc(-c4ccncc4)n[nH]3)CC2)cc1,1 +EOS62269,Cc1nn(C)c(OCC(F)(F)F)c1CN1CC[C@H](CNc2ccccn2)C1,1 +EOS60649,NC1=C(c2nc3ccccc3s2)C(=O)CN1c1ccc2cn[nH]c2c1,1 +EOS102444,CC(C)CCOc1ccc(NC(=S)Nc2ccc(OCCC(C)C)cc2)cc1,1 +EOS49316,Cc1oc(-c2ccco2)nc1CC(=O)N1CCC[C@@H]1Cn1cccn1,1 
+EOS16063,O=C(NCc1cccnc1)c1cccc(CN2C(=O)C3CCCN3C(=O)c3ccccc32)c1,1 +EOS19649,COc1cccc(NC(=O)Cn2c3ccccc3n3c(=O)cc(-c4ccncc4)nc23)c1,1 +EOS53775,O=C(Nc1nc2cc3c(cc2s1)OCCO3)[C@H]1CCCN(C(=O)c2ccoc2)C1,1 +EOS53337,O=C(Nc1cccc(NC(=O)c2ccco2)c1)c1ccc2[nH]cnc2c1,1 +EOS19546,O=C(Nc1ccccc1)C1CCN(S(=O)(=O)c2cccc3nonc23)CC1,1 +EOS28535,COc1ccc(CNC(=O)Cn2c(=O)c(CCC(=O)NCC(C)C)nc3ccccc32)cc1,1 +EOS71857,Cn1cc(C(=O)N(C[C@H](O)c2ccc(F)cc2)C2CC2)ccc1=O,1 +EOS14091,Cc1nc(N2CCN(C(=O)Nc3cc(F)cc(F)c3)CC2)cc(-n2cccn2)n1,1 +EOS95991,Cn1cnnc1[C@H]1CCCN(C(=O)c2cc(Cl)c(Cl)n2C)C1,1 +EOS28448,CNC(=O)C1CCN(c2nc3ncn(CC(=O)Nc4cccc(C(C)=O)c4)c(=O)c3s2)CC1,1 +EOS92429,N#CCN1CCCC[C@H]1c1nc(-c2ccccc2)no1,1 +EOS54958,CCCOc1ccc(S(=O)(=O)NC[C@H](C)N2CCOCC2)cc1,1 +EOS22640,Cc1ccc(CS(=O)(=O)N2CCC(C(=O)Nc3ccc4c(c3)n(C)c(=O)n4C)CC2)cc1,1 +EOS30237,CCn1ncc(N2CCCC(CC(=O)NCc3ccc(C)cc3)C2)cc1=O,1 +EOS13810,CCN(Cc1ccccc1)S(=O)(=O)c1ccc(N2C(=O)CCC2=O)cc1,1 +EOS26448,Cn1cc(S(=O)(=O)N2CCCCC2)cc1C(=O)Nc1ccc2c(c1)OCCO2,1 +EOS36845,CCn1c(=O)c(C(=O)Nc2nc3ccccc3[nH]2)c(O)c2ccccc21,1 +EOS29071,CC(=O)Nc1ccc(NC(=O)C2CC(=O)N(c3ccc4c(c3)CCN4C(C)=O)C2)cc1,1 +EOS73388,CC(=O)N1CCC(NC(=O)c2ccc(Oc3cccc(F)c3)cn2)CC1,1 +EOS97377,C[C@H](NC(=O)CN1C(=O)N(C)C2(CCCCC2)C1=O)c1cc2ccccc2o1,1 +EOS42421,CC[C@H]1c2ccsc2CCN1C(=O)CN1C(=O)CC2(CCCC2)C1=O,1 +EOS59377,Cc1cccc(NC(=O)N2CCN(Cc3ccccc3)[C@H](C#N)C2)n1,1 +EOS12962,O=S(=O)(Nc1cccc2cccnc12)c1ccc2c(c1)OCCO2,1 +EOS29798,Cc1ccccc1-c1noc(-c2c(O)c3ccccc3n(C)c2=O)n1,1 +EOS52786,COCc1cccc(NC(=O)N[C@H]2CCCc3c2cnn3CCO)c1,1 +EOS47267,O=C(CCc1ccc(=O)[nH]c1)N1CCC[C@H]1COc1ccc(F)cc1,1 +EOS81657,O=C(c1ccoc1)N1CCN(C[C@H](O)COc2ccc(F)cc2)CC1,1 +EOS12695,CC1CC(C)CN(CC(=O)Nc2cccc(C(F)(F)F)c2)C1,1 +EOS70223,COc1ccc(CN(C)C(=O)c2coc(Br)c2)cc1O,1 +EOS73405,N#Cc1ccc(NC2CCN(C(=O)[C@@H]3CCCN3S(=O)(=O)c3cccs3)CC2)nc1,1 +EOS84484,O=C(NCC1CC1)c1cc2c(cc1Cl)N1CCCCCC1=NS2(=O)=O,1 +EOS66976,O=C(Nc1ccc(-n2cccn2)c(F)c1)N1CCC(NS(=O)(=O)c2cccs2)CC1,1 +EOS100948,N#Cc1c(NC(=O)c2cccc3ccccc23)sc2c1CCCC2,1 
+EOS28571,CCn1c(=O)c(N2CCC(NC(=O)Nc3ccccc3)CC2)nc2cccnc21,1 +EOS28959,COc1cccc(NC(=O)Cn2nc3c(N4CCCCCC4)nccn3c2=O)c1,1 +EOS26607,Cc1nn(C(=O)c2ccc(F)cc2)c(C)c1S(=O)(=O)N1CCCC1,1 +EOS13838,CCCOc1ccc(NC(=O)CSc2nc(=O)cc[nH]2)cc1,1 +EOS11149,COc1ccc(C(CC(=O)N(C)C)c2ccccc2)cc1,1 +EOS70092,O=C(Nc1ccccc1F)N1CCC[C@H](Cn2cncn2)C1,1 +EOS70130,CC(C)Cn1ncnc1CN1CCN([C@@H](C)c2ccsc2)CC1,1 +EOS14953,O=C1CC(c2cccs2)c2cc3c(cc2N1)OCO3,1 +EOS67904,CC(=O)Nc1ccc(C(=O)NC[C@@H](c2ccc(C)o2)N2CCOCC2)cc1,1 +EOS50359,C#CCN1CCC(C(=O)N2CCN([C@@H](C)c3cccnc3)CC2)CC1,1 +EOS86721,COCCCNC(=O)CN(C)C(=O)c1[nH]c(C)c(C(C)=O)c1C,1 +EOS101924,COc1cc(NC(=O)Cc2nn(C)c(=O)c3ccccc23)ccc1Cl,1 +EOS74603,CCC1(CC)CN(C(=O)c2nc(C)c(C)[nH]c2=O)CC[S@@]1=O,1 +EOS35381,COc1ccc(N2C(=O)C(C)CS2(=O)=O)cc1S(=O)(=O)N(Cc1ccccc1)C(C)C,1 +EOS17665,CC(=O)Nc1ccc(CC(=O)N2CCC(CCOc3ccccc3F)C2)cc1,1 +EOS79945,COc1ccc(-c2cc(C(=O)Nc3ccnc4ccnn34)[nH]n2)cc1,1 +EOS68369,Cc1ccccc1NC(=O)C[C@H](C)Nc1ccc2c(c1)C(=O)NC2,1 +EOS44134,CCOc1ccc([C@H](C)NC(=O)c2cn3nc(C)cc3[nH]c2=O)cc1OC,1 +EOS71441,NC(=O)[C@H]1CCCN(C(=O)NCCc2cccs2)C1,1 +EOS32679,Cc1cncc(C(=O)N2CC(N3CC(F)(F)C[C@H]3CN(C)C)C2)c1,1 +EOS28890,CCN1C(=O)COc2cc(-c3noc(C(=O)NCc4cccnc4)n3)ccc21,1 +EOS18150,CCC(=O)N1CC(c2nc(-c3ccc(F)cc3)no2)C2(CCOCC2)C1,1 +EOS89428,COc1cc(S(N)(=O)=O)ccc1N,1 +EOS56951,CNC(=O)NCC(=O)N1CC[C@H](C)N(Cc2ccccc2)CC1,1 +EOS40201,Cc1nc(C)n(C[C@H]2CCCN(Cc3cscn3)C2)n1,1 +EOS61516,CC(C)c1ccc(NC(=O)NC[C@H]2CCS(=O)(=O)C2)cc1,1 +EOS51419,CC(=O)Nc1cc(C(=O)NC[C@H](C)N2CCc3ccccc32)ccc1F,1 +EOS98162,CCCNC(=O)N1CCCC1c1cc(C)no1,1 +EOS62782,Cc1ccccc1NC(=O)CCc1ccc2c(c1)OCCO2,1 +EOS21348,COc1ccccc1NS(=O)(=O)c1ccc(OC)c(OC)c1,1 +EOS13016,CCOCCn1c(=O)c2c(nc3n(-c4ccccc4)c(C)c(C)n23)n(C)c1=O,1 +EOS60820,CC[C@H](NC(=O)Cn1nc(C)c(S(=O)(=O)N(C)C)c1C)c1ccc(C)cc1,1 +EOS1376,CCc1nnc(-c2nnc(NCCCN3CCN(C)CC3)c3ccccc23)o1,1 +EOS13754,COc1ccc(C(=O)Nc2nnc(-c3ccncc3)s2)c(OC)c1,1 +EOS85074,c1cnnc(N2CCC[C@@H]2CNCc2ccsc2)c1,1 +EOS97700,CS(=O)(=O)CCCONC1CCCC1,1 
+EOS91906,O=C1CN(NC(=O)C[C@@](O)(c2nc3ccccc3s2)C(F)(F)F)C(=O)N1,1 +EOS80917,CC(C)(C)NC(=O)NC(=O)CN1CCN(c2ccccc2O)CC1,1 +EOS94024,O=C(c1cn(-c2ccccc2)nc1-c1cccnc1)N1CCN(C(=O)N2CCCC2)CC1,1 +EOS29141,COc1ccc(-c2cc3nn(CC(=O)NCc4ccc(F)cc4)c(=O)n3c(C)n2)cc1,1 +EOS58528,Cc1nccn1[C@H]1CCCN(C(=O)c2cccn2Cc2cccnc2)C1,2 +EOS17544,O=C(NCc1ccco1)c1ccc(-n2nnc3cccnc32)cc1,2 +EOS48187,CC(C)(CO)CSc1nnc2sc3ccccc3n12,2 +EOS8378,CCOc1ncccc1C(=O)N1CC(=O)N(c2ccc(C)cc2)CC1C,2 +EOS72263,COc1cc(C)ccc1OCCC(=O)N1CC[C@@H](N)C1.Cl,2 +EOS32876,O=C(CC1COC2(C1)CN(C(=O)CCc1nc(-c3ccccc3)no1)C2)NCC1CC1,2 +EOS56337,CCCn1nc(C(=O)N[C@@H](C)Cn2cccn2)ccc1=O,2 +EOS69439,CCn1nc(C2CC2)cc1C(=O)N1C[C@H](C)NC(=O)c2ccccc21,2 +EOS93462,Cc1sc2nc3ccc(N4CCCCC4)nn3c(=O)c2c1C,2 +EOS35123,COc1ccc(-c2ccc(=O)n(CN3CCN(Cc4ccccc4)CC3)n2)cc1OC,2 +EOS32554,COc1cc(OC)cc(C(=O)N2CCCC(c3nc(C)ncc3CO)C2)c1,2 +EOS74345,CON(C)C(=O)c1cnc(-c2ccc(C)cc2C)s1,2 +EOS68601,O=c1c2cc(Cl)ccc2ncn1C[C@H](O)COCc1ccccc1F,2 +EOS35932,COc1ccc2nc(NC(=O)COc3ccc(C(=O)N4CCOCC4)cc3)sc2c1,2 +EOS14492,Cc1ccn(-c2ccc(N3CCN(C(=O)Nc4cccc(F)c4)CC3)nn2)n1,2 +EOS69256,C[C@@H](C1CC1)n1cc(NC(=O)c2ccc3cc[nH]c3c2)cn1,2 +EOS5127,CCCC(=O)N1CCC(c2nnc(Cn3cncn3)n2C2CC2)CC1,2 +EOS80285,COc1ccccc1CN(C(=O)[C@H]1CCC(=O)NC1)C1CC1,2 +EOS96397,CC(C)[C@H](CNc1cnn(C)c(=O)c1Cl)Nc1ccccc1,2 +EOS8570,Cc1cccc2nc(C(=O)Nc3ccc(Oc4nnn[nH]4)cc3)cn12,2 +EOS30112,CCc1ccc(S(=O)(=O)N2CCC(N(C(=O)COC)C3CCOCC3)CC2)cc1,2 +EOS5291,CCN1CC(C(=O)N2CC(Oc3cccc(C)c3)C2)CC1=O,2 +EOS69488,C[C@H]1CCN(C(=O)c2cc3[nH]cnc3cc2F)CCN1Cc1ccccc1,2 +EOS18037,Cc1n[nH]c([C@@]23CCN(C(=O)Cc4c(C)noc4C)C[C@@H]2CN(C(=O)C(C)C)C3)n1,2 +EOS75443,C[C@]1(NC(=O)Cc2ccccc2Cl)CCS(=O)(=O)C1,2 +EOS61200,CC[C@H](C)C(=O)NCc1ccc(OCc2cccnc2)c(OC)c1,2 +EOS94761,O=C(Nc1ccc(-n2ccnc2)cc1)c1ccccc1,2 +EOS34219,O=C(Nc1ccc(Cl)cc1)C1CCCO1,2 +EOS65077,Cc1ccc(N2C[C@@H](C(=O)NCc3nc(N(C)C)no3)CC2=O)cc1,2 +EOS50967,Cc1cc(C)n(C[C@H]2CCCN2C(=O)c2ccc[nH]2)n1,2 +EOS5197,CC(C)c1noc([C@@H]2CCCN2C(=O)c2cn(C)c(=O)[nH]c2=O)n1,2 
+EOS59542,COc1ccc(-c2nn(-c3ccccc3)cc2C(=O)N(C)OC)cc1OC,2 +EOS1006,CCC1CCCCN1C1=C(C(=O)N2CCOCC2)S(=O)(=O)c2ccccc21,2 +EOS45220,O=C(NCCN1CCCS1(=O)=O)c1ccc(-c2ccccc2)[nH]1,2 +EOS788,COc1cccc(NS(=O)(=O)c2c(C(=O)N3CCC(C)CC3)c(C)n(C)c2C)c1,2 +EOS20949,COc1cc(C(=O)N2CCN(Cc3ccc(SC)cc3)CC2)cc(OC)c1OC.O=C(O)C(=O)O,2 +EOS86130,O=C([C@@H]1CC=CCC1)N1CCN(S(=O)(=O)c2ccc(F)cc2)CC1,2 +EOS30812,COc1cccc(CNC(=O)C2CCN(c3cnn(Cc4ccccc4)c(=O)c3)C2)c1,2 +EOS6882,Cn1ccnc1C(NC(=O)N1CCOc2ccc(Cl)cc2C1)C1CC1,2 +EOS69228,CNC(=O)c1ccc(C)c(NC(=O)c2ccnn2CC(F)F)c1,2 +EOS27463,CCn1c(SCC(N)=O)nnc1-c1ccc2c(c1)OCCCO2,2 +EOS84589,O=C(Nc1ccccc1C(=O)NCc1cn2ccccc2n1)c1ccco1,2 +EOS81815,CC(C)Oc1ccc(Cl)c(C(=O)N2CCC[C@H]2c2cnn(C)c2)n1,2 +EOS33197,CN(C)S(=O)(=O)N(CCN1CCCC1)C1CCN(S(=O)(=O)c2ccc(F)cc2)C1,2 +EOS83172,CC(C)(C)NS(=O)(=O)c1cccc(NC(=O)c2ccc3c(c2)OCCO3)c1,2 +EOS84067,Cn1ncc(C2CC2)c1C(=O)Nc1ccc2c(c1)CC(=O)N2,2 +EOS80165,COc1cc(C#N)ccc1OC[C@@H](O)CN1CCC(C)CC1,2 +EOS35519,CCC(C)n1c(C)c(C)n2c3c(=O)n(C)c(=O)n(C)c3nc12,2 +EOS83696,CCc1cccc(NC(=O)CN2CCO[C@@H](C)C2)c1,2 +EOS69169,Cc1cc(NC(=O)c2cn3nccc3nc2C)ccc1OC(C)C,2 +EOS18880,Cc1c(CC(=O)N2CCOCC2)c(=O)oc2c(C)c(O)ccc12,2 +EOS87417,CNC(=O)[C@H](C)NC(=O)Cc1c(C)nc(-c2ccccn2)[nH]c1=O,2 +EOS80106,O=S(=O)([C@H]1CCCN(c2ccnc(C3CC3)n2)C1)N1CCOCC1,2 +EOS49715,CC(C)(C)n1ncn(CCc2ccc3c(c2)CCO3)c1=O,2 +EOS49811,Cn1cc(N2CCN(C(=O)[C@H]3CCc4cn[nH]c4C3)CC2=O)cn1,2 +EOS16480,Cc1cc(C)n(Cc2ccc(C(=O)N3C[C@@H](Oc4cccnc4)C[C@H]3C(=O)NC3CC3)o2)n1,2 +EOS35342,CN(C(=O)C1=C(c2ccccc2)SCCO1)c1ccccc1,2 +EOS7629,COCCn1cnnc1C(C)NC(=O)c1cc(C)oc1C,2 +EOS64203,CC(C)OCc1ccc(C(=O)Nc2ccccc2N2CCOCC2)cc1,2 +EOS56513,COc1ccc(OCC#CCNCc2ccco2)cc1.Cl,2 +EOS42248,Cc1c(S(=O)(=O)N2CCC[C@H](OCC3CC3)C2)cnn1C,2 +EOS48181,Cc1cc(C2(C(=O)NC[C@H](C)Oc3cccc(F)c3)CC2)on1,2 +EOS64099,Cc1cc(=O)n(CC(=O)Nc2ccc(F)cc2)c(SCC(N)=O)c1C#N,2 +EOS16091,Cc1ccccc1Sc1nccnc1SCC(=O)NCCN(C)C,2 +EOS18272,CCN(CC)C(=O)c1ccc(C)c(NC(=O)c2ccccc2)c1,2 +EOS15145,CCCc1cc(C(=O)Nc2cccc(C(C)=O)c2)no1,2 
+EOS48819,C[C@@H]1CN(c2c(C#N)nnc3ccccc23)CCN(C2CC2)C1,2 +EOS26757,CC(=O)Nc1ccc(OCc2cc(=O)n3ccc(C)cc3n2)cc1,2 +EOS28926,Cc1nn(Cc2ccc(C(=O)NCC3CCCO3)cc2)c(C)c1S(=O)(=O)N1CCCCC1,2 +EOS78164,COc1c([C@@H]2CCCN2C(=O)Cn2cc(C)cn2)c(C)nn1C,2 +EOS52887,COc1ccccc1NC(=O)N1CCC[C@H](CN2CCC(C)CC2)C1,2 +EOS43076,CC(=O)Nc1ccc(SCC(=O)N(C)[C@H]2CCS(=O)(=O)C2)cc1,2 +EOS54533,CC(=O)[C@H](C)n1c(=O)c2c(nc3n(-c4ccc(F)cc4)c(C)cn23)n(C)c1=O,2 +EOS91708,CC(C)(C)c1ncc(CNc2ncccc2CO)s1,2 +EOS47741,O=C(NCC1=CCNCC1)[C@@H]1COc2ccccc2C1,2 +EOS82946,O=C(c1cn(-c2ccc(F)cc2)nn1)N1CCN(c2ccccc2O)CC1,2 +EOS46735,COc1ccc(C(=O)Nc2cccc(C(=O)N(C)C)c2)cc1,2 +EOS78495,Cc1nccn1[C@H]1CCCN(C(=O)CCc2cnn(C)c2)C1,2 +EOS43752,CC(C)(C)c1ccc([C@@]2(C)NC(=O)N(CN(CC#N)C3CC3)C2=O)cc1,2 +EOS43601,COc1ccc(-n2c(SCCCO)nc3ccccc3c2=O)cc1OC,2 +EOS91781,CC(=O)CCCCCNC(=O)Cc1csc(-c2ccccn2)n1,2 +EOS32539,COCCOCc1cnc(C)nc1C1CCCN(C(=O)c2cccnc2)C1,2 +EOS74564,O=C(Nc1cccc2c1OCO2)N1[C@H]2CC[C@@H]1C[C@H](c1ccccc1)C2,2 +EOS46025,CN(c1ccccc1)S(=O)(=O)c1cccc(NC(=O)CN2C(=O)CCOc3ccccc32)c1,2 +EOS60658,COc1ccccc1CN(C)c1ccc(S(=O)(=O)Nc2ccccn2)cn1,2 +EOS101423,C/C(=C\c1ccc(C(=O)O)cc1)c1ccc2c(c1)C(C)(C)CCC2(C)C,2 +EOS33504,COc1ccc(C(=O)Nc2ccc(N3CCN(c4ccccc4)C3=O)nc2)cn1,2 +EOS13531,O=C(Cc1noc2ccccc12)NC1CCCN(Cc2cccnc2)C1,2 +EOS21395,CCn1nc(C)c2c1c(=O)n(Cc1ccc(OC)cc1)c(=O)n2CC(=O)N1CCCCC1,2 +EOS90312,O=C(C[C@@H]1SC(N2CCOCC2)=NC1=O)Nc1ccccc1,2 +EOS2445,Cc1cc(C)n(-c2cc(N3CCN(C(=O)COc4ccccc4)CC3)ncn2)n1,2 +EOS19142,Cc1cc(NC(=O)Nc2cn(C)c(=O)c3ccccc23)no1,2 +EOS22595,Cc1ccc(NC(=O)C2CCN(S(=O)(=O)Cc3ccccc3)CC2)cc1,2 +EOS82858,C[C@@H](CNS(=O)(=O)C1CN(C(=O)c2ccccc2)C1)N1CCOCC1,2 +EOS25011,Cc1ccc(C(=O)N2CCN(c3c(C)n(-c4ccccc4)c(=O)n(C)c3=O)CC2)cc1,2 +EOS91135,CNC[C@H]1CCCN(S(=O)(=O)c2ccc(OC)c(Cl)c2)C1.Cl,2 +EOS1554,CCn1nccc1C(=O)N1C[C@@H](NC(=O)c2ccccc2)[C@H](c2nc(C)n[nH]2)C1,2 +EOS42199,Cc1cnn(-c2ccc(C(=O)N3C[C@H](C)OCC3(C)C)cc2)c1,2 +EOS10884,NC(=O)c1ccc(S(=O)(=O)N[C@@H]2COCC[C@@H]2OCC2CC2)cc1,2 +EOS19234,COc1ccc(CNC(=O)CC2CCN(C(=O)C3CCCCC3)CC2)cc1,2 
+EOS73714,CCCS(=O)(=O)N1CCC(C(=O)NC[C@H]2Cc3ccccc3O2)CC1,2 +EOS36281,CC(C)Nc1nc(C#N)nc(NC(C)C)n1,2 +EOS74281,COc1ccc(-c2noc(CN3CCO[C@@H](C)C3)n2)cc1,2 +EOS59428,Cc1cccn2cc(CC(=O)N(CC(F)(F)F)C(C)C)nc12,2 +EOS98466,CCOc1ccc(-c2noc(CCC(=O)N(CCOC)CCN(C)C)n2)cc1,2 +EOS59117,CCC1(CC)NC(=O)N(C[C@H](O)COc2cc(C)ccc2C)C1=O,2 +EOS40964,Cc1ccc([C@H](CNC(=O)NCc2cc[nH]n2)N2CCOCC2)o1,2 +EOS31674,CCn1ccc(C(=O)N2C[C@@H](Oc3ccccc3)C[C@H]2C(=O)NC2CC2)n1,2 +EOS101385,N#Cc1ccc2[nH]c(O)c(-c3ccc(CN4CCOCC4)cn3)c2c1,2 +EOS87294,CC1(C)CCN(C(=O)NC[C@H]2CCOC3(CCC3)C2)CCS1(=O)=O,2 +EOS60145,CC(C)c1n[nH]c([C@@H]2CN(C(=O)c3cc(Cl)c[nH]3)CCO2)n1,2 +EOS90458,Cl.O=S(=O)(NCC[C@H]1CCNC1)c1ccccc1Cl,2 +EOS80260,O=C(c1c(O)cc(F)cc1F)N(Cc1ccco1)C1CC1,2 +EOS28555,CC(=O)Nc1ccc(S(=O)(=O)N2CCC(c3noc(-c4ccccc4)n3)CC2)cc1,2 +EOS34965,CC(C)n1ccc2c(C(=O)NCCN3CCOCC3)cccc21,2 +EOS82331,COc1ccccc1N1CCN(C(=O)CCNC(C)=O)CC1,2 +EOS96190,CO[C@@H](C)c1nnc(Cc2nc(-c3ccccc3)cs2)o1,2 +EOS85504,CCOCCCNC(=O)[C@@H](NC(=O)c1ccco1)C(C)C,2 +EOS5712,COc1cccc2ncn(Cc3ccccc3CN(C)C)c(=O)c12,2 +EOS86615,O=C(C1CC=CC1)N1CCC[C@H](c2cc(C(F)(F)F)[nH]n2)C1,2 +EOS90873,CCOc1ccc(-c2csc(CCN3CCOC3=O)n2)cc1,2 +EOS20619,NC(=O)c1ccccc1NC(=O)c1cccc(I)c1,2 +EOS63886,Cc1ccccc1CN(C)S(=O)(=O)c1cc(C(N)=O)sc1C,2 +EOS25224,Cc1ccc2c(c1)N(C(=O)Cn1nc3ccc(SC(C)(C)C)nn3c1=O)CCO2,2 +EOS17855,COCCn1nc2n(c1=O)CCN(C(=O)Cc1ccsc1)C2c1ccccc1,2 +EOS26802,O=C(Nc1ccccc1)N1CCN(c2nc3ccccc3n3cccc23)CC1,2 +EOS87770,CSc1ccc(N2C[C@@H](C(=O)N3CCN(C(=O)CC(C)C)CC3)CC2=O)cc1,2 +EOS93165,COc1cccc(F)c1NC(=O)CCc1cscn1,2 +EOS92810,Cn1ncc2c(=O)[nH]c(N3CCN(c4ccc(Cl)cc4)CC3)nc21,2 +EOS29000,Cc1nc2c(-c3ccncc3)nsc2c(=O)n1CC(=O)Nc1ccc2c(c1)OCO2,2 +EOS24947,COCCn1cc(C(=O)Nc2ccc(F)cc2F)c2nn(-c3ccccc3)c(=O)c-2c1,2 +EOS51767,O=C(CN1CCN(C(=O)c2nn(-c3ccccc3)c(=O)c3ccccc23)CC1)N1CCCCC1,2 +EOS90248,Cc1ccc(C(C)(C)C)cc1S(=O)(=O)CCCCS(C)(=O)=O,2 +EOS48186,CN(C)CC(=O)N[C@@H](Cc1ccccc1)c1nc2ccccc2o1,2 +EOS84204,Cc1cc(C(=O)N2CCSC[C@H]2C2CCC2)c2c(C)noc2n1,2 +EOS91903,Cc1cc(NC(=O)[C@H](C)N2CCSCC2)no1,2 
+EOS93753,Cc1ccc(NC(=O)c2nn[nH]c2[C@@H]2CCCN2)cc1,2 +EOS50993,CNC(=O)NC(=O)CN1CCC[C@H]1c1nc2ccccc2s1,2 +EOS24437,COc1ccccc1NC(=O)CCn1cnc2oc(C)c(C)c2c1=O,2 +EOS54693,Cn1c(=O)n(CC(=O)Nc2nccn2Cc2ccccc2)c2ccccc21,2 +EOS7731,COCCNC(=O)c1cccc(NC(=O)C2CC23CCNCC3)c1C.Cl,2 +EOS40466,Cc1ccc(NC(=O)c2nn(-c3ccccc3)c(C)cc2=O)c(O)c1,2 +EOS40635,Cc1ccc([C@@H]2CN(C(=O)c3cc4ccccc4c(=O)[nH]3)CCO2)o1,2 +EOS67156,C[C@H]1CN(C(=O)c2c[nH]nc2-c2cccnc2)CC2(CCC2)O1,2 +EOS44393,CCOc1ccc(NC(=O)c2cccc(NC(N)=O)c2)cc1,2 +EOS89279,CC(C)Cc1ccc([C@@H](NC(=O)Cn2cccnc2=O)C2CC2)cc1,2 +EOS96607,CO[C@@H]1C[C@@H](c2ncn[nH]2)N(CCn2cnc3ccccc3c2=O)C1,2 +EOS44576,Cc1noc(C)c1CN1CCn2ccnc2C1,2 +EOS51077,Cl.N[C@@H]1CCN(C(=O)CSC(F)(F)F)C1,2 +EOS6745,CS(=O)(=O)N1CCCCC1CCNC(=O)c1n[nH]c2c1COCC2,2 +EOS43334,CCc1csc([C@H]2CCCN(C(=O)c3c(C)nn(C)c3OC)C2)n1,2 +EOS34539,O=C(CCC(=O)N1CCN(c2ccccn2)CC1)NCCc1c[nH]c2ccccc12,2 +EOS12390,CC(=O)c1cccc(OCCN2CC(C)OC(C)C2)c1.Cl,2 +EOS64973,CNC(=O)C1CCN([C@@H](C)C(=O)NCCc2ccc(F)cc2)CC1,2 +EOS87818,CC(=O)c1cc2c(cc1NC(=O)c1ccc(-n3cccn3)cc1)OCO2,2 +EOS86089,CN(C[C@H]1COc2ccccc2O1)C(=O)c1ccc2c(c1)C(=O)N(Cc1ccco1)C2=O,2 +EOS25795,O=C(CCS(=O)(=O)c1ccc2c(c1)CCN2C(=O)C1CC1)Nc1ccccc1,2 +EOS32968,CCn1ncc(C(=O)N2CCCC(C)(c3noc(C4CCOCC4)n3)C2)c1C,2 +EOS16872,Cc1c(C)n(C(=O)CN2CCC(C)CC2)c2ccccc12,2 +EOS7109,Cc1oc2c(c1C(=O)N1CCc3[nH]cnc3C1c1cccnc1)C(=O)CCC2,2 +EOS100490,O=C(c1ccc(Cl)cc1Cl)n1nnc2ccccc21,2 +EOS41323,CS(=O)(=O)N[C@H]1CCCN(Cn2c(=O)oc3ccccc32)C1,2 +EOS55767,CCn1ccnc1CN(CCOC)S(=O)(=O)c1ccc(C#N)cc1,2 +EOS26822,COc1cccc(CNC(=O)CSc2nnc3c(=O)n(-c4ccccc4)ccn23)c1OC,2 +EOS53211,Cc1cc(F)ccc1NC(=O)Cn1c(=O)n(C)c2ccccc21,2 +EOS11168,COCCN1CC(C(=O)NCCn2nc(-c3ccccc3)ccc2=O)CCC1=O,2 +EOS37474,Cc1cc(=O)[nH]cc1C(=O)N1CC[C@H](Oc2ccccc2)C1,2 +EOS51623,O=C(Cc1csc(C2CCCC2)n1)N1CC2(CCNCC2)[C@H]2COC[C@H]21,2 +EOS25225,COc1ccc(-n2nc3oc4c(O)cccc4cc-3c2=O)cc1,2 +EOS80131,COc1ccc(NC(=O)c2ccc3nc(C)ccc3c2)cc1CO,2 +EOS69425,CCCOc1ccc(C(F)(F)F)cc1NC(=O)CN1CSCC1=O,2 +EOS60423,C#CCOc1ccc(CCNC(=O)Cn2cccn2)cc1,2 
+EOS27923,CCn1c(N2CCCC(C(=O)NCC3CCN(Cc4cccc(C)c4)CC3)C2)cc(=O)n(C)c1=O,2 +EOS29821,Cc1noc(-c2ccnc(-n3cnc(C(=O)NC4CCCCC4)c3)c2)n1,2 +EOS84718,CN1CCC(N(C)C(=O)c2nn(-c3ccccc3)c(=O)c3ccccc23)CC1,2 +EOS78472,CCc1n[nH]c([C@@H]2CN(C(=O)CCn3ccnc3)CCO2)n1,2 +EOS78345,COc1cc(CN2CCC([C@H](O)c3nccn3C)CC2)cc(OC)c1,2 +EOS4115,CN(C(=O)CC1(c2cccc(C(F)(F)F)c2)CC(=O)N(C2CC2)C1=O)C1CCOC1,2 +EOS73098,CO[C@@H](C)CS(=O)(=O)Nc1ccc2nscc2c1,2 +EOS81365,COc1ccc(-c2nc(CN3CCN(c4nc(C)cs4)CC3)co2)cc1,2 +EOS87658,COc1c(C)cc(CNC[C@H]2CN(C)CCO2)cc1C,2 +EOS55331,O=C(c1ccc(-c2ccccc2)[nH]c1=O)N1CC[C@H]1c1ccccc1,2 +EOS14144,CCNc1cc(N2CCN(C(=O)c3ccc(C)cc3)CC2)nc(C)n1,2 +EOS809,CCc1nn2c(=O)cc(CSCC(=O)Nc3cccc(C)c3)nc2s1,2 +EOS17520,CCN(CC)C(=O)c1ccc2c(c1)N(CC(=O)NCc1ccc(C)cc1)C(=O)CS2,2 +EOS56103,O=c1[nH]c2ccccc2nc1CN1CCC[C@H]1c1cccs1,2 +EOS67188,COc1ccc(C)cc1-n1ccc(C(=O)N2CCOC[C@@H]2C)n1,2 +EOS94960,Cc1ccc2[nH]c([C@H]3CCCN(S(=O)(=O)CCCF)C3)nc2c1,2 +EOS1652,COCCNC(=O)c1cc2n(n1)C[C@@H](NC(=O)C1(c3ccc(OC)cc3)CC1)C2,2 +EOS49107,Cc1[nH]c(=O)[nH]c1C(=O)N1C[C@H](COc2ccccc2)OC(C)(C)C1,2 +EOS49879,Cc1noc([C@H](C)N2CCN(c3cc(Cl)ccc3C#N)CC2)n1,2 +EOS49048,COc1ccccc1N1CCN(C(=O)Nc2cccc3cccnc23)CC1,2 +EOS16429,CNC(=O)N1CCC(C2CCN(C(=O)c3cccc4[nH]ccc34)C2)CC1,2 +EOS63304,CCCc1n[nH]c([C@@H]2CN(C(=O)c3cnccn3)CCO2)n1,2 +EOS72950,O=C(CCc1ccc(=O)[nH]c1)N[C@H]1[C@@H]2CN(Cc3ccccc3)C[C@@H]21,2 +EOS14232,CCS(=O)(=O)N1CCN(c2ccc(Nc3cccc(C)n3)nn2)CC1,2 +EOS74664,O=C(Nc1ccccc1C(=O)Nc1ccc(Cl)cn1)c1ccon1,2 +EOS91830,Cc1noc2nc(C(C)C)cc(C(=O)N3CCOC[C@@]3(C)C3CC3)c12,2 +EOS90365,CCOc1ccc(C(=O)N[C@@H](C)c2cccs2)cc1OCC,2 +EOS5048,COc1ccc([C@@H]2CN(C(=O)CCc3ccncc3)[C@@H]3C4CCN(CC4)[C@H]23)cc1,2 +EOS70509,CN1CCN(C(=O)Nc2ccn(CC(F)F)n2)Cc2ccccc21,2 +EOS57960,c1ccc(-c2nnc3ccc(N[C@H]4CCCC45OCCO5)nn23)cc1,2 +EOS58938,Cc1nn(C)c(C)c1CC(=O)N1CCC[C@H](n2ccnc2)C1,2 +EOS36643,CS(=O)(=O)N1CC(=O)N2CCc3ccccc3C2C1,2 +EOS32108,CCOCC(=O)N1CCC2(CC1)CN(c1ccc(C#N)c(C(F)(F)F)c1)CC2C(=O)NC,2 +EOS29164,CC(=O)Nc1ccc(-n2cnc(C(=O)N3CCN(c4cccc(C)c4C)CC3)c2)nc1,2 
+EOS46827,Cc1ccc(CN(C)CC(=O)N(C)c2c(N)n(Cc3ccccc3)c(=O)[nH]c2=O)cc1,2 +EOS41610,CC(C)NC(=O)CN1CCCC[C@@H]1Cc1ccc(O)cc1,2 +EOS36335,Cn1c(=O)c(C(=O)NCc2cccnc2)c(O)c2ccccc21,2 +EOS921,Cc1nc(-c2ccc(N3CCN(C(=O)c4ccccc4)CC3)cc2)no1,2 +EOS102177,C[C@]12CCC3[C@@H](CC[C@]4(O)C[C@@H](O)CC[C@]34C=O)[C@@]1(O)CC[C@@H]2C1=CC(=O)OC1,2 +EOS26047,Cc1cccc(NC(=O)C2CCCN(S(=O)(=O)c3cn(C)cn3)C2)c1C,2 +EOS52350,Cc1ccc(S(=O)(=O)NCC[C@H]2CCNC2)c(C)c1.Cl,2 +EOS100819,CC1(COc2ccc3c(c2)ncn3-c2ccc3cccc(N4CCC(N)CC4)c3n2)COC1,2 +EOS37615,CN1C[C@@H](CCNC(=O)Nc2ccn3ccnc3c2)CC1=O,2 +EOS49443,NC(=O)c1cccc(Cc2noc([C@H]3CC34CCOCC4)n2)c1,2 +EOS77529,O=C(NCc1ccc(O)c(F)c1)Nc1ccc(-c2csnn2)cc1,2 +EOS87285,Cc1noc([C@@H]2COCCN2Cc2ccc(F)cc2F)n1,2 +EOS100247,CN1CCN(c2ccc(-c3ccncc3-c3cc(F)c(O)c(F)c3)cc2)CC1,2 +EOS9367,O=S(=O)(c1cccs1)N1CCc2[nH]cnc2C1c1cccnc1,2 +EOS59854,O=S1(=O)CCC(S(=O)(=O)N2CCC[C@H]2c2cccc3ccccc23)CC1,2 +EOS68838,Nc1nnc(SCC(=O)N2CCNC2=O)s1,2 +EOS25661,Cc1ccc(CNC(=O)CC(C)n2nc(C)cc2C)cc1,2 +EOS68480,Cc1cnn(C[C@H]2CCCCN2Cc2nccn2CC(F)(F)F)c1,2 +EOS86969,COc1ccccc1NC(=O)c1ccc(S(C)(=O)=O)cc1,2 +EOS53930,CC(=O)N1CCN([C@@H](C)C(=O)Nc2ccc(Cl)cc2)CC1,2 +EOS92311,COCC[C@@H](CO)NC(=O)c1csc(-c2cnc[nH]2)n1,2 +EOS75702,CC[C@H]1C(=O)NCCN1C(=O)c1cn(-c2ccc(Br)cc2)cn1,2 +EOS20463,CCn1nccc1C(=O)N1CCCC(N(Cc2cccnc2)C(=O)COC)C1,2 +EOS55812,Cc1nonc1CC(=O)N1CCCSC1,2 +EOS88541,Cc1cc(C)n(C[C@H]2CCCN2C(=O)c2c(C)noc2C)n1,2 +EOS96439,CCN1CC(=O)N([C@H]2CCCOc3cc(Cl)ccc32)C1=O,2 +EOS47481,Cc1ccc(N2C[C@@H](C(=O)N3CCC(C(=O)NCc4ccccc4)CC3)CC2=O)cc1F,2 +EOS2110,COCCn1c(SCCCc2ccccc2)nc2c1c(=O)[nH]c(=O)n2C,2 +EOS41657,CN1C(=O)N[C@@H](c2cccs2)C2=C1CN(CCO)C2=O,2 +EOS97500,C[C@H](NCCN1CCc2ccccc2C1=O)c1ccc(=O)[nH]n1,2 +EOS40840,CNC(=O)c1ccc(OCc2cc(F)cc3cccnc23)cc1,2 +EOS93595,Cc1cc(NC(=O)NC[C@H](C)N2CCOCC2)no1,2 +EOS26486,O=C(c1ccccc1)N1CCN(C(=O)c2cc(-c3ccncc3)[nH]n2)CC1,2 +EOS37008,O=C1NC2(CCCC2)C(=O)N1Cc1coc(-c2cccc(F)c2)n1,2 +EOS54478,COc1ccc(C(=O)Nc2ccc(NC(C)=O)cc2)cc1Br,2 +EOS23573,Cc1cc(NC(=O)c2cc(S(=O)(=O)N3CCOCC3)cn2C)ccc1Br,2 
+EOS41714,CCN(C)C(=O)c1cccc(C(=O)Nc2cccc(-n3cccn3)c2)c1,2 +EOS42037,CC[C@H]1CN(C(=O)c2cn3c(n2)CCC3)CCN1CC(F)F,2 +EOS54525,COc1ccc(NC(C)=O)cc1Nc1ncnc2ccsc12,2 +EOS89481,Cc1cccc(S(=O)(=O)NC(=O)Cc2ccc(OCC#N)cc2)c1,2 +EOS60072,CCN(CC)C(=O)c1nn(-c2ccccc2)c(=O)c2ccccc12,2 +EOS21545,Cc1ccc(S(=O)(=O)Nc2ccc3c(c2)oc(=O)n3C)cc1,2 +EOS92249,Cc1ccnc(NC(=O)CN(C)CC(=O)N(C)Cc2ccco2)c1,2 +EOS91561,Cc1nc(CN2CCN(C(=O)Nc3cnn([C@@H](C)C4CC4)c3)CC2)oc1C,2 \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/model_retrain/test_model_retrain.py b/atomsci/ddm/test/integrative/model_retrain/test_model_retrain.py new file mode 100644 index 00000000..3063acc8 --- /dev/null +++ b/atomsci/ddm/test/integrative/model_retrain/test_model_retrain.py @@ -0,0 +1,86 @@ +import atomsci.ddm.pipeline.parameter_parser as parse +import atomsci.ddm.pipeline.model_pipeline as mp +import atomsci.ddm.pipeline.compare_models as cm +import atomsci.ddm.utils.model_file_reader as mfr +import atomsci.ddm.utils.test_utils as tu +import atomsci.ddm.utils.model_retrain as mr +import os +import shutil +import glob +import json + +def clean(result_dir): + if os.path.exists(result_dir): + shutil.rmtree(result_dir) + +def train_model(result_dir): + """Train a model in production mode""" + + json_file = tu.relative_to_file(__file__, './config.json') + example_file = tu.relative_to_file(__file__, './example.csv') + + with open(json_file, 'r') as f: + config_json = json.load(f) + config_json['dataset_key'] = example_file + config_json['result_dir'] = result_dir + + # Parse parameters + params = parse.wrapper(config_json) + + # Create model pipeline + model = mp.ModelPipeline(params) + + # Train model + model.train_model() + +def retrain_model(model_tar, new_result_dir, keep_seed): + """Retrains a model""" + mr.train_model_from_tar(model_tar, new_result_dir, keep_seed=keep_seed) + +def run_test_retrain(keep_seed): + """Trains and retrains a model + + Trains and retrains a model and compares the results + """ + 
+ # train a model + result_dir = tu.relative_to_file(__file__, 'result') + train_model(result_dir) + + # find the tar file + result_df = cm.get_filesystem_perf_results(result_dir) + assert(len(result_df) == 1) + model_tar = result_df['model_path'].values[0] + + # retrain the model + new_result_dir = tu.relative_to_file(__file__, 'retrain_result') + retrain_model(model_tar, new_result_dir, keep_seed) + + # find the new tar file + result_df = cm.get_filesystem_perf_results(new_result_dir) + assert(len(result_df) == 1) + new_model_tar = result_df['model_path'].values[0] + + original_model = mfr.ModelFileReader(model_tar) + new_model = mfr.ModelFileReader(new_model_tar) + + assert new_model.get_split_uuid() == original_model.get_split_uuid() + + if keep_seed: + assert new_model.get_random_seed()==original_model.get_random_seed() + else: + assert new_model.get_random_seed()!=original_model.get_random_seed() + + # clean files + split_files = glob.glob(tu.relative_to_file(__file__, './example_*_random_*.csv')) + for sf in split_files: + os.remove(sf) + clean(new_result_dir) + clean(result_dir) + +def test_retrain(): + run_test_retrain(True) + run_test_retrain(False) + +if __name__ == '__main__': + test_retrain() \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/multitask_split/test_split.py b/atomsci/ddm/test/integrative/multitask_split/test_split.py index d47ecc3b..f89d6fdb 100644 --- a/atomsci/ddm/test/integrative/multitask_split/test_split.py +++ b/atomsci/ddm/test/integrative/multitask_split/test_split.py @@ -41,6 +41,54 @@ def clean(): delete_file('thirty_gen_split.csv') delete_file('ss_split.csv') +def test_seeded_splits(): + clean() + + init_data() + + smiles_col = 'compound_id' + id_col = 'compound_id' + frac_train = 0.8 + frac_test = 0.1 + frac_valid = 0.1 + num_super_scaffolds = 60 + dfw = 2 # chemical distance importance weight + rfw = 1 # split fraction importance weight + + total_df = pd.read_csv('KCNA5_KCNH2_SCN5A_data.csv', 
dtype={id_col:str}) + response_cols = ['target_KCNA5_standard_value', + 'target_KCNH2_standard_value', + 'target_SCN5A_activity'] + + # ------------------------------------------------------------------------- + # one generation multitask scaffold split + mss = MultitaskScaffoldSplitter() + A_split_df = split_with(total_df, mss, + smiles_col=smiles_col, id_col=id_col, response_cols=response_cols, + diff_fitness_weight_tvt=dfw, ratio_fitness_weight=rfw, num_generations=1, + num_super_scaffolds=num_super_scaffolds, + frac_train=frac_train, frac_test=frac_test, frac_valid=frac_valid, seed=0) + + b_mss = MultitaskScaffoldSplitter() + B_split_df = split_with(total_df, b_mss, + smiles_col=smiles_col, id_col=id_col, response_cols=response_cols, + diff_fitness_weight_tvt=dfw, ratio_fitness_weight=rfw, num_generations=1, + num_super_scaffolds=num_super_scaffolds, + frac_train=frac_train, frac_test=frac_test, frac_valid=frac_valid, seed=0) + + c_mss = MultitaskScaffoldSplitter() + C_split_df = split_with(total_df, c_mss, + smiles_col=smiles_col, id_col=id_col, response_cols=response_cols, + diff_fitness_weight_tvt=dfw, ratio_fitness_weight=rfw, num_generations=1, + num_super_scaffolds=num_super_scaffolds, + frac_train=frac_train, frac_test=frac_test, frac_valid=frac_valid, seed=42) + + assert all(A_split_df['cmpd_id']==B_split_df['cmpd_id']) and all(A_split_df['subset']==B_split_df['subset']) + # compounds can be in the same order + assert not all(A_split_df['subset']==C_split_df['subset']) + + clean() + def test_splits(): clean() @@ -69,7 +117,7 @@ def test_splits(): smiles_col=smiles_col, id_col=id_col, response_cols=response_cols, diff_fitness_weight_tvt=dfw, ratio_fitness_weight=rfw, num_generations=1, num_super_scaffolds=num_super_scaffolds, - frac_train=frac_train, frac_test=frac_test, frac_valid=frac_valid) + frac_train=frac_train, frac_test=frac_test, frac_valid=frac_valid, seed=0) mss_split_df.to_csv('one_gen_split.csv', index=False) assert len(total_df) == 
len(mss_split_df) @@ -86,7 +134,7 @@ def test_splits(): diff_fitness_weight_tvt=dfw, ratio_fitness_weight=rfw, num_generations=num_generations, num_super_scaffolds=num_super_scaffolds, - frac_train=frac_train, frac_test=frac_test, frac_valid=frac_valid) + frac_train=frac_train, frac_test=frac_test, frac_valid=frac_valid, seed=0) mss_split_df.to_csv('thirty_gen_split.csv', index=False) assert len(total_df) == len(mss_split_df) @@ -166,6 +214,7 @@ def test_pipeline_split_and_train(): clean() if __name__ == '__main__': - test_splits() + test_seeded_splits() + #test_splits() #test_pipeline_split_only() #test_pipeline_split_and_train() \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/nanobret_multitask_classification_data.csv b/atomsci/ddm/test/integrative/sampling_test/nanobret_multitask_classification_data.csv new file mode 100755 index 00000000..f2d8d242 --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/nanobret_multitask_classification_data.csv @@ -0,0 +1,431 @@ +,compound_id,base_rdkit_smiles,NEK1_relation,NEK1_mean_pIC50,NEK11_relation,NEK11_mean_pIC50,NEK2_relation,NEK2_mean_pIC50,NEK3_relation,NEK3_mean_pIC50,NEK4_relation,NEK4_mean_pIC50,NEK5_relation,NEK5_mean_pIC50,NEK6_relation,NEK6_mean_pIC50,NEK9_relation,NEK9_mean_pIC50,NEK1_active,NEK11_active,NEK2_active,NEK3_active,NEK4_active,NEK5_active,NEK6_active,NEK9_active +0,PAR_272,O=C(c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)N1CCC(N2CCCCC2)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +1,PAR_315,CCN(CC)S(=O)(=O)c1cc(Nc2nccc(-c3ccc(C(=O)NCC#N)cc3)n2)ccc1Cl,,5.552056004461681,,5.4574860448455205,,5.238326759769438,,5.430757234170256,,5.445163827978223,,5.486603647930933,<,4.522878745280337,,5.252417203511357,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+2,ZDG_7_52_4,CN1CCN(c2ccc(Nc3nccc(-c4ccc(C(=O)NCC#N)cc4)n3)nc2)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +3,HO_N_101,COc1ccc(Nc2nc(-c3ccc(OC)c(OC)c3)cc3nccn23)c(C(N)=O)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +4,PAR_337,CC(C)(Oc1ccccc1)C(=O)Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,5.779291879459036,,5.032271077856564,,4.6690495057318,,6.592673502322183,,4.820981977097612,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0 +5,Narazaciclib,CN1CCN(c2ccc(Nc3ncc4cc(C#N)c(=O)n(C5CCCC5)c4n3)cc2)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +6,HO_N_57_1,COc1ccc(-c2cc3nccn3c(Nc3ccccc3C#N)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +7,BA_03_61_01,COc1cc(Nc2ncc3c(C)cc(=O)n(C4CCOCC4)c3n2)cc(OC)c1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +8,PAR_363,N#CCNC(=O)c1ccc(-c2ccnc(Nc3cccc(CN4CCOCC4)c3)n2)cc1,,6.151159915288579,,4.583360913493843,,4.832123723463034,,6.1451321031000266,,4.635987721962014,,6.851435811405105,<,4.522878745280337,,5.269015756005547,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0 
+9,PAR_182,CN1CCN(c2ccc(Nc3nccc(-c4ccc(C(=O)NCC#N)cc4)n3)cc2F)CC1,,5.509596275828427,<,4.522878745280337,,4.835439584676596,,5.8385226992247405,<,4.522878745280337,,6.49132807858564,<,4.522878745280337,,5.156335184776021,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +10,PAR_335,C#Cc1cc(Nc2nccc(-c3ccc(NC(=O)C4CCCN4)cc3)n2)ccc1Cl,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.237059415948903,<,4.522878745280337,,5.254416153865931,<,4.522878745280337,,4.802579991224726,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +11,BA_03_69_c,COc1cc2ncnc(-c3ccc(C(N)=O)c(F)c3)c2cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +12,PAR_334,O=C(Nc1ccc(-c2ccnc(Nc3cccc(N4CCOCC4)c3)n2)cc1)C1CCCN1,,5.027331341083788,<,4.522878745280337,<,4.522878745280337,,6.230662437350819,<,4.522878745280337,,5.892418801404399,<,4.522878745280337,,5.308137455897522,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0 +13,ZDG_7_50_2,CN1CCN(c2ccc(Nc3nccc(-c4ccc(NC(=O)[C@@H]5CCCN5)cc4)n3)cc2F)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +14,HO_N_135_4_A,COc1ccc(-c2cc3nccn3c(Nc3ncccc3C(N)=O)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +15,3746,N#Cc1c(Nc2nc(Nc3ccc(N4CCNCC4)cc3)ncc2Cl)cccc1OCc1c(F)cccc1F,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, +16,3827,CCOc1ccc2c(-c3ccnc(Nc4cccc(Br)c4)n3)cnn2n1,,,,,,,,,,,,6.752026733638193,,,,,,,,,,1.0,, 
+17,Altiratinib,O=C(Nc1cc(Oc2cc(F)c(NC(=O)C3(C(=O)Nc4ccc(F)cc4)CC3)cc2F)ccn1)C1CC1,,5.250812839247721,,5.092031771170105,,5.0900326887847855,,5.022491146605316,,5.346348290654262,<,4.522878745280337,<,4.522878745280337,,4.975134466817432,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +18,PAR_252,O=C(Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)c1cccc(-c2cccnc2)c1,,5.0542453284308175,,5.100565607974711,,4.900463929543359,,5.012345426898412,,4.460761357368833,,4.980072153113726,<,4.522878745280337,,4.766580186299862,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +19,ATH686,CCN1CCN(Cc2ccc(NC(=O)Nc3ccc(Oc4ccnc(N)n4)cc3)cc2C(F)(F)F)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.681749789015572,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +20,TL01_022,COc1ccc(Nc2nccc(-c3ccc(S(=O)(=O)NCCN)cc3)n2)cc1,,5.712814683943481,<,4.522878745280337,,4.6322874985094575,,4.978363242436672,<,4.522878745280337,,6.341054794006962,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +21,ZDG_7_43_3,Cn1cc(-c2ccc(N3CCOCC3)cc2)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +22,BA_03_53_12,COc1ccc2c(-c3ccnc(Nc4cccc(N5CCOCC5)c4)n3)cnn2n1,,6.062751461067501,,5.118481547126134,,5.487245060747732,,6.373063665619277,,5.033558942245732,<,4.522878745280337,<,4.522878745280337,,6.705887594577249,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0 +23,PAR_379,CN(C)CCCNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.05937779651617,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+24,PAR_376,O=C(CC12CC3CC(CC(C3)C1)C2)Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,4.953028453525107,,5.006589807434891,,4.846953375389458,,5.046613355088627,,4.981795846064236,,5.115687643819264,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +25,PAR_375,Cc1cccc(C(=O)Nc2ccc(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cc2)n1,,5.051954664361819,,5.048577381602782,,4.824723451097711,,4.943373766429193,,4.945726828962131,,5.188651826933941,<,4.522878745280337,,4.915625881147178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +26,PAR_331,O=C(Nc1ccc(-c2ccnc(Nc3cc(N4CCOCC4)cc(C(F)(F)F)c3)n2)cc1)C1CCCN1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.738666862982437,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +27,BA_03_50_04,COc1ccc2ncnc(Nc3cccc(S(N)(=O)=O)c3)c2c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +28,PAR_403,COc1ccccc1CC(=O)N[C@H]1CCN(c2ccnc(Nc3cc(OC)c(OC)c(OC)c3)n2)C1,,4.622548972536967,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +29,GSK329,CNc1cc(Oc2c(Cl)cc(NC(=O)Nc3cccc(C(F)(F)F)c3)cc2Cl)ncn1,<,4.522878745280337,,5.1792726384345125,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +30,PAR_225,N#CCNC(=O)c1ccc(-c2ccnc(NCCn3ccnc3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+31,PAR_380,O=C(Nc1cc(O)ccn1)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,4.920029645072777,<,4.522878745280337,<,4.522878745280337,,6.095333598804788,<,4.522878745280337,,6.511594894580872,<,4.522878745280337,,5.183946010215339,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0 +32,ZDG_7_46_3,Cn1cc(-c2cc3ccccc3o2)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +33,HO_N_138_3_A,Cn1ncc2cc(-c3cc4nccn4c(Nc4ncccc4C(N)=O)n3)ccc21,,4.55056495781959,<,4.522878745280337,<,4.522878745280337,,4.672852955355713,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +34,HO_N_90,COc1ccc(-c2cc3nccn3c(Nc3cc4ccccc4cn3)n2)cc1OC,,4.764212049256466,,4.67578301752895,,4.644063314968692,<,4.522878745280337,,4.765623989645558,<,4.522878745280337,<,4.522878745280337,,4.90264845805378,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +35,PAR_142,O=C(NCCC1CCOCC1)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.6308182614698215,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +36,BA_03_56_12,CCOc1ccc2c(-c3ccnc(Nc4cccc(N5CCOCC5)c4)n3)cnn2n1,,6.02374236875252,,5.056969533906477,,5.365743632392702,,6.281789923080026,,4.976667870676705,,7.744195610646878,<,4.522878745280337,,6.463483079487036,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0 +37,TL01_026,COc1ccccc1Nc1nccc(-c2ccc(S(=O)(=O)NCCN)cc2)n1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+38,GW843682,COc1cc2ncn(-c3cc(OCc4ccccc4C(F)(F)F)c(C(N)=O)s3)c2cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +39,BA_03_60_04,CCCCn1c(=O)cc(C)c2cnc(Nc3ccc(N4CCN(C)CC4)c(F)c3)nc21,,4.561733678166957,<,4.522878745280337,,5.711470552571224,<,4.522878745280337,,4.637347915145522,,5.159545510921731,<,4.522878745280337,,5.401638890483255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +40,HO_N_135_2_A,NC(=O)c1cccnc1Nc1nc(-c2ccc3c(c2)OCCO3)cc2nccn12,<,4.522878745280337,,4.986066288377341,,5.090364874929746,,5.502197651941389,,5.08345638049453,,5.49749941815019,<,4.522878745280337,,5.4066312032053725,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +41,AST487,CCN1CCN(Cc2ccc(NC(=O)Nc3ccc(Oc4cc(NC)ncn4)cc3)cc2C(F)(F)F)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +42,PAR_310,N#CCNC(=O)c1ccc(-c2ccnc(Nc3ccc(Cl)cc3)n2)cc1,,5.458554470785787,,5.096843984530558,,4.728376307122485,,5.254035848504938,<,4.522878745280337,,6.376312835930973,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +43,PAR_268,CC(C)(C)CNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,5.890530010199987,,4.985652618950243,,4.924572396157946,,5.500165740249142,<,4.522878745280337,,6.4941348376070565,<,4.522878745280337,,5.183988608659089,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +44,HO_N_133_2_A,NC(=O)c1cccnc1Nc1nc(-c2ccnc(F)c2)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+45,BA_03_53_01,COc1ccc2c(-c3ccnc(Nc4cc(OC)c(OC)c(OC)c4)n3)cnn2n1,,5.773880703765544,,5.112542418995005,,5.374874677070926,,5.551742176902253,,4.927485323089146,,7.174881717215393,<,4.522878745280337,,6.891121194058334,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0 +46,TL01_019,O=S(=O)(NCCNS(=O)(=O)C1CC1)c1ccc(-c2ccnc(Nc3ccccc3)n2)cc1,,5.6302895225802505,,5.015573097083401,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,6.447615587988817,<,4.522878745280337,,4.991791916486836,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +47,PAR_291,COc1cc(Nc2nccc(-c3ccc(C(=O)NCC#N)cc3)n2)ccc1N1CCOCC1,,4.653285401787367,<,4.522878745280337,<,4.522878745280337,,4.997500809031838,<,4.522878745280337,,5.326959152242042,<,4.522878745280337,,4.894680040351775,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +48,ZDG_7_40_C,Cc1ccccc1CCC(=O)N1CCc2cc(-c3cn(C)c4ncnc(N)c34)ccc21,,5.200969778775301,,4.776809615678486,,5.775092389602924,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +49,ZDG_6_64,Cn1nc(-c2cccc(NS(C)(=O)=O)c2)c2nc(Nc3ccc(Cl)cc3)ncc21,,5.424252817681134,,5.174190495793877,,5.275611497701943,,5.367483415538971,,5.211276375328347,<,4.522878745280337,<,4.522878745280337,,5.421185973017695,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +50,HO_N_136_6_A,Cn1nccc1-c1cc2nccn2c(Nc2ncccc2C(N)=O)n1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +51,BA_03_59_02,Cc1cc(=O)n(C2CCC2)c2nc(Nc3cccc(N4CCOCC4)c3)ncc12,,5.001601822254985,,4.693183054753221,,6.373729650814312,<,4.522878745280337,<,4.522878745280337,,6.082990515782506,<,4.522878745280337,,5.437486302264579,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0 
+52,BA_03_78_d,Clc1ccc(CCNc2ncnc3sc(Br)cc23)cc1,<,4.522878745280337,,5.350427583242821,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +53,ZDG_7_44_1,Cn1cc(C2=CCOCC2)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +54,PAR_294,CN1CCN(c2ccc(Nc3nccc(-c4ccc(C(=O)NCC#N)cc4)n3)cc2)CC1,,5.044635900449657,<,4.522878745280337,,4.560321091507077,,5.273291252702858,<,4.522878745280337,,5.827809393613137,<,4.522878745280337,,4.842322140279872,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +55,BBT594,CC(=O)Nc1cc(Oc2ccc3c(c2)CCN3C(=O)Nc2ccc(CN3CCN(C)CC3)c(C(F)(F)F)c2)ncn1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.799164165543371,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +56,PAR_298,CN1CCN(C(=O)c2ccc(Nc3nccc(-c4ccc(C(=O)NCC#N)cc4)n3)cc2)CC1,,5.251145281525807,<,4.522878745280337,<,4.522878745280337,,5.0225671244809424,<,4.522878745280337,,5.613491143015396,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +57,PAR_269,O=C(NCC1CCC1)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,4.994533139752917,,4.927268579769375,,4.883063139568752,,5.352743745097549,,4.959082244697873,,5.608610180689029,<,4.522878745280337,,5.144686349387098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +58,PAR_404,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)c4ccc(F)cc4F)C3)n2)cc(OC)c1OC,,5.558465684840074,,4.541361905337933,,4.900332026351334,<,4.522878745280337,,4.739977521082981,,5.572057466455448,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+59,EF_3_203,CS(=O)(=O)c1cccc(Nc2nccc(N3CC[C@H](NC(=O)COc4ccccc4)C3)n2)c1,,4.784048510365078,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.0071295739106665,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +60,HO_N_96,CN1CCN(c2ccc(Nc3ncc4c(n3)c(-c3cccc(NS(C)(=O)=O)c3)nn4C)nc2)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.408824154707516,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +61,ZDG_7_51_5,Cc1cc(=O)n(C2CCCC2)c2nc(Nc3ccc(C(=O)N4CCN(C)CC4)cc3)ncc12,,4.90796498375648,<,4.522878745280337,,5.137433522832007,<,4.522878745280337,,4.62633228335401,,5.742267504842507,<,4.522878745280337,,5.141072538859764,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +62,TL01_020,NCCNS(=O)(=O)c1ccc(-c2ccnc(NC3CCNCC3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +63,PAR_261,CC1CC1C(=O)Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,5.354036283168988,,4.914258788385221,,4.810569214288567,,5.059421625497709,,4.602053372163663,,5.929412530692798,<,4.522878745280337,,4.926643299867915,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +64,HO_N_42,COc1ccc(-c2cc3nccn3c(Nc3ccccc3C(N)=O)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +65,BA_03_50_15,COc1ccc2ncnc(NCCc3cccc(F)c3)c2c1,<,4.522878745280337,,5.138509873236028,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+66,PAR_158,CC(C#N)NC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,5.7386084272523705,,4.701338559819312,,5.121475239081094,,6.005153475423644,,4.729687113950752,,6.675994355441816,<,4.522878745280337,,5.366898760053524,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0 +67,PAR_355,Cc1cc(=O)n(C2CCCC2)c2nc(Nc3ccn(C(F)F)n3)ncc12,,4.600182760304779,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.357145720986832,<,4.522878745280337,,4.694706720393334,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +68,GSK2606414,Cn1cc(-c2ccc3c(c2)CCN3C(=O)Cc2cccc(C(F)(F)F)c2)c2c(N)ncnc21,,4.750939925719619,,5.224233686651976,,5.383612695573137,<,4.522878745280337,,4.256337912912628,,4.674469636983738,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +69,PAR_402,CNc1ccc(C(=O)N[C@H]2CCN(c3ccnc(Nc4cc(OC)c(OC)c(OC)c4)n3)C2)cc1,,5.3976646548335685,,5.152360176139837,,5.100609381453203,<,4.522878745280337,,5.219988218415373,,5.787184715248259,<,4.522878745280337,,4.920008110197255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +70,3820,CCOc1ccc2c(-c3ccnc(Nc4ccc(N5CCN(C)CC5)c(F)c4)n3)cnn2n1,,,,,,,,,,,,7.327902142064281,,,,,,,,,,1.0,, +71,Tovorafenib,C[C@@H](NC(=O)c1ncnc(N)c1Cl)c1ncc(C(=O)Nc2cc(C(F)(F)F)c(Cl)cn2)s1,<,4.522878745280337,,4.768657012738542,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +72,ZDG_6_75_4,COc1cc(Nc2ncc3c(n2)c(-c2ccc(F)c(F)c2)nn3C)cc(OC)c1OC,,6.084243400188249,<,4.522878745280337,,5.6204374173285006,,5.94641893903062,<,4.522878745280337,,7.6506234286994035,<,4.522878745280337,,5.529835856645433,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +73,BA_03_50_18,COc1ccc2ncnc(NCCc3cccc(Br)c3)c2c1,<,4.522878745280337,,5.173425534719117,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+74,PAR_323,Cc1ccc(Nc2nccc(-c3ccc(NC(=O)C4CCCN4)cc3)n2)cc1O,,4.835689359282434,,4.573961326928361,<,4.522878745280337,,5.734303057224471,<,4.522878745280337,,5.839903129257649,<,4.522878745280337,,4.7368123982363,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +75,PAR_112,CC(C)(CN)CNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +76,HO_N_105,COc1ccc(-c2cc3nccn3c(Nc3ccc(N4CCN(C)CC4)cc3)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +77,BA_03_56_11,CCOc1ccc2c(-c3ccnc(Nc4cc(N5CCOCC5)cc(C(F)(F)F)c4)n3)cnn2n1,,4.87182347956317,<,4.522878745280337,,4.742710778519723,,4.791763951581726,<,4.522878745280337,,6.229989330291368,<,4.522878745280337,,6.356460014325777,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0 +78,GCN2_IN_1,c1nn(C2CCOCC2)cc1Nc1ncc2nnn(-c3ccc4cn[nH]c4c3)c2n1,,4.73444963419206,<,4.522878745280337,,5.686990572523224,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +79,BA_03_61_07,COc1ccccc1Nc1ncc2c(C)cc(=O)n(C3CCOCC3)c2n1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +80,PAR_314,Cc1cc(Nc2nccc(-c3ccc(C(=O)NCC#N)cc3)n2)ccc1N1CCOCC1,,5.400563622013164,,4.642276208868983,,4.830992345816636,,5.410659988231124,<,4.522878745280337,,6.183031258097228,<,4.522878745280337,,5.102286733139374,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 
+81,PAR_157,N#CNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,4.608026971547095,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +82,PAR_109,CC(C)N(C(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)C(C)C,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.74539665738371,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +83,ZDG_7_39_A,Cn1cc(-c2ccc3c(c2)CCN3C(=O)Cc2ccc(C(C)(C)C)cc2)c2c(N)ncnc21,,4.715600709998829,,5.687502309749495,,6.283088612988116,<,4.522878745280337,,5.209563989701855,,4.666105933885059,<,4.522878745280337,<,4.522878745280337,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 +84,PAR_406,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)C4CCN(C)CC4)C3)n2)cc(OC)c1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +85,BA_03_55_01,COc1cc(Nc2nccc(-c3cnn4ncccc34)n2)cc(OC)c1OC,,5.132987567403431,<,4.522878745280337,,4.856408882448614,<,4.522878745280337,<,4.522878745280337,,5.939016896221744,<,4.522878745280337,,5.883734193517436,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +86,Axitinib,CNC(=O)c1ccccc1Sc1ccc2c(/C=C/c3ccccn3)n[nH]c2c1,<,4.522878745280337,,4.867104511077254,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +87,BA_03_50_05,COc1ccc2ncnc(Nc3c(F)cccc3F)c2c1,<,4.522878745280337,,4.596618862743608,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+88,3829,COc1cccc(Nc2nccc(-c3cnn4nc(OC)ccc34)n2)c1,,5.255655162286165,,4.846115892045417,,4.900122251889135,,5.610499231954557,,4.715548992582638,,6.705214911312019,<,4.522878745280337,,6.072790227115989,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0 +89,ZDG_7_31_A,Cn1cc(-c2ccc3c(c2)CCN3C(=O)Cc2cccc(Cl)c2)c2c(N)ncnc21,,5.452325606109792,,5.400397619401221,,5.831193316226468,<,4.522878745280337,<,4.522878745280337,,5.060020330334237,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +90,ZDG_2_91,CCn1nc(-c2cccc(NS(C)(=O)=O)c2)c2nc(Nc3cc(OC)c(OC)c(OC)c3)ncc21,<,4.522878745280337,,5.363141976659056,,6.800229265996677,,5.580865079060308,,5.392522529985525,,6.964068228651505,<,4.522878745280337,,6.9750003957975135,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0 +91,PAR_159,N#CCCNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,5.553248711844009,<,4.522878745280337,,4.720220027609966,,5.556286074241282,<,4.522878745280337,,5.991463349980734,<,4.522878745280337,,5.309241281674459,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +92,HO_N_133_4_A,NC(=O)c1cccnc1Nc1nc(-c2ccc(S(=O)(=O)N3CCCC3)cc2)cc2nccn12,,5.167744975172301,,5.0077681177720725,<,4.522878745280337,,4.627196303033837,<,4.522878745280337,,4.887691096286184,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +93,2093,CN(C)C/C=C/C(=O)Nc1cccc(C(=O)Nc2ccc(Nc3nccc(-c4cccnc4)n3)cc2F)c1,,,,,,,,,,,,5.6925039620867866,,,,,,,,,,0.0,, +94,PAR_405,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)CC45CC6CC(CC(C6)C4)C5)C3)n2)cc(OC)c1OC,,4.981411343474717,,4.761225157557821,,4.72133828277282,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +95,HO_N_67,COc1ccc(-c2cc3nccn3c(Nc3ccccc3NS(C)(=O)=O)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+96,HO_N_136_5_A,NC(=O)c1cccnc1Nc1nc(-c2ccccc2F)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.766690674784758,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.600551282564767,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +97,HO_N_57_2,COc1ccc(-c2cc3nccn3c(Nc3cccnc3C(N)=O)n2)cc1OC,,5.23997849830724,,4.969907720086619,,5.191222602450658,<,4.522878745280337,,5.242906129665422,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +98,PAR_320,CC(C)c1ccc(Nc2nccc(-c3ccc(C(=O)NCC#N)cc3)n2)cc1,,5.543472992608432,,4.928232072657166,<,4.522878745280337,,5.411014810538559,,4.755247751878081,,6.353178256283163,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +99,Encorafenib,COC(=O)N[C@@H](C)CNc1nccc(-c2cn(C(C)C)nc2-c2cc(Cl)cc(NS(C)(=O)=O)c2F)n1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +100,ZDG_6_51_2,COc1cc(Nc2ncc3c(n2)c(-c2cccc(NC(=O)CC#N)c2)nn3C)cc(OC)c1OC,,6.52944874090885,,5.696549972826222,,6.530886968451726,,6.357595202448904,,5.638167199966386,,8.103824589968996,<,4.522878745280337,,7.184540852844173,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0 +101,BA_03_56_14,CCCCNC(=O)c1cc(F)cc(Nc2nccc(-c3cnn4nc(OCC)ccc34)n2)c1,,5.189636537748015,<,4.522878745280337,,5.204904108573477,,5.659405529983294,,5.19138944283402,,6.214638971935892,<,4.522878745280337,,5.645197983557392,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +102,3819,CCOc1ccc2c(-c3ccnc(Nc4ccc(OC)c(OC)c4)n3)cnn2n1,,,,,,,,,,,,7.537602002101043,,,,,,,,,,1.0,, +103,Rac_CCT_250863,CC(/C=C\C(F)(F)F)Oc1cc(-c2cc(-c3cc(CN(C)C)cs3)cnc2N)ccc1C(N)=O,<,4.522878745280337,<,4.522878745280337,,5.471954824304111,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+104,PAR_330,CS(=O)(=O)c1cccc(Nc2nccc(-c3ccc(NC(=O)C4CCCN4)cc3)n2)c1,,4.944417751456352,,4.548317198621807,<,4.522878745280337,,5.730813648408817,<,4.522878745280337,,5.480188371406996,<,4.522878745280337,,5.378530517324077,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +105,PAR_377,CC1(C)CC1C(=O)Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,6.105174967139251,,5.753508816051484,,5.623341311836982,,5.80430564114047,,5.432611278347096,,6.792188865778642,<,4.522878745280337,<,4.522878745280337,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +106,PAR_383,CCCc1ccnc(NC(=O)c2ccc(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cc2)c1,,5.29199862846524,,5.3124428431629465,,5.135043190488083,,5.174283523278509,,5.239410092259872,,5.375056794357965,<,4.522878745280337,,5.022852102453122,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +107,PAR_382,O=C(NCC1CC1)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,5.371295924863484,,5.090630710422227,,4.993542118442684,,5.54391657291866,,5.043535161781369,,6.282045923356396,<,4.522878745280337,,5.492640141638793,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +108,ZDG_7_38_A,Cn1cc(-c2ccc3c(c2)CCN3C(=O)Cc2ccc(C#N)cc2)c2c(N)ncnc21,,5.04880558733107,,4.809090020124783,,5.060272274498873,<,4.522878745280337,,4.743080435741345,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +109,Bafetinib,Cc1ccc(NC(=O)c2ccc(CN3CC[C@H](N(C)C)C3)c(C(F)(F)F)c2)cc1Nc1nccc(-c2cncnc2)n1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +110,ZDG_7_48_1,CN(C)CC(=O)Nc1ccc(-c2cn(C)c3ncnc(N)c23)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+111,BA_03_55_14,CCCCNC(=O)c1cc(F)cc(Nc2nccc(-c3cnn4ncccc34)n2)c1,,5.2047710048054485,,5.111339314960842,,5.124041250648066,,5.2210423353814015,,5.014494360747044,,5.303215985283345,<,4.522878745280337,,5.346217595956477,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +112,HO_N_131_5_A,Cn1cc(-c2cc3nccn3c(Nc3ncccc3C(N)=O)n2)cn1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.628146143269041,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +113,PAR_371,Cc1nocc1C(=O)Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,5.117821924352508,,4.687588190158389,,4.704974794280974,,5.327696133354295,,4.743105526322666,,5.725625892672547,<,4.522878745280337,,4.960449315064702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +114,PAR_369,CC1(C(=O)Nc2ccc(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cc2)CCCC1,,6.127525053826286,,5.784071210080595,,5.50639449696777,,6.218905861112133,,4.771642567171203,,7.190435850083502,<,4.522878745280337,,6.041095247784531,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0 +115,ZDG_6_49_1,COc1cc(Nc2ncc3c(-c4cccc(NCC(F)(F)F)c4)nn(C)c3n2)cc(OC)c1OC,,6.691293263430712,,5.284512969713438,,6.212753276097712,,6.966896451516143,,5.369403937201246,<,4.522878745280337,<,4.522878745280337,,6.663831731885772,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0 +116,HO_N_136_4_A,CS(=O)(=O)Nc1cccc(-c2cc3nccn3c(Nc3ncccc3C(N)=O)n2)c1,,4.770388717953961,,4.543240069450591,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +117,ZDG_7_33_C,Cn1cc(-c2ccc3c(c2)CCN3C(=O)Cc2cscn2)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+118,BA_03_51_15,Fc1ccc(CCNc2ncnc3ccc(F)cc23)cc1,<,4.522878745280337,,5.230087157442037,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +119,BA_03_66_a,COc1cc2ncn(-c3cc(OCc4sccc4OC)c(C(N)=O)s3)c2cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +120,BA_03_53_11,COc1ccc2c(-c3ccnc(Nc4cc(N5CCOCC5)cc(C(F)(F)F)c4)n3)cnn2n1,,5.113626204819444,,4.769725578487549,,5.080090690342586,,5.1028409189818,,4.804196269857379,,6.217860891122144,<,4.522878745280337,,6.7256421310134185,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0 +121,ZDG_7_25_3,COc1cc2ncn(-c3cc(OCc4ncc(C(C)(C)C)o4)c(C(N)=O)s3)c2cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +122,HO_N_116,COc1ccc(-c2cc3nccn3c(Nc3cccc(S(C)(=O)=O)c3)n2)cc1OC,,4.776373940503477,,4.6008913847120745,<,4.522878745280337,<,4.522878745280337,,4.526746521215781,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +123,Brepocitinib,Cn1cc(Nc2nccc(N3CC4CCC(C3)N4C(=O)[C@@H]3CC3(F)F)n2)cn1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +124,HO_N_115,COc1ccc(-c2cc3nccn3c(NCCn3ccnc3)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+125,BA_03_50_20,COc1ccc2ncnc(NCCc3ccc(Br)cc3)c2c1,<,4.522878745280337,,5.608640769372401,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +126,ZDG_7_44_3,Cn1cc(-c2cccc(OC(F)(F)F)c2)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.417198605305975,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +127,BA_03_69_a,COc1cc2ncnc(-c3ccc(C(N)=O)cc3F)c2cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +128,HO_N_137_A,Cc1noc(C)c1-c1cc2nccn2c(Nc2ncccc2C(N)=O)n1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +129,HO_N_135_4_E,COC(=O)c1cccnc1Nc1nc(-c2ccc(OC)c(OC)c2)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +130,PAR_249,O=C(Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)C1CCC(O)CC1,,5.007421559397016,<,4.522878745280337,<,4.522878745280337,,5.55859615288902,<,4.522878745280337,,5.8989347544814414,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +131,PAR_361,CN1CCC(n2cc(Nc3nccc(-c4ccc(C(=O)NCC#N)cc4)n3)cn2)CC1,,5.695880618567663,<,4.522878745280337,,4.8412766118629005,,5.394258787411851,<,4.522878745280337,,5.953479188966091,<,4.522878745280337,,4.873614798292431,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+132,PAR_274,Cn1cc(CNC(=O)c2ccc(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cc2)cn1,,4.936337868347781,,4.721888786879858,,4.647499419165893,,5.387318497835222,,4.658289417749323,,5.99379904251287,<,4.522878745280337,,5.211302575018534,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +133,ZDG_5_55_6,CCC(CN)NC(=O)c1ccc(-c2ccncc2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +134,ZDG_7_45_1,Cn1cc(-c2ccc(F)c(F)c2)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +135,BA_03_50_21,COc1ccc2ncnc(NCCc3ccc(C)cc3)c2c1,<,4.522878745280337,,5.094618114663208,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +136,HO_N_95,COc1ccc(-c2cc3nccn3c(Nc3ccc(C(F)(F)F)cc3C(N)=O)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.948428348248115,<,4.522878745280337,,4.742395135371631,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +137,EF_3_101,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)Cc4ccc(C#N)cc4)C3)n2)cc(OC)c1OC,,5.276146420512976,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.325549249924125,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +138,ZDG_6_50_1,COc1cc(Nc2ncc3c(-c4cccc(NC(=O)N5CCCC5)c4)nn(C)c3n2)cc(OC)c1OC,,6.0959209900587465,,5.649537330654971,,6.00111694950526,,5.103189913336463,,5.355920820754059,,7.112201749932475,<,4.522878745280337,,5.987651242211193,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0 
+139,ZDG_7_52_6,CN1CCN(c2cccc(Nc3nccc(-c4ccc(C(=O)NCC#N)cc4)n3)c2)CC1,,5.896288707022026,,4.591986977837131,,5.022729260872255,,5.875633568854028,,4.567101299453912,,6.736106345680188,<,4.522878745280337,,5.48560900469894,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +140,HO_N_104,COc1ccc(-c2cc3nccn3c(Nc3ccc(N4CCN(C)CC4)cn3)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +141,HO_N_99,CN1CCC(Nc2ncc3c(n2)c(-c2cccc(NS(C)(=O)=O)c2)nn3C)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +142,PAR_277,O=C(NCC(C(F)(F)F)C(F)(F)F)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,4.971615901589376,,4.599098959051357,<,4.522878745280337,,5.307259218619125,<,4.522878745280337,,6.143431243638508,<,4.522878745280337,,4.781712083555128,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +143,BA_03_50_01,COc1ccc2ncnc(CCc3ccccc3)c2c1,<,4.522878745280337,,5.238687151877101,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +144,ZDG_7_47_1,COc1ccc2cc(-c3cn(C)c4ncnc(N)c34)ccc2c1,,4.730819331639776,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +145,Culmerciclib,CC(C)c1c2cc(-c3nc(Nc4ccc(N5CCNCC5)cn4)ncc3F)ccc2nn1C,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+146,BA_03_55_12,c1cc(Nc2nccc(-c3cnn4ncccc34)n2)cc(N2CCOCC2)c1,,5.625983771126986,,4.96686922858421,,5.294320266103083,,5.976203849797534,,4.878083910966451,,6.683531800444131,<,4.522878745280337,,5.887845239897477,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +147,3_IN_PP1,CC(C)(C)n1nc(-c2c[nH]c3ccccc23)c2c(N)ncnc21,<,4.522878745280337,,4.741264885082795,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +148,GCN2iB,COc1ncc(Cl)cc1S(=O)(=O)Nc1ccc(F)c(C#Cc2cnc(N)nc2)c1F,,5.905601549290766,,5.494359015340263,,5.57696763782168,,5.521207096930893,,4.758877132726841,,6.438058632016267,<,4.522878745280337,,5.112039392346211,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +149,Tivozanib_hydrochloride_hydrate,COc1cc2nccc(Oc3ccc(NC(=O)Nc4cc(C)on4)c(Cl)c3)c2cc1OC,,4.90237267485998,,5.280179898185816,,4.983637127187442,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +150,PAR_89,O=C(Nc1nc2c(s1)-c1nc(-c3ccccc3Cl)ncc1CC2)NC1CCCCC1,,5.397748629891981,,4.927215857126657,,5.0284058896320944,,5.063894536958506,,4.93570189742568,,5.898064345968216,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +151,ZDG_7_23_1,COc1cc2ncn(-c3cc(OCc4ccc(Cl)cc4)c(C(N)=O)s3)c2cc1OC,,5.0638445779730095,,5.048480808348494,,5.154283327307828,<,4.522878745280337,,5.076771084592921,<,4.522878745280337,<,4.522878745280337,,4.827103001760606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +152,EF_3_103,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)Cc4ccc(C(C)(C)C)cc4)C3)n2)cc(OC)c1OC,<,4.522878745280337,,4.75519581981897,,4.7533897131839575,<,4.522878745280337,,4.8156208231632975,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+153,PAR_244,O=C(Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)c1ccc[nH]1,,5.307870853260097,,5.124929880831664,,5.060368222007157,,5.105359328915169,,5.136804697549763,,5.463892030482261,<,4.522878745280337,,5.115628742612033,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +154,MAX_40279,COc1cc(F)ccc1-c1c(C)sc2cnc(Nc3cnn(C4CCNCC4)c3)nc12,,4.994315816911533,,4.992062700349886,,4.643409145529222,<,4.522878745280337,<,4.522878745280337,,5.46550356817656,<,4.522878745280337,,5.3554389564331055,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +155,PAR_299,CN1CCN(c2ccc(Nc3nccc(-c4ccc(C(=O)NCC#N)cc4)n3)cc2Cl)CC1,,5.402937008085848,<,4.522878745280337,,4.913672332165274,,5.857028087339601,<,4.522878745280337,,6.195701854835417,<,4.522878745280337,,5.30909544980474,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +156,BA_03_50_08,COc1ccc2ncnc(NCCNS(C)(=O)=O)c2c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +157,BA_03_53_14,CCCCNC(=O)c1cc(F)cc(Nc2nccc(-c3cnn4nc(OC)ccc34)n2)c1,,5.301257022949197,,4.647571668485639,,5.180096129581832,,5.5231721832556575,,4.973305149673089,,6.061845613898452,<,4.522878745280337,,5.856854053011399,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +158,BA_03_50_22,COc1ccc2ncnc(NCCc3cc(Cl)ccc3Cl)c2c1,<,4.522878745280337,,5.174096061139031,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +159,HO_N_134_E,COC(=O)c1cccnc1Nc1nc(-c2ccc(F)c(OC)c2)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+160,PAR_228,N#CCNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)nc3)n2)cc1,,5.199047896812319,<,4.522878745280337,<,4.522878745280337,,5.419393534017114,<,4.522878745280337,,6.15102429675409,<,4.522878745280337,,5.077157095414884,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +161,ZDG_6_48_7,COc1cc(Nc2ncc3c(-c4cccc(NC(=O)C5CC5)c4)nn(C)c3n2)cc(OC)c1OC,,6.608812792635328,,5.880296586710919,,6.677117459945991,,6.432528506602515,,5.6002246719027,<,4.522878745280337,<,4.522878745280337,,6.7503642073489205,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0 +162,ZDG_7_46_4,Cn1cc(-c2ccccc2C(F)(F)F)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +163,HO_N_136_3_A,NC(=O)c1cccnc1Nc1nc(-c2cccs2)cc2nccn12,,4.869697704669959,,4.741316049922417,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +164,ZDG_6_59_1,COc1cc(Nc2ncc3c(n2)c(C#CC(C)(C)O)nn3C)cc(OC)c1OC,,5.53435254925281,<,4.522878745280337,,6.164754945156086,,5.716178678170349,<,4.522878745280337,,6.53804870085516,<,4.522878745280337,,5.428355979001119,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0 +165,BA_03_53_06,COc1ccc2c(-c3ccnc(Nc4cccc(S(=O)(=O)C(F)(F)F)c4)n3)cnn2n1,,4.794035680291206,,5.00831752919434,<,4.522878745280337,<,4.522878745280337,,4.718394640831949,,5.184435892753189,<,4.522878745280337,,4.754263693738152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +166,ZDG_7_35_B,Cn1cc(-c2ccc3c(c2)CCN3C(=O)C2CC2(C)C)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,,4.529307812306122,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+167,BA_03_59_04,Cc1cc(=O)n(C2CCC2)c2nc(Nc3ccc(N4CCN(C)CC4)c(F)c3)ncc12,<,4.522878745280337,<,4.522878745280337,,5.07608559483432,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +168,ZDG_7_43_1,Cn1cc(-c2cccc(NS(C)(=O)=O)c2)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +169,PAR_322,CC(C)Oc1ccc(Nc2nccc(-c3ccc(C(=O)NCC#N)cc3)n2)cc1,,5.670568967240122,,4.799488863914139,,4.7474789540645,,5.600383453872854,,4.829449729046927,,6.757435575848146,<,4.522878745280337,,5.036644931759788,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +170,PAR_342,Cn1ccc(C(=O)Nc2ccc(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cc2)n1,,5.125953308074533,,4.868552068704895,,4.691765667809468,,5.265252663608496,,4.902513637128014,,5.514806652393473,<,4.522878745280337,,4.877638414011683,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +171,HO_N_139_2,CNS(=O)(=O)c1ccccc1Nc1nc(-c2ccc(OC)c(OC)c2)cc2nccn12,,4.854251264510162,,4.829610548134223,,4.669920340904462,,5.114079721946229,,4.814355172887375,,4.934328040498036,<,4.522878745280337,,5.001589545970607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +172,HO_N_129,COc1ccc(-c2cc3nccn3c(Nc3cc(OC)c(OC)c(OC)c3)n2)cc1OC,,4.549123539647551,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +173,HO_N_134_A,COc1cc(-c2cc3nccn3c(Nc3ncccc3C(N)=O)n2)ccc1F,,4.64272200513473,,4.660609287879328,,4.64940575211658,,5.382040796273979,,4.964633396632863,,5.331311803157436,,4.960408141454957,,5.133822896182574,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+174,ZDG_7_41_A,Cn1cc(-c2ccc3c(c2)CCN3C(=O)[C@@H]2COc3ccccc3O2)c2c(N)ncnc21,,4.920483900338917,,5.358817744240419,,5.911720141175999,<,4.522878745280337,<,4.522878745280337,,4.785450760086494,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +175,ZDG_6_61_N2,COc1cc(Nc2ncc3c(n2)c(-c2cccc(NS(C)(=O)=O)c2)nn3C)cc(OC)c1OC,,7.239464524517891,,5.54532033618566,,6.929489972508245,,7.200588833375107,,5.651864459185521,<,4.522878745280337,<,4.522878745280337,,6.950860579077381,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0 +176,PAR_358,Cn1cc(Nc2nccc(-c3ccc(C(=O)NCC#N)cc3)n2)cn1,,5.980481432534826,<,4.522878745280337,<,4.522878745280337,,5.821544101545006,<,4.522878745280337,,6.659884862724448,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +177,HO_N_135_2_E,COC(=O)c1cccnc1Nc1nc(-c2ccc3c(c2)OCCO3)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +178,PAR_374,N#Cc1ccc(C(=O)Nc2ccc(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cc2)cc1,,5.763846309947991,,5.286270031691473,,4.8848036103750445,,5.0976792113515215,,5.002424477521072,,6.252348624286782,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +179,BA_03_59_10,Cc1cc(=O)n(C2CCC2)c2nc(Nc3ccc(C(=O)NC(C)C)cc3)ncc12,,5.009976603015099,,4.655446188325175,,5.498859479811906,<,4.522878745280337,,4.831448025809144,,5.719625901340856,<,4.522878745280337,,4.836575009331562,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +180,PAR_311,COc1cc(N2CCOCC2)ccc1Nc1nccc(-c2ccc(C(=O)NCC#N)cc2)n1,,4.875562176211297,,4.672327574623886,,4.653894353690941,<,4.522878745280337,,4.583523615145392,,4.916383601137222,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+181,ZDG_5_55_7,NC[C@@H](NC(=O)c1ccc(-c2ccncc2)cc1)c1ccccc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +182,ZDG_7_37_B,Cc1nn(C)c(C)c1CC(=O)N1CCc2cc(-c3cn(C)c4ncnc(N)c34)ccc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +183,BA_03_66_b,COc1cc2ncn(-c3cc(OCc4ccc(-c5ccncc5)s4)c(C(N)=O)s3)c2cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +184,ZDG_7_11,COCCN(C)C(=O)c1ccc(Nc2ncc3c(C)cc(=O)n(C4CCCC4)c3n2)cc1,,5.038417673220524,<,4.522878745280337,,5.139779690319417,<,4.522878745280337,,4.819320265352316,,5.575840996623945,<,4.522878745280337,,5.1470809063912935,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +185,ZDG_7_43_2,Cn1cc(-c2ccc3c(c2)CC(=O)N3)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +186,PAR_205,Cc1cc(C(=O)N[C@H]2CCN(C(=O)Nc3nc4c(s3)-c3nc(-c5ccccc5Br)ncc3CC4)C2)nn1C,,4.706292274445287,<,4.522878745280337,<,4.522878745280337,,5.1667083090804,<,4.522878745280337,,5.350574849239629,<,4.522878745280337,,4.7491437507531495,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +187,ZDG_7_27_1,C#CCOc1cc(-n2cnc3cc(OC)c(OC)cc32)sc1C(N)=O,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+188,ZDG_7_39_C,COc1ccccc1CC(=O)N1CCc2cc(-c3cn(C)c4ncnc(N)c34)ccc21,,5.029349539740927,,5.152811042534423,,5.054639888758325,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +189,ZDG_2_93,COc1cc(Nc2ncc3c(n2)c(-c2cccc(NS(C)(=O)=O)c2)nn3CC(F)F)cc(OC)c1OC,,6.349622199477918,,5.315822524551972,,6.037269575499916,<,4.522878745280337,,5.203465699071809,,5.801494773398018,<,4.522878745280337,,5.90938460604092,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 +190,CE_245677,COc1ccc(C(=O)c2cn(C(C)C)c3ncnc(N)c23)cc1NC(=O)Nc1ccc(Cl)cc1Cl,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +191,ZDG_7_14,Cc1cc(=O)n(C2CCCC2)c2nc(Nc3ccc(C(=O)N(C)C4CC4)cc3)ncc12,,5.444910558956879,,4.740223203749629,,5.403633688793094,<,4.522878745280337,<,4.522878745280337,,6.453841888779739,<,4.522878745280337,,5.147576584018467,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +192,BA_03_60_05,CCCCn1c(=O)cc(C)c2cnc(Nc3cccc(S(C)(=O)=O)c3)nc21,,4.71064546241489,<,4.522878745280337,,4.822919675499977,<,4.522878745280337,,4.553580199754428,,5.073861327272263,<,4.522878745280337,,5.320732617198839,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +193,HO_N_136_2_E,COC(=O)c1cccnc1Nc1nc(-c2ccc(N3CCOCC3)cc2)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +194,BA_03_59_03,Cc1cc(=O)n(C2CCC2)c2nc(Nc3ccc(N4CCOCC4)cc3)ncc12,<,4.522878745280337,<,4.522878745280337,,5.536492193799708,<,4.522878745280337,,4.543233595588991,,5.0516436816152375,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+195,PAR_324,COc1ccc(Nc2nccc(-c3ccc(NC(=O)C4CCCN4)cc3)n2)cc1Cl,,4.924146656739188,,4.858009853722174,<,4.522878745280337,,5.539619861523638,<,4.522878745280337,,5.267145068892569,<,4.522878745280337,,4.761567653682853,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +196,PAR_394,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)COc4ccccc4)C3)n2)ccc1N1CCOCC1,<,4.522878745280337,,4.791377843945561,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +197,BA_03_51_19,COc1cccc(CCNc2ncnc3ccc(F)cc23)c1,<,4.522878745280337,,5.016639900263256,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +198,PAR_398,Cc1cc(Nc2nccc(N3CC[C@H](NC(=O)COc4ccccc4)C3)n2)ccc1N1CCOCC1,<,4.522878745280337,,5.378755296656826,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.205494685035351,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +199,ZDG_7_21_2,COc1cc2ncn(-c3cc(OCc4ccc5ccccc5c4)c(C(N)=O)s3)c2cc1OC,,5.4205862155722455,,5.41167528380389,,5.437569032962673,,5.19403445575992,,5.445884596662054,<,4.522878745280337,<,4.522878745280337,,5.200270614667603,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +200,ZDG_6_75_3,COc1cc(Nc2ncc3c(n2)c(-c2cccc(S(C)(=O)=O)c2)nn3C)cc(OC)c1OC,,6.891252183368805,<,4.522878745280337,,7.120274625212618,,7.426467977227534,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,7.468114687887472,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0 +201,HO_N_98,Cn1nc(-c2cccc(NS(C)(=O)=O)c2)c2nc(Nc3ccc(N4CCOCC4)cc3)ncc21,,6.106958653639852,,5.259735721832389,,6.212182010705377,,6.66685773646009,,5.277900710269178,,7.782353984182895,<,4.522878745280337,,6.607836931816712,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0 
+202,PAR_90,O=C(NCc1cccc(F)c1)Nc1nc2c(s1)-c1nc(-c3ccccc3Br)ncc1CC2,,5.077431289362347,,4.914928066065915,,4.694492166193196,,5.061981637898671,,4.988231387689668,,5.545137437022733,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +203,BA_03_60_01,CCCCn1c(=O)cc(C)c2cnc(Nc3cc(OC)c(OC)c(OC)c3)nc21,,5.659723155021703,,4.689374220300698,,5.7300472567178415,,5.339274626147458,,4.907717682813136,,6.357928805442624,<,4.522878745280337,,5.845826566736619,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +204,BA_03_59_05,Cc1cc(=O)n(C2CCC2)c2nc(Nc3cccc(S(C)(=O)=O)c3)ncc12,,5.042704399864713,,4.780353647381379,,5.583684563735657,<,4.522878745280337,,4.85857669611246,,5.476412266461561,<,4.522878745280337,,5.600960957136003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +205,PAR_338,Cc1ccccc1CCC(=O)Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,5.773842492893935,,5.663966776523732,,5.606180250657419,,5.8085663054789824,,5.740628400795056,,6.166726880865653,<,4.522878745280337,,5.5277440733905285,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +206,ZDG_7_44_4,COc1ncccc1-c1cn(C)c2ncnc(N)c12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +207,ZDG_7_43_4,COc1ccc(-c2cn(C)c3ncnc(N)c23)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +208,BA_03_59_07,COc1ccccc1Nc1ncc2c(C)cc(=O)n(C3CCC3)c2n1,,4.627146505382027,,4.53833768508282,,4.719127822874644,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+209,ZDG_6_48_4,COc1cc(Nc2ncc3c(-c4cccc(NS(=O)(=O)C(F)(F)F)c4)nn(C)c3n2)cc(OC)c1OC,,6.7362783823558665,,5.660878745330506,,7.070391042503128,,7.128762995687709,,4.933226575990378,<,4.522878745280337,<,4.522878745280337,,7.200467705751264,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0 +210,BA_03_65_c,COC(=O)c1sc(-n2cnc3cc(OC)c(OC)cc32)cc1OCc1sccc1F,,5.074721428638421,,4.965014934450227,,5.219896358959755,<,4.522878745280337,,4.849654758439626,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +211,HO_N_132_3_E,COC(=O)c1cccnc1Nc1nc(-c2ccc(OC)cc2)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +212,BA_03_61_04,Cc1cc(=O)n(C2CCOCC2)c2nc(Nc3ccc(N4CCN(C)CC4)c(F)c3)ncc12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.9873329366252745,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +213,3853,COc1ccc2c(-c3ccnc(Nc4cccc(C(F)(F)F)c4)n3)cnn2n1,,,,,,,,,,,,5.608888386297197,,,,,,,,,,0.0,, +214,ZDG_6_60,Cn1nc(-c2cccc(NS(C)(=O)=O)c2)c2nc(Nc3ccc(C4CC4)c(P(C)(C)=O)c3)ncc21,,5.957822765913966,,5.180300495637555,,5.810888369041113,,6.517318783070742,,4.961545672241482,<,4.522878745280337,<,4.522878745280337,,6.366753716258976,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0 +215,HO_N_133_1_A,NC(=O)c1cccnc1Nc1nc(C2=CCOCC2)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.7820580655384335,<,4.522878745280337,,4.669912426300299,<,4.522878745280337,,4.574860731113751,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +216,PAR_360,N#CCNC(=O)c1ccc(-c2ccnc(Nc3ccc(CN4CCOCC4)cc3)n2)cc1,,5.396623625558428,,4.7179591764319255,,4.891123299789348,,5.435824480800892,,4.818275633982171,,5.611671019139783,<,4.522878745280337,,5.15556164378403,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+217,3723,Cc1cnc(Nc2ccc(N3CCNCC3)cc2)nc1Nc1cccc(OCc2ccccc2F)c1C#N,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, +218,BA_03_80_A,Cc1cc(=O)n(C2CCOCC2)c2nc(Nc3ccc(N4CCN(C)CC4)cn3)ncc12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +219,3790,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)COc4ccccc4)C3)n2)cc(OC)c1OC,,,,,,,,,,,,5.721246399047171,,,,,,,,,,0.0,, +220,BA_03_60_03,CCCCn1c(=O)cc(C)c2cnc(Nc3ccc(N4CCOCC4)cc3)nc21,,4.915000776345461,,4.674997182380425,,5.900361252603651,,4.880849434839875,,5.079615199699194,,5.499268882601292,<,4.522878745280337,,5.317257991313651,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +221,BA_03_66_h,COc1cc2ncn(-c3cc(OCc4ccn(C)n4)c(C(N)=O)s3)c2cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +222,BA_03_60_10,CCCCn1c(=O)cc(C)c2cnc(Nc3ccc(C(=O)NC(C)C)cc3)nc21,,4.678728850443628,<,4.522878745280337,,5.1861188093075405,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.896264415193177,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +223,TC_S_7005,C[C@H](Nc1cc2c(-c3ccc4c(c3)OCO4)noc2cn1)c1ccccc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.83095972316797,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.816293228943018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +224,ZDG_7_24_1,COc1cc2ncn(-c3cc(OCc4cccc(F)c4)c(C(N)=O)s3)c2cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+225,PAR_168,N#CCNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)c(Cl)c3)n2)cc1,,6.1039715032828346,,5.27168686708747,,5.457184999498092,,6.155271166086964,,5.257464569418352,,7.11888929424705,<,4.522878745280337,,5.690901081386165,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0 +226,PAR_400,COc1ccc(CNc2nccc(N3CC[C@H](NC(=O)COc4ccccc4)C3)n2)cc1Cl,<,4.522878745280337,,4.651760068086324,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +227,RSS0680,Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)CN2C(=O)[C@@H](NC(=O)CCCCCN2CCN(c3ccc(Nc4ncc5scc(-c6cccc(NS(C)(=O)=O)c6)c5n4)cc3)CC2)C(C)(C)C)cc1,,6.54261166447481,,5.75585800388447,,6.295388017169884,,5.999097187127363,,5.848763313386655,,6.030727908922796,<,4.522878745280337,,6.611177726392288,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0 +228,ALK_kinase_inhibitor_1,COc1ccc(F)cc1-c1c(CO)sc2cnc(Nc3ccc(N4CCN(C)CC4)cc3OC(C)C)nc12,<,4.522878745280337,<,4.522878745280337,,4.536829647512487,<,4.522878745280337,,4.543123388688776,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +229,PAR_111,NCCNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +230,BA_03_60_02,CCCCn1c(=O)cc(C)c2cnc(Nc3cccc(N4CCOCC4)c3)nc21,,5.343366504225447,,4.983140207370476,,5.92190220515168,,5.045371664249361,,4.993732625917914,,6.195057877289041,<,4.522878745280337,,6.116489048104294,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0 +231,HO_N_135_5_A,NC(=O)c1cccnc1Nc1nc(-c2ccccc2)cc2nccn12,,5.167932542347761,,4.786781893903362,<,4.522878745280337,<,4.522878745280337,,4.702448757509358,,4.997133813381141,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+232,ZDG_6_75_2,COc1cc(Nc2ncc3c(n2)c(-c2ccccc2NC(C)=O)nn3C)cc(OC)c1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.8419011094092586,<,4.522878745280337,,4.682799614428752,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +233,EF_3_105,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)Cc4ccc(NC(C)=O)cc4)C3)n2)cc(OC)c1OC,,4.574774333558005,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.593842407208746,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +234,ZDG_7_47_2,CN(C)C(=O)c1cccc(-c2cn(C)c3ncnc(N)c23)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +235,PAR_321,Cc1ccc(Nc2nccc(-c3ccc(C(=O)NCC#N)cc3)n2)cc1O,,6.009589815676634,,4.917865976255178,,4.864457187824321,,5.8264947165744125,,4.973637762948251,,6.774955460551948,<,4.522878745280337,,5.255202138876709,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +236,PAR_318,Cn1nnc2ccc(Nc3nccc(-c4ccc(C(=O)NCC#N)cc4)n3)cc21,,5.11781845030923,<,4.522878745280337,<,4.522878745280337,,5.101684406132499,<,4.522878745280337,,5.795336066086301,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +237,ZDG_7_42_1,Cc1ccccc1CC(=O)N1CCc2cc(-c3cn(C)c4ncnc(N)c34)ccc21,,5.61231584009862,,5.156010728689806,,6.243550038680226,<,4.522878745280337,<,4.522878745280337,,5.097975280610396,<,4.522878745280337,<,4.522878745280337,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 +238,Derazantinib,COCCNCCc1cccc(Nc2ncc3c(n2)-c2ccccc2[C@H](c2ccccc2F)C3)c1,,4.946677510473712,,4.659559714931235,,5.083215299969158,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+239,PAR_345,O=C(Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)C1(c2ccccc2)CCC1,,5.805406688792475,,5.103777576542542,,4.996365992609234,,6.423519472860726,,5.1412836734445975,,7.202756541176565,<,4.522878745280337,,5.409960957047747,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0 +240,ZDG_6_67,COc1ccc(-c2nn(C)c3cnc(Nc4cc(OC)c(OC)c(OC)c4)nc23)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,6.927259857017201,<,4.522878745280337,,5.156576221429403,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +241,BA_03_50_06,CCS(=O)(=O)c1cccc(Nc2ncnc3ccc(OC)cc23)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +242,PAR_381,CN(CCN1CCCC1)C(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +243,ZDG_6_50_4,COc1cc(Nc2ncc3c(-c4cccc(NS(=O)(=O)CC(F)(F)F)c4)nn(C)c3n2)cc(OC)c1OC,,6.782040430564839,,5.692492267454069,,6.9100886213863655,,6.881171851093462,,5.440548237561658,<,4.522878745280337,<,4.522878745280337,,7.078945066753827,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0 +244,TL01_010,NCCNS(=O)(=O)c1ccc(-c2ccnc(Nc3ccccc3)n2)cc1,,6.1046622770278045,<,4.522878745280337,,5.094714401665003,,5.359374999407884,,4.807704775595017,,6.860381498185879,<,4.522878745280337,,4.815742834889345,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +245,TL01_024,COc1ccc(Nc2nccc(-c3ccc(S(=O)(=O)NCCNS(=O)(=O)C4CC4)cc3)n2)cc1,,5.80560660490504,,5.122882797819735,,4.864661445175623,<,4.522878745280337,<,4.522878745280337,,6.452344651678416,<,4.522878745280337,,4.977133610283185,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 
+246,HO_N_135_7_A,NC(=O)c1cccnc1Nc1nc(-c2cccc(N3CCOCC3)c2)cc2nccn12,,5.3590028575545645,,4.884641844798198,,4.6950213420499,<,4.522878745280337,,4.880768360848983,,5.207366809608026,<,4.522878745280337,,4.86971742205831,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +247,ZDG_6_48_2,CCS(=O)(=O)Nc1cccc(-c2nn(C)c3nc(Nc4cc(OC)c(OC)c(OC)c4)ncc23)c1,,7.080746134041689,,5.695443092049875,,6.770898194393608,,6.995989692893522,,5.648842078920682,<,4.522878745280337,<,4.522878745280337,,7.016264366350998,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0 +248,PAR_370,O=C(CN1CCOCC1)Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,4.826213105920755,,4.816469259671949,<,4.522878745280337,,5.711795001781335,<,4.522878745280337,,6.008672458852921,<,4.522878745280337,,4.90328024809868,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +249,PAR_241,O=C1CCC(C(=O)Nc2ccc(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cc2)C1,,5.088605527511813,,4.628056532634228,<,4.522878745280337,,5.73702692588722,<,4.522878745280337,,6.133812442366555,<,4.522878745280337,,5.299515894450547,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +250,ZDG_7_26_1,CCNC(=O)c1cccc(COc2cc(-n3cnc4cc(OC)c(OC)cc43)sc2C(N)=O)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +251,Tpl2_Kinase_Inhibitor_1,N#Cc1cnc2cnc(NCc3cccnc3)cc2c1Nc1ccc(F)c(Cl)c1,<,4.522878745280337,,5.025389240195332,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +252,BA_03_61_10,Cc1cc(=O)n(C2CCOCC2)c2nc(Nc3ccc(C(=O)NC(C)C)cc3)ncc12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+253,ZDG_7_46_1,Cn1cc(-c2cn(C)c3ncnc(N)c23)cn1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +254,HO_N_135_1_A,NC(=O)c1cccnc1Nc1nc(-c2cccc(C(F)(F)F)c2)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +255,PAR_162,Cc1nocc1C(=O)N[C@H]1CCN(C(=O)Nc2nc3c(s2)-c2nc(-c4ccccc4Br)ncc2CC3)C1,,5.555378610964486,,5.133606152079814,,5.025422728526052,,5.887663815348757,,4.955427327315712,,6.635858913662164,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +256,3842,COc1cc(Nc2nccc(-c3ccc4[nH]ccc4c3)n2)cc(C(F)(F)F)c1,,,,,,,,,,,,5.917933065714887,,,,,,,,,,0.0,, +257,BA_03_50_11,COc1ccc2ncnc(NCCc3ccncc3)c2c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +258,AMG_47a,Cc1ccc(C(=O)Nc2cccc(C(F)(F)F)c2)cc1-c1ccc2nc(NCCN3CCOCC3)ncc2c1,<,4.522878745280337,,4.772798973483567,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +259,Golvatinib,CN1CCN(C2CCN(C(=O)Nc3cc(Oc4ccc(NC(=O)C5(C(=O)Nc6ccc(F)cc6)CC5)c(F)c4)ccn3)CC2)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +260,PAR_372,O=C(Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)c1cccc2c1OCO2,,5.2179944972202765,,5.037654317887743,,4.910870806351179,,5.22120758405103,,5.188357732924196,<,4.522878745280337,<,4.522878745280337,,5.031553392186277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+261,HO_N_132_3_A,COc1ccc(-c2cc3nccn3c(Nc3ncccc3C(N)=O)n2)cc1,,5.046209872089897,,4.917674305168511,,4.779069386451229,<,4.522878745280337,,4.888429126486219,,5.33088176017039,<,4.522878745280337,,5.067141519727515,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +262,PAR_113,Cc1cccc(CNC(=O)c2ccc(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cc2)c1,,5.217302986321796,,5.110465948069461,,5.028279794192806,,5.292558901588945,,5.149197872069948,,6.055950944435368,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +263,BSc5367,COC(=O)c1cccc(-c2cnc3[nH]cc(-c4cccnc4)c3c2)c1,,5.082177979315596,,4.9655607865278375,,4.73755422206139,,5.032137334346293,<,4.522878745280337,,5.978695546843435,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +264,2096,Fc1cc(-c2ccnc(Nc3ccc(-n4cnc(N5CCOCC5)n4)cc3)n2)cc(N2CCOCC2)c1,,,,,,,,,,,,5.8696662315049934,,,,,,,,,,0.0,, +265,BA_03_51_18,Fc1ccc2ncnc(NCCc3cccc(Br)c3)c2c1,<,4.522878745280337,,6.107209089137724,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0 +266,Casein_Kinase_II_Inhibitor_IV,COc1cc(Nc2ncc3ccn(-c4cccc(CCC#N)c4)c3n2)cc(OC)c1OC,,6.622840508828964,,5.183577810854469,,6.0280015818133,,5.806834079003082,,5.539397807365343,,7.208527440788626,<,4.522878745280337,,6.924195590604335,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0 +267,PAR_316,COc1ccc(Nc2nccc(-c3ccc(C(=O)NCC#N)cc3)n2)cc1Cl,,5.798704140084891,,5.048846842351343,,4.859488180352635,,5.611174510937102,,5.058627743056515,,6.428120192862645,<,4.522878745280337,,5.214629733947848,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +268,BI_882370,CCN1CCC(N(C)c2ccc3c(n2)c(-c2cncnc2)cn3-c2c(F)ccc(NS(=O)(=O)CC)c2F)CC1,,4.69122285715209,,4.890492432107752,,4.769411612209313,<,4.522878745280337,,4.799019010064749,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+269,3732,N#Cc1c(Nc2nc(Nc3ccc(C4CCNCC4)cc3)ncc2Cl)cccc1OCc1ccccc1F,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, +270,S116836,Cc1ccc(C(=O)Nc2cc(-n3ccnc3)cc(C(F)(F)F)c2)cc1C#Cc1cnc(NC2CC2)nc1,,4.733622324873981,<,4.522878745280337,,4.890410185201036,,4.930514495386641,,4.970140129981151,<,4.522878745280337,<,4.522878745280337,,4.832507505969726,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +271,PAR_351,Cc1cc(=O)n(C2CCCC2)c2nc(Nc3ccn(C)n3)ncc12,,5.444699172100818,<,4.522878745280337,,5.586962035966202,<,4.522878745280337,,4.763486558006579,,6.379269097867836,<,4.522878745280337,,5.704715854657756,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +272,BA_03_50_19,COc1cccc(CCNc2ncnc3ccc(OC)cc23)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +273,PAR_356,Cc1cc(=O)n(C2CCCC2)c2nc(Nc3cccc(CN4CCOCC4)c3)ncc12,,4.70436486658277,<,4.522878745280337,,5.198768119199553,<,4.522878745280337,<,4.522878745280337,,5.756047241264547,<,4.522878745280337,,4.896988634663503,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +274,BA_03_50_07,COc1cccc([C@H](C)Nc2ncnc3ccc(OC)cc23)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +275,HO_N_138_2_A,Cn1cnc2ccc(-c3cc4nccn4c(Nc4ncccc4C(N)=O)n3)cc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +276,HO_N_73,COc1ccc(-c2cc3nccn3c(Nc3ccncc3C(N)=O)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+277,BA_03_65_a,COC(=O)c1sc(-n2cnc3cc(OC)c(OC)cc32)cc1OCc1sccc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +278,ZDG_6_38,COc1cc(Nc2ncc3c(-c4cccc([N+](=O)[O-])c4)nn(C)c3n2)cc(OC)c1OC,,5.352601318996579,<,4.522878745280337,,4.724733398906056,,5.920407115946844,<,4.522878745280337,,7.0345314539649815,<,4.522878745280337,,5.703723860616155,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +279,EF_3_201,O=C(COc1ccccc1)N[C@H]1CCN(c2ccnc(Nc3ccc(F)cc3)n2)C1,<,4.522878745280337,,4.849411098115262,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +280,TL01_023,CC(=O)Nc1ccc(Nc2nccc(-c3ccc(S(=O)(=O)NCCN)cc3)n2)cc1,,4.684514698854729,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.526497375134073,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +281,PAR_333,COc1cc(Nc2nccc(-c3ccc(NC(=O)C4CCCN4)cc3)n2)ccc1N1CCOCC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +282,PAR_327,O=C(Nc1ccc(-c2ccnc(Nc3ccc(Cl)cc3)n2)cc1)C1CCCN1,<,4.522878745280337,,4.731767974323136,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.051982890328434,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +283,ZDG_6_48_3,COc1cc(Nc2ncc3c(-c4cccc(NC(C)=O)c4)nn(C)c3n2)cc(OC)c1OC,,6.277591921572145,,5.494845765107401,,6.151241698278676,,6.15745762658822,,5.309162923367781,,7.884845915322762,<,4.522878745280337,,6.713127839372918,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0 
+284,HO_N_71,COc1ccc(-c2cc3nccn3c(Nc3cnccc3C(N)=O)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +285,ZDG_7_24_2,COc1cc2ncn(-c3cc(OCc4cc(C(F)(F)F)cc(C(F)(F)F)c4)c(C(N)=O)s3)c2cc1OC,,4.536065565647475,<,4.522878745280337,,4.565454954825468,<,4.522878745280337,,4.580553530573381,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +286,BA_03_59_01,COc1cc(Nc2ncc3c(C)cc(=O)n(C4CCC4)c3n2)cc(OC)c1OC,,5.8095204295214895,,4.793359288759993,,6.554206434466748,,4.788746293579548,,4.903651125964368,,6.693623348897365,<,4.522878745280337,,6.01253545631465,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0 +287,HO_N_135_8_A,NC(=O)c1cccnc1Nc1nc(-c2cc(C(F)(F)F)cc(C(F)(F)F)c2)cc2nccn12,<,4.522878745280337,,4.693649170809873,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +288,ZDG_7_15,Cn1cc(-c2cccc(C(F)(F)F)c2)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +289,PAR_395,O=C(COc1ccccc1)N[C@H]1CCN(c2ccnc(Nc3cccc(N4CCOCC4)c3)n2)C1,,4.8730652351051305,,4.789935576943914,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +290,ZDG_6_59_3,COc1cc(Nc2ncc3c(n2)c(C#CCN(C)C)nn3C)cc(OC)c1OC,,4.925516752117212,<,4.522878745280337,,5.229719574217726,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.444903582758697,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+291,PAR_309,N#CCNC(=O)c1ccc(-c2ccnc(Nc3cccc(N4CCOCC4)c3)n2)cc1,,5.89300432206647,,4.898927360316853,,5.175955362843072,,5.839884473737776,,4.969805758716476,,6.755038622864529,<,4.522878745280337,,5.623268330211212,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +292,BA_03_61_05,CS(=O)(=O)c1cccc(Nc2ncc3ccc(=O)n(C4CCOCC4)c3n2)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +293,PAR_336,CC(C)Oc1ccc(Nc2nccc(-c3ccc(NC(=O)C4CCCN4)cc3)n2)cc1,<,4.522878745280337,,5.064189055843583,<,4.522878745280337,,5.697169228177547,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +294,ZDG_7_40_B,Cn1cc(-c2ccc3c(c2)CCN3C(=O)Cc2cccc3c2OCO3)c2c(N)ncnc21,,5.285138286337057,,4.944511813241703,,5.359068800417957,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +295,PAR_138,O=C(Nc1cc(O)[nH]n1)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +296,HO_N_131_5_E,COC(=O)c1cccnc1Nc1nc(-c2cnn(C)c2)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +297,BA_03_56_04,CCOc1ccc2c(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cnn2n1,,6.382563467723347,,5.442808122452164,,6.023964920206267,,6.716551966323366,,5.070433254155037,,7.804781549199357,<,4.522878745280337,,6.817368767842395,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0 
+298,BA_03_78_c,CS(=O)(=O)c1ccc(CCNc2ncnc3sc(Br)cc23)cc1,<,4.522878745280337,,4.982287803241264,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +299,XL_019,O=C(Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)[C@@H]1CCCN1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.574508571207707,<,4.522878745280337,,5.118751627945101,<,4.522878745280337,,4.525261171671562,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +300,PAR_139,O=C(NC1CN2CCC1CC2)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +301,ZDG_6_50_3,COc1cc(Nc2ncc3c(-c4cccc(NS(=O)(=O)N(C)C)c4)nn(C)c3n2)cc(OC)c1OC,,7.2636414639443965,,5.472371392516424,,6.780426843309521,,6.852293717481508,,5.658476042428143,<,4.522878745280337,<,4.522878745280337,,6.765197331288906,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0 +302,BA_03_51_22,Fc1ccc2ncnc(NCCc3cc(Cl)ccc3Cl)c2c1,<,4.522878745280337,,5.402880060144108,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +303,PAR_271,O=C(NCC12CCC(CC1)C2)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,4.93508730665225,,4.898964375295649,,4.593051517658745,,5.166284731172521,,4.634132189775471,,5.280807709808106,<,4.522878745280337,,4.9921266697179405,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +304,3735,C[C@@H](Oc1cccc(Nc2nc(Nc3ccc(N4CCN(C)CC4)cc3)ncc2Cl)c1C#N)c1ccccc1F,,,,,,6.823908740944318,,,,,,,,,,,,,1.0,,,,, +305,3791,COc1cc(Nc2nccc(N3CC[C@@H](NC(=O)[C@@H]4COc5ccccc5O4)C3)n2)cc(OC)c1OC,,,,,,,,,,,,5.026872146400301,,,,,,,,,,0.0,, 
+306,ba_03_55_11,FC(F)(F)c1cc(Nc2nccc(-c3cnn4ncccc34)n2)cc(N2CCOCC2)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.880794677881373,<,4.522878745280337,,5.908010613583455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +307,BA_03_56_01,CCOc1ccc2c(-c3ccnc(Nc4cc(OC)c(OC)c(OC)c4)n3)cnn2n1,,5.548684121326926,,4.576730188410739,,4.971611933928847,,5.172837185648649,<,4.522878745280337,,7.13102765360829,<,4.522878745280337,,6.443358002843733,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0 +308,BA_03_53_08,COc1ccc2c(-c3ccnc(Nc4cccc(S(C)(=O)=O)c4)n3)cnn2n1,,5.3941391822593125,,4.704974298184757,,4.627055792780061,,5.718845206864829,<,4.522878745280337,,6.896405638759742,<,4.522878745280337,,5.735125613105951,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +309,PAR_260,O=C(Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)C1CCCC1,,5.364643116384741,,4.919894520601953,,4.782892139367345,,5.267484537020429,,4.681547148925979,,6.22558409869272,<,4.522878745280337,,5.207024547362414,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +310,HO_N_100,COc1ccc(-c2cc3nccn3c(Nc3ccc(Br)cc3C(N)=O)n2)cc1OC,,4.929800125507245,,4.958520192141293,,4.945548775435376,,5.447834988259078,,4.954234799643147,,5.213831432030042,<,4.522878745280337,,5.359627629229694,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +311,PAR_348,O=C(CC1Cc2ccccc2C1)Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,5.490429121138964,,5.24455236714161,,5.062735667799319,,5.232626165902529,,5.253711168111956,,6.1844141633771095,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +312,PAR_183,O=C(N[C@H]1CCN(C(=O)Nc2nc3c(s2)-c2nc(-c4ccccc4Br)ncc2CC3)C1)C1CC1,,4.726863738101162,<,4.522878745280337,<,4.522878745280337,,5.278847629271948,<,4.522878745280337,,5.576243165624568,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+313,HO_N_49,CNC(=O)c1ccccc1Nc1nc(-c2ccc(OC)c(OC)c2)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +314,HO_N_110,COc1ccc(-c2cc3nccn3c(Nc3ccc(N4CCOCC4)cc3)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +315,G1T38,CC(C)N1CCN(c2ccc(Nc3ncc4cc5n(c4n3)C3(CCCCC3)CNC5=O)nc2)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +316,PAR_401,CC(C)Oc1ccc(Nc2nccc(N3CC[C@H](NC(=O)COc4ccccc4)C3)n2)cc1Cl,<,4.522878745280337,,5.330784135217551,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +317,ZDG_6_47,COc1cc(Nc2ncc3c(-c4cccc(NC(=O)NC(C)C)c4)nn(C)c3n2)cc(OC)c1OC,,6.2415248779191135,,5.565425742941704,,6.266056824425502,,5.360749393514001,,5.316223306472722,<,4.522878745280337,<,4.522878745280337,,6.191882684062247,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0 +318,ZDG_7_32_B,Cn1cc(-c2ccc3c(c2)CCN3C(=O)C2CC(=O)N(c3cccc(F)c3)C2)c2c(N)ncnc21,,4.574573664014865,,4.639009402674452,,4.939457001176905,<,4.522878745280337,,4.711262806697011,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +319,BA_03_60_07,CCCCn1c(=O)cc(C)c2cnc(Nc3ccccc3OC)nc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+320,BA_03_53_13,COc1cc(F)cc(Nc2nccc(-c3cnn4nc(OC)ccc34)n2)c1,<,4.522878745280337,,4.695633228217571,<,4.522878745280337,,5.267069662565771,<,4.522878745280337,,6.558862363961111,<,4.522878745280337,,6.056005462689938,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0 +321,KW_2449,O=C(c1ccc(/C=C/c2n[nH]c3ccccc23)cc1)N1CCNCC1,,4.948693454498172,<,4.522878745280337,,4.887718432386475,<,4.522878745280337,<,4.522878745280337,,4.914243194598751,<,4.522878745280337,,5.172077927060755,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +322,PAR_312,CS(=O)(=O)c1cccc(Nc2nccc(-c3ccc(C(=O)NCC#N)cc3)n2)c1,,5.437692331304064,,4.721068208451692,,4.7616092095892775,,5.408702035088601,,4.76164237316853,,5.988516790479124,<,4.522878745280337,,5.35043781148929,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +323,PAR_275,C[C@H](NC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)c1ccccc1,,5.383519364384667,,4.728697888697462,,4.791472677138329,,5.48764661046281,,4.802972377414802,,6.098455015848394,<,4.522878745280337,,4.777662987898371,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +324,PAR_270,CN1CCC(CNC(=O)c2ccc(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cc2)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +325,ZDG_7_24_3,COc1cc2ncn(-c3cc(OCc4c(F)cccc4F)c(C(N)=O)s3)c2cc1OC,,4.969069819312674,<,4.522878745280337,,5.762940099478096,,5.021398828517733,,4.985809536051859,<,4.522878745280337,<,4.522878745280337,,5.019323835687544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +326,HO_N_136_2_A,NC(=O)c1cccnc1Nc1nc(-c2ccc(N3CCOCC3)cc2)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.545262682940036,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+327,ZDG_7_50_1,CN1CCN(c2ccc(Nc3nccc(-c4ccc(NC(=O)[C@@H]5CCCN5)cc4)n3)cc2Cl)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.543334033192413,<,4.522878745280337,,4.662880551711223,<,4.522878745280337,,4.526598286962623,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +328,BA_03_53_05,COc1ccc2c(-c3ccnc(Nc4ccccc4)n3)cnn2n1,,5.180564558632154,<,4.522878745280337,,4.738761595469503,<,4.522878745280337,<,4.522878745280337,,5.847229962194939,<,4.522878745280337,,5.384984275181187,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +329,PAR_357,Cn1nccc1Nc1nccc(-c2ccc(C(=O)NCC#N)cc2)n1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +330,ZDG_6_59_2,COc1cc(Nc2ncc3c(n2)c(C#CC2(O)CCCC2)nn3C)cc(OC)c1OC,,5.685274484736024,<,4.522878745280337,,6.1292204946011815,,5.680715631887232,<,4.522878745280337,,6.716608779237629,<,4.522878745280337,,5.470584302976446,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0 +331,PAR_354,Cc1cc(=O)n(C2CCCC2)c2nc(Nc3ccn(C4CCN(C)CC4)n3)ncc12,,4.698496042844905,<,4.522878745280337,,5.456566921861255,<,4.522878745280337,<,4.522878745280337,,5.5035703597579975,<,4.522878745280337,,5.358810350415297,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +332,ZDG_7_45_2,Cn1cc(-c2cccs2)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +333,PAR_362,N#CCNC(=O)c1ccc(-c2ccnc(Nc3cnn(C(F)F)c3)n2)cc1,,5.77941053541049,<,4.522878745280337,<,4.522878745280337,,5.543231659097812,<,4.522878745280337,,6.648579843608567,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +334,3833,CCOc1ccc2c(-c3ccnc(Nc4cc(OC)cc(OC)c4)n3)cnn2n1,,,,,,,,,,,,6.308034897232639,,,,,,,,,,1.0,, 
+335,HO_N_136_7_A,NC(=O)c1cccnc1Nc1nc(-c2ccccc2OC(F)(F)F)cc2nccn12,<,4.522878745280337,,4.803269399789653,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +336,HO_N_133_5_A,COc1ncccc1-c1cc2nccn2c(Nc2ncccc2C(N)=O)n1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +337,ZDG_2_92,COc1cc(Nc2ncc3c(n2)c(-c2cccc(NS(C)(=O)=O)c2)nn3C(C)C)cc(OC)c1OC,,6.536205092379067,,5.244166004673225,,6.082230741504421,,5.004846225536155,,5.209194819890519,,5.905229808621821,<,4.522878745280337,,5.4416650725039055,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 +338,TBAP_001,CC(C)(C)c1cc(NC(=O)Nc2ccc(Oc3ccnc4[nH]c(=O)cnc34)cc2F)n(-c2cccc(F)c2)n1,,5.101565883784068,,5.034757595688686,,4.9773492337294485,,4.88164810111425,,5.166532930239859,<,4.522878745280337,,5.249676748945265,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +339,ZDG_7_35_A,CC1CC1C(=O)N1CCc2cc(-c3cn(C)c4ncnc(N)c34)ccc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +340,3838,CCOc1ccc2c(-c3ccnc(Nc4ccc(CN5CCOCC5)cc4)n3)cnn2n1,,,,,,,,,,,,6.052566278112948,,,,,,,,,,1.0,, +341,ALW_II_49_7,Cc1ccc(C(=O)Nc2cccc(C(F)(F)F)c2)cc1Nc1cncc(C(N)=O)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +342,3801,CCOc1ccc2c(-c3ccnc(Nc4cccc(S(C)(=O)=O)c4)n3)cnn2n1,,,,,,,,,,,,7.154901959985742,,,,,,,,,,1.0,, 
+343,HO_N_133_3_E,COC(=O)c1cccnc1Nc1nc(-c2cccc(OC)c2OC)cc2nccn12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +344,BA_03_50_13,COc1ccc2ncnc(N[C@H](C)c3ccccc3F)c2c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +345,BA_03_50_12,COc1ccc2ncnc(NCCc3ccc(F)cc3)c2c1,<,4.522878745280337,,4.979564642347633,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +346,BA_03_61_02,Cc1cc(=O)n(C2CCOCC2)c2nc(Nc3cccc(N4CCOCC4)c3)ncc12,,4.738035237054328,<,4.522878745280337,,4.862452908010375,<,4.522878745280337,,4.736607136839892,,5.305362952137882,<,4.522878745280337,,4.856607360583436,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +347,Ifidancitinib,COc1cc(Nc2ncc(C)c(Nc3ccc4oc(=O)[nH]c4c3)n2)cc(C)c1F,,5.387247664524138,,4.832264495121284,,4.5228994754347385,,5.030229217768204,,4.577296369141819,,5.298208185567936,<,4.522878745280337,,5.179591189738451,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +348,ZDG_6_66,COc1ccccc1Nc1ncc2c(n1)c(-c1cccc(NS(C)(=O)=O)c1)nn2C,,5.138546774720314,,4.971941526406618,,5.006132125882433,,4.928694690643835,,4.987415237762792,,6.614735179247774,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +349,HO_N_140,COc1ccc(-c2cc3nccn3c(Nc3ncccc3C(=O)O)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+350,HO_N_62,COc1ccc(-c2cc3nccn3c(Nc3ccccn3)n2)cc1OC,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +351,ZDG_7_35_C,Cn1cc(-c2ccc3c(c2)CCN3C(=O)C2C(C)(C)C2(C)C)c2c(N)ncnc21,,4.931814837850233,,5.046661835207546,,5.472401646550287,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +352,GSK461364,C[C@@H](Oc1cc(-n2cnc3ccc(CN4CCN(C)CC4)cc32)sc1C(N)=O)c1ccccc1C(F)(F)F,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +353,3823,CCOc1ccc2c(-c3ccnc(Nc4ccc(C(N)=O)cc4)n3)cnn2n1,,,,,,,,,,,,7.075720713938117,,,,,,,,,,1.0,, +354,PAR_141,O=C(NCC1CCC(F)(F)CC1)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +355,ZDG_7_26_3,CCN(C(=O)COc1cc(-n2cnc3cc(OC)c(OC)cc32)sc1C(N)=O)c1cccc(C)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +356,BA_03_50_03,COc1ccc2ncnc(NC3[C@H]4C[C@@H]5C[C@@H](C[C@H]3C5)C4)c2c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +357,HO_N_133_3_A,COc1cccc(-c2cc3nccn3c(Nc3ncccc3C(N)=O)n2)c1OC,,4.636953072082088,,4.612097017819476,,4.705893907584375,,5.101435565833748,<,4.522878745280337,,5.346187827681289,<,4.522878745280337,,5.08700015944291,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+358,PAR_378,O=C(Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)C1CC1,,5.795223621307058,,5.069636181663933,,5.189563149194148,,5.354112429809823,,5.090225183808611,,6.435809867643536,<,4.522878745280337,,5.320880002769998,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +359,PAR_396,CN(C)S(=O)(=O)c1cccc(Nc2nccc(N3CC[C@H](NC(=O)COc4ccccc4)C3)n2)c1,,5.639804514613448,,5.698986663575901,,5.589807383635014,<,4.522878745280337,,5.485285553246834,<,4.522878745280337,<,4.522878745280337,,5.391752647388778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +360,PAR_248,O=C(Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)C1CCOCC1,,4.7701367862789015,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.539222443114369,<,4.522878745280337,,4.828484144000552,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +361,ZDG_7_34_B,Cn1cc(-c2ccc3c(c2)CCN3C(=O)C2CCC(F)(F)CC2)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,,4.738800287970558,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +362,PAR_353,Cc1cc(=O)n(C2CCCC2)c2nc(Nc3ccc(CN4CCOCC4)cc3)ncc12,,4.889178578148,,4.694100978880336,,5.521752502766849,<,4.522878745280337,<,4.522878745280337,,5.609121210067395,<,4.522878745280337,,5.216615473969439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +363,PAR_140,O=C(c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1)N1CC[C@@H](O)C1,,4.947851149814153,<,4.522878745280337,<,4.522878745280337,,4.9167148935059615,<,4.522878745280337,,5.738652702744123,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +364,ZDG_7_41_3,Cn1cc(-c2ccc3c(c2)CCN3C(=O)Cc2ccc3c(c2)OCCO3)c2c(N)ncnc21,,5.009846998035169,,5.068850956510059,,5.460293990716453,<,4.522878745280337,,4.979278393101506,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+365,ON1231320,Cn1c(=O)c(S(=O)(=O)c2ccc(F)cc2F)cc2cnc(Nc3ccc4[nH]ccc4c3)nc21,,5.414618604428587,,5.277990016375206,,5.399388668716516,,5.37920890878158,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.257878075412741,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +366,BA_03_61_03,Cc1cc(=O)n(C2CCOCC2)c2nc(Nc3ccc(N4CCOCC4)cc3)ncc12,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +367,PAR_81,O=C(Nc1nc2c(s1)-c1nc(-c3ccccc3Br)ncc1CC2)NC1CCCNC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +368,ZDG_7_51_7,COc1cc(Nc2ncc3c(C)cc(=O)n(C4CCCC4)c3n2)cc(OC)c1OC,,5.844613985129847,<,4.522878745280337,,6.002273976255355,,4.53953279402968,,5.039361792838563,,6.680243093632786,<,4.522878745280337,,6.224836377308716,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0 +369,ZDG_6_72,COc1cc(Nc2ncc3c(n2)c(C2=CCOCC2)nn3C)cc(OC)c1OC,,5.256698859829601,<,4.522878745280337,,5.408457936833549,,5.050230571632087,<,4.522878745280337,,7.322740251210563,<,4.522878745280337,,5.217736958089621,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +370,ZDG_7_53_1,CN1CCN(c2cccc(Nc3nccc(-c4ccc(NC(=O)[C@@H]5CCCN5)cc4)n3)c2)CC1,,5.338173223355024,<,4.522878745280337,<,4.522878745280337,,6.231164950149952,<,4.522878745280337,,6.334276757369631,<,4.522878745280337,,5.32844855319665,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0 +371,Nimucitinib,NC(=O)c1cnc(Nc2cccc(NC(=O)[C@H]3CCCNC3)c2)cc1NCc1cc(F)cc(F)c1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.651869225303184,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+372,HO_N_83,CN1CCN(c2ccc(Nc3ncc4c(n3)c(-c3cccc(NS(C)(=O)=O)c3)nn4C)cc2)CC1,,5.561448562355972,<,4.522878745280337,,6.155927167866227,,6.479296566675651,<,4.522878745280337,,7.354332708971508,<,4.522878745280337,,6.592940999627449,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0 +373,PAR_110,O=C(NC1CCCNC1)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +374,ZDG_6_41,COc1cc(Nc2ncc3c(-c4cccc(N)c4)nn(C)c3n2)cc(OC)c1OC,,6.072137063704354,,5.331158920986598,,6.41637171103705,,6.192495604580524,,5.327101828744807,,7.769368396562845,<,4.522878745280337,,6.210385953351892,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0 +375,ZDG_7_47_3,Cn1cc(-c2ccc3c(c2)OCCO3)c2c(N)ncnc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +376,3856,Cc1cnc(Nc2ccc(NC(=O)c3cccc(NC(=O)/C=C/CN(C)C)c3)cc2)nc1-c1cccnc1,,,,,,,,,,,,5.549750891680638,,,,,,,,,,0.0,, +377,BA_03_53_04,COc1ccc2c(-c3ccnc(Nc4ccc(N5CCOCC5)cc4)n3)cnn2n1,,5.824924592355906,,5.209294342423722,,5.663139266208908,,6.3290190292376,,4.837143150361911,<,4.522878745280337,<,4.522878745280337,,6.6043667019657,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0 +378,ZDG_7_9,Cc1cc(=O)n(C2CCCC2)c2nc(Nc3ccc(C(=O)NCC(F)F)cc3)ncc12,,4.956505457771383,,4.703039486786711,,4.990639364692515,<,4.522878745280337,,4.74687777691037,,6.067682583262696,<,4.522878745280337,,5.269688804153579,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0 +379,ZDG_7_37_C,Cn1cc(-c2ccc3c(c2)CCN3C(=O)CC2Cc3ccccc3C2)c2c(N)ncnc21,,5.105383633393123,,5.257293748542373,,5.378444490078154,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+380,ZDG_7_50_5,CN1CCN(C(=O)c2ccc(Nc3nccc(-c4ccc(NC(=O)[C@@H]5CCCN5)cc4)n3)cc2)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.154876214572618,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +381,ZDG_7_50_3,CN1CCN(c2ccc(Nc3nccc(-c4ccc(NC(=O)[C@@H]5CCCN5)cc4)n3)cc2)CC1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,5.493317766712035,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,4.58678479491856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +382,PAR_108,O=C(NC1CCCCCC1)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,4.648976885597432,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +383,TIE_2_VEGFR_2_kinase_IN_2,Nc1ncnc2occ(-c3ccc(NC(=O)Nc4cc(C(F)(F)F)ccc4F)cc3)c12,,4.923856956015497,,5.279845965163705,,5.358873766581655,<,4.522878745280337,<,4.522878745280337,,5.275306784282457,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +384,BA_03_65_b,COC(=O)c1sc(-n2cnc3cc(OC)c(OC)cc32)cc1OCc1ccc(-c2ccncc2)s1,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +385,ZDG_7_36_C,CC#CC(=O)N1CCc2cc(-c3cn(C)c4ncnc(N)c34)ccc21,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +386,3739,N#Cc1c(Nc2nc(Nc3ccc(N4CCNCC4)cc3)ncc2Cl)cccc1OCc1cccc(F)c1,,,,,,6.823908740944318,,,,,,,,,,,,,1.0,,,,, +387,ZDG_6_48_1,COc1cc(Nc2ncc3c(-c4cccc(NS(=O)(=O)C5CC5)c4)nn(C)c3n2)cc(OC)c1OC,,7.134526430898649,<,4.522878745280337,,6.827566657475918,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,,6.980961650786502,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0 
+388,3743,N#Cc1c(Nc2nc(Nc3cnn(C4CCNCC4)c3)ncc2Cl)cccc1OCc1ccccc1F,,,,,,6.823908740944318,,,,,,,,,,,,,1.0,,,,, +389,ZDG_7_23_2,COc1cc2ncn(-c3cc(OCc4ccc(F)cc4)c(C(N)=O)s3)c2cc1OC,<,4.522878745280337,<,4.522878745280337,,4.595719696676417,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +390,PAR_408,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)c4ccc(C#N)cc4)C3)n2)cc(OC)c1OC,,5.598642092872625,,4.692209917210851,,4.864752009196747,<,4.522878745280337,,4.769202671483697,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +391,3821,CCOc1ccc2c(-c3ccnc(Nc4ccc(N5CCN(C)CC5)cc4)n3)cnn2n1,,,,,,,,,,,,7.236572006437062,,,,,,,,,,1.0,, +392,BA_03_66_c,COc1cc2ncn(-c3cc(OCc4sccc4F)c(C(N)=O)s3)c2cc1OC,<,4.522878745280337,,4.534933246450109,,4.987977287080152,<,4.522878745280337,,4.6026568572406905,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +393,PAR_149,O=C(Nc1nc2c(s1)-c1nc(-c3ccccc3Br)ncc1CC2)N1CCC(N2CCCCC2)CC1,,4.803895504413972,<,4.522878745280337,,4.70292748711545,,4.937318844782486,<,4.522878745280337,,5.305326640282966,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +394,PAR_247,CCOc1ccsc1C(=O)Nc1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1,,5.213153750968695,,5.065611488315886,,4.91383756830168,,5.220674896840294,,5.14240597996013,,5.22581934838519,<,4.522878745280337,,5.141058054333915,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +395,CHZ868,CC(=O)Nc1cc(Oc2ccc3c(nc(Nc4ccc(F)cc4F)n3C)c2C)ccn1,<,4.522878745280337,,4.73755906290163,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 
+396,HS_1371,Cc1ccc(Oc2ccnc3cc(-c4cnn(C5CCNCC5)c4)ccc23)cc1,<,4.522878745280337,,4.546058875121365,,4.6313362303328764,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,<,4.522878745280337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +397,3722,N#Cc1c(Nc2nc(Nc3ccc(N4CCOCC4)cc3)ncc2Cl)cccc1OCc1ccccc1F,,,,,,6.823908740944318,,,,,,,,,,,,,1.0,,,,, +398,3822,CCOc1ccc2c(-c3ccnc(Nc4cccc(S(N)(=O)=O)c4)n3)cnn2n1,,,,,,,,,,,,7.119186407719209,,,,,,,,,,1.0,, +399,3738,N#Cc1c(Nc2nc(Nc3ccc(N4CCNCC4)cc3)ncc2Cl)cccc1OCc1ccc(F)cc1,,,,,,6.823908740944318,,,,,,,,,,,,,1.0,,,,, +400,3816,CCOc1ccc2c(-c3ccnc(Nc4cccc(NC(C)=O)c4)n3)cnn2n1,,,,,,,,,,,,7.795880017344074,,,,,,,,,,1.0,, +401,3740,CN1CCN(Cc2ccc(Nc3ncc(Cl)c(Nc4ccc(C#N)c(OCc5ccccc5F)c4)n3)cc2)CC1,,,,,,6.578396073130168,,,,,,,,,,,,,1.0,,,,, +402,3741,N#Cc1c(Nc2nc(Nc3ccc(CN4CCOCC4)cc3)ncc2Cl)cccc1OCc1ccccc1F,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, +403,3789,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)[C@@H]4COc5ccccc5O4)C3)n2)cc(OC)c1OC,,,,,,,,,,,,5.920818753952374,,,,,,,,,,0.0,, +404,3814,CCOc1ccc2c(-c3ccnc(Nc4ccc(S(C)(=O)=O)cc4)n3)cnn2n1,,,,,,,,,,,,7.853871964321761,,,,,,,,,,1.0,, +405,3825,CCOc1ccc2c(-c3ccnc(Nc4cccc(C(=O)N5CCN(C)CC5)c4)n3)cnn2n1,,,,,,,,,,,,6.815308569182401,,,,,,,,,,1.0,, +406,3792,COc1cc(Nc2nccc(N3CC[C@@H](NC(=O)[C@H]4COc5ccccc5O4)C3)n2)cc(OC)c1OC,,,,,,,,,,,,5.337242168318426,,,,,,,,,,0.0,, +407,3837,Cc1cc(Nc2nccc(-c3cnn4ncccc34)n2)ccc1N1CCN(C)CC1,,,,,,,,,,,,6.060480747381381,,,,,,,,,,1.0,, +408,3731,COc1cc(Nc2ncc(Cl)c(Nc3cccc(OCc4ccccc4F)c3C#N)n2)ccc1N1CCNCC1,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, +409,3818,CCOc1ccc2c(-c3ccnc(Nc4ccc(Cl)c(Cl)c4)n3)cnn2n1,,,,,,,,,,,,7.568636235841011,,,,,,,,,,1.0,, +410,3728,N#Cc1c(Nc2nc(Nc3ccc(N4CCN(CCO)CC4)cc3)ncc2Cl)cccc1OCc1ccccc1F,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, +411,3725,N#Cc1c(Nc2nc(Nc3ccc(N4CCNCC4)cc3)ncc2Cl)cccc1OCc1ccccc1F,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, 
+412,3736,N#Cc1c(Nc2nc(Nc3ccc(C(=O)N4CCNCC4)cc3)ncc2Cl)cccc1OCc1ccccc1F,,,,,,6.823908740944318,,,,,,,,,,,,,1.0,,,,, +413,3834,CN1CCN(c2cccc(Nc3nccc(-c4cnn5ncccc45)n3)c2)CC1,,,,,,,,,,,,6.300162274132754,,,,,,,,,,1.0,, +414,3730,CN1CCN(c2ccc(Nc3ncc(Cl)c(Nc4cccc(OCc5ccccc5F)c4C#N)n3)cc2)CC1,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, +415,2092,Cc1cc(Nc2nccc(-c3cccnc3)n2)ccc1NC(=O)c1cccc(NC(=O)/C=C/CN(C)C)c1,,,,,,,,,,,,6.14266750356873,,,,,,,,,,1.0,, +416,3826,COc1ccc2c(-c3ccnc(Nc4ccc(N5CCN(C)CC5)cc4)n3)cnn2n1,,,,,,,,,,,,6.7594507517174,,,,,,,,,,1.0,, +417,3727,COc1cnc(Nc2ccc(N3CCNCC3)cc2)nc1Nc1cccc(OCc2ccccc2F)c1C#N,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, +418,3744,N#Cc1c(Nc2nc(Nc3ccc(N4CCNCC4)nc3)ncc2Cl)cccc1OCc1ccccc1F,,,,,,6.823908740944318,,,,,,,,,,,,,1.0,,,,, +419,3726,CN1CCN(c2ccc(Nc3ncc(Cl)c(Nc4cccc(OCc5c(F)cccc5F)c4C#N)n3)cc2)CC1,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, +420,3835,CCOc1ccc2c(-c3ccnc(Nc4cccc(S(=O)(=O)C(F)(F)F)c4)n3)cnn2n1,,,,,,,,,,,,6.291579099865286,,,,,,,,,,1.0,, +421,3824,CCOc1ccc2c(-c3ccnc(Nc4cccc(C(N)=O)c4)n3)cnn2n1,,,,,,,,,,,,6.863279432843592,,,,,,,,,,1.0,, +422,3745,N#Cc1c(Nc2nc(Nc3ccc(CN4CCNCC4)cc3)ncc2Cl)cccc1OCc1ccccc1F,,,,,,6.823908740944318,,,,,,,,,,,,,1.0,,,,, +423,2094,CN(C)CCCC(=O)Nc1cccc(C(=O)Nc2ccc(Nc3nccc(-c4cccnc4)n3)cc2)c1,,,,,,,,,,,,5.616184634019568,,,,,,,,,,0.0,, +424,3793,COc1cc(Nc2nccc(N3CC[C@H](NC(=O)[C@H]4COc5ccccc5O4)C3)n2)cc(OC)c1OC,,,,,,,,,,,,5.070581074285707,,,,,,,,,,0.0,, +425,3729,COc1cc(N2CCNCC2)ccc1Nc1ncc(Cl)c(Nc2cccc(OCc3ccccc3F)c2C#N)n1,,,,,,6.823908740944318,,,,,,,,,,,,,1.0,,,,, +426,3724,C[C@H](Oc1cccc(Nc2nc(Nc3ccc(N4CCN(C)CC4)cc3)ncc2Cl)c1C#N)c1ccccc1F,,,,,,7.30102999566398,,,,,,,,,,,,,1.0,,,,, +427,3828,COc1ccc2c(-c3ccnc(Nc4ccc(N5CCN(C)CC5)c(F)c4)n3)cnn2n1,,,,,,,,,,,,6.707743928643524,,,,,,,,,,1.0,, +428,3737,COc1ccccc1COc1cccc(Nc2nc(Nc3ccc(N4CCNCC4)cc3)ncc2Cl)c1C#N,,,,,,6.823908740944318,,,,,,,,,,,,,1.0,,,,, 
+429,2099,Cc1ccc(-c2ccnc(Nc3ccc(NC(=O)c4cccc(NC(=O)/C=C/CN(C)C)c4)cc3)n2)cn1,,,,,,,,,,,,6.070581074285706,,,,,,,,,,1.0,, diff --git a/atomsci/ddm/test/integrative/sampling_test/nanobret_multitask_classification_data_train_valid_test_multitaskscaffold_e34ba827-a532-4313-9e63-8a9b0ed18ba9.csv b/atomsci/ddm/test/integrative/sampling_test/nanobret_multitask_classification_data_train_valid_test_multitaskscaffold_e34ba827-a532-4313-9e63-8a9b0ed18ba9.csv new file mode 100755 index 00000000..9699217d --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/nanobret_multitask_classification_data_train_valid_test_multitaskscaffold_e34ba827-a532-4313-9e63-8a9b0ed18ba9.csv @@ -0,0 +1,431 @@ +cmpd_id,subset,fold +PAR_272,train,0 +PAR_315,train,0 +ZDG_7_52_4,train,0 +HO_N_101,train,0 +HO_N_57_1,train,0 +BA_03_61_01,train,0 +PAR_182,train,0 +BA_03_69_c,train,0 +PAR_334,train,0 +ZDG_7_50_2,train,0 +3746,train,0 +ATH686,train,0 +TL01_022,train,0 +BA_03_53_12,train,0 +PAR_379,train,0 +PAR_331,train,0 +PAR_403,train,0 +PAR_380,train,0 +HO_N_90,train,0 +PAR_142,train,0 +BA_03_56_12,train,0 +TL01_026,train,0 +GW843682,train,0 +BA_03_60_04,train,0 +HO_N_135_2_A,train,0 +AST487,train,0 +PAR_310,train,0 +PAR_268,train,0 +HO_N_133_2_A,train,0 +TL01_019,train,0 +PAR_291,train,0 +ZDG_7_40_C,train,0 +HO_N_136_6_A,train,0 +BA_03_59_02,train,0 +ZDG_7_44_1,train,0 +PAR_294,train,0 +BBT594,train,0 +PAR_298,train,0 +PAR_269,train,0 +PAR_404,train,0 +HO_N_96,train,0 +PAR_261,train,0 +HO_N_42,train,0 +BA_03_50_15,train,0 +PAR_158,train,0 +PAR_402,train,0 +3820,train,0 +BA_03_50_18,train,0 +PAR_112,train,0 +BA_03_56_11,train,0 +BA_03_61_07,train,0 +PAR_314,train,0 +PAR_157,train,0 +PAR_109,train,0 +PAR_159,train,0 +HO_N_67,train,0 +HO_N_57_2,train,0 +PAR_320,train,0 +PAR_377,train,0 +PAR_383,train,0 +Bafetinib,train,0 +HO_N_131_5_A,train,0 +PAR_371,train,0 +PAR_369,train,0 +ZDG_6_49_1,train,0 +ZDG_7_33_C,train,0 +BA_03_51_15,train,0 +BA_03_66_a,train,0 +BA_03_53_11,train,0 +ZDG_7_25_3,train,0 
+HO_N_116,train,0 +Brepocitinib,train,0 +HO_N_115,train,0 +BA_03_50_20,train,0 +BA_03_69_a,train,0 +HO_N_137_A,train,0 +PAR_249,train,0 +PAR_361,train,0 +PAR_274,train,0 +ZDG_5_55_6,train,0 +BA_03_50_21,train,0 +HO_N_95,train,0 +EF_3_101,train,0 +ZDG_6_50_1,train,0 +ZDG_7_52_6,train,0 +HO_N_104,train,0 +HO_N_99,train,0 +PAR_277,train,0 +BA_03_50_01,train,0 +BA_03_55_12,train,0 +ZDG_7_23_1,train,0 +EF_3_103,train,0 +PAR_299,train,0 +BA_03_50_22,train,0 +ZDG_6_59_1,train,0 +ZDG_7_35_B,train,0 +BA_03_59_04,train,0 +PAR_322,train,0 +PAR_342,train,0 +HO_N_139_2,train,0 +HO_N_129,train,0 +ZDG_7_41_A,train,0 +HO_N_135_2_E,train,0 +PAR_374,train,0 +BA_03_59_10,train,0 +PAR_311,train,0 +ZDG_5_55_7,train,0 +ZDG_7_37_B,train,0 +ZDG_7_11,train,0 +ZDG_7_43_2,train,0 +PAR_205,train,0 +ZDG_7_27_1,train,0 +CE_245677,train,0 +ZDG_7_14,train,0 +HO_N_136_2_E,train,0 +BA_03_59_03,train,0 +PAR_394,train,0 +BA_03_51_19,train,0 +PAR_398,train,0 +ZDG_7_21_2,train,0 +HO_N_98,train,0 +PAR_90,train,0 +BA_03_59_05,train,0 +PAR_338,train,0 +ZDG_7_44_4,train,0 +BA_03_59_07,train,0 +ZDG_6_48_4,train,0 +BA_03_65_c,train,0 +BA_03_61_04,train,0 +ZDG_6_60,train,0 +HO_N_133_1_A,train,0 +PAR_360,train,0 +3723,train,0 +BA_03_80_A,train,0 +BA_03_60_03,train,0 +BA_03_66_h,train,0 +ZDG_7_24_1,train,0 +PAR_168,train,0 +PAR_111,train,0 +BA_03_60_02,train,0 +EF_3_105,train,0 +PAR_321,train,0 +PAR_318,train,0 +Derazantinib,train,0 +PAR_345,train,0 +PAR_381,train,0 +ZDG_6_50_4,train,0 +TL01_010,train,0 +TL01_024,train,0 +HO_N_135_7_A,train,0 +ZDG_6_48_2,train,0 +PAR_370,train,0 +PAR_241,train,0 +ZDG_7_26_1,train,0 +Tpl2_Kinase_Inhibitor_1,train,0 +BA_03_61_10,train,0 +ZDG_7_46_1,train,0 +PAR_162,train,0 +3842,train,0 +BA_03_50_11,train,0 +AMG_47a,train,0 +Golvatinib,train,0 +PAR_372,train,0 +PAR_113,train,0 +BA_03_51_18,train,0 +PAR_316,train,0 +3732,train,0 +S116836,train,0 +BA_03_50_19,train,0 +PAR_356,train,0 +BA_03_50_07,train,0 +HO_N_138_2_A,train,0 +HO_N_73,train,0 +BA_03_65_a,train,0 +ZDG_6_38,train,0 
+TL01_023,train,0 +PAR_333,train,0 +ZDG_6_48_3,train,0 +HO_N_71,train,0 +ZDG_7_24_2,train,0 +BA_03_59_01,train,0 +PAR_395,train,0 +ZDG_6_59_3,train,0 +PAR_309,train,0 +BA_03_61_05,train,0 +ZDG_7_40_B,train,0 +PAR_138,train,0 +HO_N_131_5_E,train,0 +BA_03_56_04,train,0 +XL_019,train,0 +PAR_139,train,0 +ZDG_6_50_3,train,0 +BA_03_51_22,train,0 +PAR_271,train,0 +3735,train,0 +ba_03_55_11,train,0 +PAR_260,train,0 +HO_N_100,train,0 +PAR_348,train,0 +PAR_183,train,0 +HO_N_49,train,0 +HO_N_110,train,0 +G1T38,train,0 +ZDG_6_47,train,0 +ZDG_7_32_B,train,0 +PAR_312,train,0 +PAR_275,train,0 +ZDG_7_24_3,train,0 +HO_N_136_2_A,train,0 +ZDG_7_50_1,train,0 +ZDG_7_35_A,train,0 +3838,train,0 +ALW_II_49_7,train,0 +BA_03_50_13,train,0 +BA_03_50_12,train,0 +BA_03_61_02,train,0 +Ifidancitinib,train,0 +ZDG_7_35_C,train,0 +GSK461364,train,0 +PAR_141,train,0 +ZDG_7_26_3,train,0 +BA_03_50_03,train,0 +PAR_378,train,0 +PAR_248,train,0 +ZDG_7_34_B,train,0 +PAR_353,train,0 +PAR_140,train,0 +ZDG_7_51_7,train,0 +ZDG_6_72,train,0 +ZDG_7_53_1,train,0 +Nimucitinib,train,0 +HO_N_83,train,0 +PAR_110,train,0 +ZDG_6_41,train,0 +ZDG_7_47_3,train,0 +BA_03_53_04,train,0 +ZDG_7_9,train,0 +ZDG_7_37_C,train,0 +ZDG_7_50_5,train,0 +ZDG_7_50_3,train,0 +PAR_108,train,0 +TIE_2_VEGFR_2_kinase_IN_2,train,0 +ZDG_7_36_C,train,0 +3739,train,0 +ZDG_6_48_1,train,0 +ZDG_7_23_2,train,0 +PAR_408,train,0 +3821,train,0 +BA_03_66_c,train,0 +HS_1371,train,0 +3722,train,0 +3738,train,0 +3740,train,0 +3741,train,0 +3825,train,0 +3837,train,0 +3731,train,0 +3728,train,0 +3725,train,0 +3736,train,0 +3834,train,0 +3730,train,0 +3826,train,0 +3727,train,0 +3744,train,0 +3726,train,0 +3745,train,0 +3729,train,0 +3724,train,0 +3828,train,0 +3737,train,0 +PAR_337,valid,0 +Narazaciclib,valid,0 +PAR_363,valid,0 +PAR_335,valid,0 +HO_N_135_4_A,valid,0 +3827,valid,0 +Altiratinib,valid,0 +PAR_252,valid,0 +ZDG_7_43_3,valid,0 +PAR_376,valid,0 +PAR_375,valid,0 +BA_03_53_01,valid,0 +EF_3_203,valid,0 +ZDG_7_51_5,valid,0 +TL01_020,valid,0 
+GSK2606414,valid,0 +Tovorafenib,valid,0 +PAR_323,valid,0 +HO_N_105,valid,0 +GCN2_IN_1,valid,0 +ZDG_7_39_A,valid,0 +PAR_406,valid,0 +BA_03_55_01,valid,0 +Axitinib,valid,0 +3829,valid,0 +ZDG_7_31_A,valid,0 +HO_N_133_4_A,valid,0 +2093,valid,0 +PAR_405,valid,0 +HO_N_136_5_A,valid,0 +Encorafenib,valid,0 +BA_03_56_14,valid,0 +3819,valid,0 +Rac_CCT_250863,valid,0 +PAR_330,valid,0 +PAR_382,valid,0 +ZDG_7_38_A,valid,0 +BA_03_55_14,valid,0 +HO_N_136_4_A,valid,0 +HO_N_135_4_E,valid,0 +BA_03_50_08,valid,0 +BA_03_53_14,valid,0 +HO_N_134_E,valid,0 +PAR_228,valid,0 +ZDG_6_48_7,valid,0 +HO_N_136_3_A,valid,0 +BA_03_53_06,valid,0 +HO_N_134_A,valid,0 +ZDG_7_39_C,valid,0 +PAR_324,valid,0 +HO_N_132_3_E,valid,0 +3853,valid,0 +3790,valid,0 +TC_S_7005,valid,0 +PAR_400,valid,0 +RSS0680,valid,0 +ALK_kinase_inhibitor_1,valid,0 +HO_N_135_5_A,valid,0 +ZDG_7_42_1,valid,0 +HO_N_135_1_A,valid,0 +HO_N_132_3_A,valid,0 +EF_3_201,valid,0 +PAR_327,valid,0 +HO_N_135_8_A,valid,0 +PAR_336,valid,0 +3791,valid,0 +BA_03_56_01,valid,0 +BA_03_53_08,valid,0 +PAR_401,valid,0 +BA_03_53_13,valid,0 +KW_2449,valid,0 +PAR_270,valid,0 +BA_03_53_05,valid,0 +PAR_357,valid,0 +ZDG_6_59_2,valid,0 +3833,valid,0 +HO_N_136_7_A,valid,0 +3801,valid,0 +HO_N_133_3_E,valid,0 +HO_N_140,valid,0 +HO_N_62,valid,0 +3823,valid,0 +HO_N_133_3_A,valid,0 +PAR_396,valid,0 +ZDG_7_41_3,valid,0 +ON1231320,valid,0 +BA_03_61_03,valid,0 +PAR_81,valid,0 +3856,valid,0 +3743,valid,0 +PAR_149,valid,0 +PAR_247,valid,0 +CHZ868,valid,0 +3822,valid,0 +3816,valid,0 +3789,valid,0 +3814,valid,0 +3792,valid,0 +3818,valid,0 +2092,valid,0 +3835,valid,0 +3824,valid,0 +2094,valid,0 +3793,valid,0 +2099,valid,0 +BA_03_50_04,test,0 +GSK329,test,0 +PAR_225,test,0 +ZDG_7_46_3,test,0 +HO_N_138_3_A,test,0 +ZDG_6_64,test,0 +BA_03_78_d,test,0 +PAR_355,test,0 +ZDG_6_75_4,test,0 +BA_03_50_05,test,0 +ZDG_2_91,test,0 +ZDG_6_51_2,test,0 +ZDG_7_48_1,test,0 +ZDG_7_44_3,test,0 +ZDG_7_45_1,test,0 +ZDG_7_47_1,test,0 +Culmerciclib,test,0 +3_IN_PP1,test,0 +GCN2iB,test,0 
+Tivozanib_hydrochloride_hydrate,test,0 +PAR_89,test,0 +PAR_244,test,0 +MAX_40279,test,0 +ZDG_7_46_4,test,0 +ZDG_7_43_1,test,0 +ZDG_6_61_N2,test,0 +PAR_358,test,0 +BA_03_66_b,test,0 +ZDG_2_93,test,0 +BA_03_60_05,test,0 +ZDG_6_75_3,test,0 +BA_03_60_01,test,0 +ZDG_7_43_4,test,0 +BA_03_60_10,test,0 +ZDG_6_75_2,test,0 +ZDG_7_47_2,test,0 +ZDG_6_67,test,0 +BA_03_50_06,test,0 +BSc5367,test,0 +2096,test,0 +Casein_Kinase_II_Inhibitor_IV,test,0 +BI_882370,test,0 +PAR_351,test,0 +ZDG_7_15,test,0 +BA_03_78_c,test,0 +BA_03_60_07,test,0 +PAR_354,test,0 +ZDG_7_45_2,test,0 +PAR_362,test,0 +HO_N_133_5_A,test,0 +ZDG_2_92,test,0 +TBAP_001,test,0 +ZDG_6_66,test,0 +BA_03_65_b,test,0 diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_NN_SMOTE.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_NN_SMOTE.json new file mode 100644 index 00000000..41febfb6 --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_NN_SMOTE.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "NN", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "k_fold_cv", +"splitter": "random", +"sampling_method": "SMOTE", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"response_cols":"active", +"smiles_col": "base_rdkit_smiles", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_NN_undersampling.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_NN_undersampling.json new file mode 100644 index 00000000..86f7bcf2 --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_NN_undersampling.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "NN", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": 
"k_fold_cv", +"splitter": "random", +"sampling_method": "undersampling", +"smiles_col": "base_rdkit_smiles", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_RF_SMOTE.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_RF_SMOTE.json new file mode 100644 index 00000000..d2b95fb8 --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_RF_SMOTE.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "RF", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "k_fold_cv", +"splitter": "random", +"sampling_method": "SMOTE", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"response_cols":"active", +"smiles_col": "base_rdkit_smiles", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_RF_undersampling.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_RF_undersampling.json new file mode 100644 index 00000000..fda25eff --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_RF_undersampling.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "RF", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "k_fold_cv", +"splitter": "random", +"sampling_method": "undersampling", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"smiles_col": "base_rdkit_smiles", +"id_col": "compound_id", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git 
a/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_xgboost_SMOTE.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_xgboost_SMOTE.json new file mode 100644 index 00000000..dafca23c --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_xgboost_SMOTE.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "xgboost", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "k_fold_cv", +"sampling_method": "SMOTE", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"response_cols":"active", +"smiles_col": "base_rdkit_smiles", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_xgboost_undersampling.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_xgboost_undersampling.json new file mode 100644 index 00000000..14b43f1c --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/kfold_cv_xgboost_undersampling.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "xgboost", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "k_fold_cv", +"sampling_method": "undersampling", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"response_cols":"active", +"smiles_col": "base_rdkit_smiles", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_NN_SMOTE.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_NN_SMOTE.json new file mode 100644 index 00000000..7fdbb4d3 --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_NN_SMOTE.json @@ 
-0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "NN", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "train_valid_test", +"splitter": "scaffold", +"sampling_method": "SMOTE", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_NN_undersampling.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_NN_undersampling.json new file mode 100644 index 00000000..bd771283 --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_NN_undersampling.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "NN", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "train_valid_test", +"sampling_method": "undersampling", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"response_cols":"active", +"smiles_col": "base_rdkit_smiles", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_RF_SMOTE.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_RF_SMOTE.json new file mode 100644 index 00000000..88a761f7 --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_RF_SMOTE.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "RF", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "train_valid_test", +"sampling_method": "SMOTE", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", 
+"transformers": "True", +"id_col": "compound_id", +"response_cols":"active", +"smiles_col": "base_rdkit_smiles", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_RF_undersampling.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_RF_undersampling.json new file mode 100644 index 00000000..e88717a1 --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_RF_undersampling.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "RF", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "train_valid_test", +"sampling_method": "undersampling", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"response_cols":"active", +"smiles_col": "base_rdkit_smiles", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_xgboost_SMOTE.json b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_xgboost_SMOTE.json new file mode 100644 index 00000000..8845062f --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_xgboost_SMOTE.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "xgboost", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "train_valid_test", +"sampling_method": "SMOTE", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"response_cols":"active", +"smiles_col": "base_rdkit_smiles", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_xgboost_undersampling.json 
b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_xgboost_undersampling.json new file mode 100644 index 00000000..840333e1 --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/sampling_json/train_valid_test_xgboost_undersampling.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "xgboost", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "train_valid_test", +"sampling_method": "undersampling", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"response_cols":"active", +"smiles_col": "base_rdkit_smiles", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/test_sampling.py b/atomsci/ddm/test/integrative/sampling_test/test_sampling.py new file mode 100644 index 00000000..d859a4ac --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/test_sampling.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python +"""Testing the sampling methods. Want to ensure that the model pipeline works and that the sampling methods are incorporated. +Based off of the test_kfold_split.py method. 
""" +import sklearn.metrics as skmetrics +import copy +import os +import json + +from atomsci.ddm.pipeline import model_pipeline as mp +from atomsci.ddm.pipeline import parameter_parser as parse +import atomsci.ddm.pipeline.predict_from_model as pfm + +import sys +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from integrative_utilities import extract_seed, get_test_set, find_best_test_metric + +#------------------------------------------------------------------- + +def saved_model_identity(pparams): + script_path = os.path.dirname(os.path.realpath(__file__)) + retrain_pparams = copy.copy(pparams) + + model_pipe = mp.ModelPipeline(pparams) + + if not pparams.previously_split: + split_uuid = model_pipe.split_dataset() + pparams.split_uuid = split_uuid + pparams.previously_split = True + pparams.split_only=False + + model_pipe.train_model() + + split_csv = os.path.join(script_path, '../../test_datasets/', model_pipe.data._get_split_key()) + test_df = get_test_set(pparams.dataset_key, split_csv, pparams.id_col) + + # load model metrics from file + with open(os.path.join(pparams.output_dir, 'model_metrics.json'), 'r') as f: + model_metrics = json.load(f) + + metrics = find_best_test_metric(model_metrics) + original_accuracy = metrics['prediction_results']['accuracy_score'] + original_precision = metrics['prediction_results']['precision'] + original_recall = metrics['prediction_results']['recall_score'] + original_prc_auc = metrics['prediction_results']['prc_auc_score'] + + id_col = metrics['input_dataset']['id_col'] + response_col=metrics['input_dataset']['response_cols'][0] + smiles_col = metrics['input_dataset']['smiles_col'] + test_length = metrics['prediction_results']['num_compounds'] + + # predict from model + model_tar = model_pipe.params.model_tarball_path + pred_df = pfm.predict_from_model_file(model_tar, test_df, id_col=id_col, + smiles_col=smiles_col, response_col=response_col) + # generate another prediction from the same model file + 
pred_df2 = pfm.predict_from_model_file(model_tar, test_df, id_col=id_col, smiles_col=smiles_col, response_col=response_col) + + X = pred_df[response_col+'_actual'].values + y = pred_df[response_col+'_pred'].values + + accuracy = skmetrics.accuracy_score(X, y) + precision = skmetrics.precision_score(X, y, average='weighted') + recall = skmetrics.recall_score(X, y, average='weighted') + prc_auc = skmetrics.average_precision_score(X, y) + + # return the metrics from the second prediction + X2 = pred_df2[response_col+'_actual'].values + y2 = pred_df2[response_col+'_pred'].values + + x2_accuracy = skmetrics.accuracy_score(X2, y2) + x2_precision = skmetrics.precision_score(X2, y2, average='weighted') + x2_recall = skmetrics.recall_score(X2, y2, average='weighted') + x2_prc_auc = skmetrics.average_precision_score(X2, y2) + + #saved_accuracy = metrics['prediction_results']['accuracy_score'] + #saved_precision = metrics['prediction_results']['precision'] + #saved_recall = metrics['prediction_results']['recall_score'] + #saved_prc_auc = metrics['prediction_results']['prc_auc_score'] + + # show results and compare the two predictions + print(metrics['subset']) + print(pred_df.columns) + print("Prediction results") + print("Accuracy difference:", abs(accuracy - x2_accuracy)) + print("Precision difference:", abs(precision - x2_precision)) + print("Recall difference:", abs(recall-x2_recall)) + print("PRC AUC difference:", abs(prc_auc-x2_prc_auc)) + + assert abs(accuracy - x2_accuracy) < 1e-9 \ + and abs(precision - x2_precision) < 1e-9 \ + and abs(recall - x2_recall) < 1e-9 \ + and abs(prc_auc - x2_prc_auc) < 1e-9 \ + and (test_length == len(test_df)) + + # create another test to ensure that the sampling methods are reproducible with the seed + metadata_path = os.path.join(pparams.output_dir, 'model_metadata.json') + seed = extract_seed(metadata_path) + + # create a duplicate parameters and add the seed + retrain_pparams.seed = seed + retrain_pparams.model_uuid = None + + # 
retrain the model + retrain_pipe = mp.ModelPipeline(retrain_pparams) + retrain_pipe.train_model() + + # extract the metrics from the retrained model + with open(os.path.join(retrain_pparams.output_dir, 'model_metrics.json'), 'r') as f: + retrained_model_metrics = json.load(f) + + retrained_metrics = find_best_test_metric(retrained_model_metrics) + retrained_accuracy = retrained_metrics['prediction_results']['accuracy_score'] + retrained_precision = retrained_metrics['prediction_results']['precision'] + retrained_recall = retrained_metrics['prediction_results']['recall_score'] + retrained_prc_auc = retrained_metrics['prediction_results']['prc_auc_score'] + + print("Model reproducibility results") + print("Accuracy difference:", abs(original_accuracy-retrained_accuracy)) + print("Precision difference:", abs(original_precision-retrained_precision)) + print("Recall difference:", abs(original_recall-retrained_recall)) + print("PRC AUC difference:", abs(original_prc_auc-retrained_prc_auc)) + + assert abs(original_accuracy - retrained_accuracy) < 1e-9 \ + and abs(original_precision - retrained_precision) < 1e-9 \ + and abs(original_recall - retrained_recall) < 1e-9 \ + and abs(original_prc_auc - retrained_prc_auc) < 1e-9 +#------------------------------------------------------------------- + +#-------- random forest +def test_train_valid_test_RF_SMOTE(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/train_valid_test_RF_SMOTE.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) + +def test_k_fold_cv_RF_SMOTE(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/kfold_cv_RF_SMOTE.json') + + pparams = 
parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) + +def test_k_fold_cv_RF_undersampling(): + + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/kfold_cv_RF_undersampling.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) + +def test_train_valid_test_RF_undersampling(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/train_valid_test_RF_undersampling.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) + +#-------- neural network + +def test_train_valid_test_NN_SMOTE(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/train_valid_test_NN_SMOTE.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) + +def test_train_valid_test_NN_undersampling(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/train_valid_test_NN_undersampling.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + 
'../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) + +def test_k_fold_cv_NN_SMOTE(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/kfold_cv_NN_SMOTE.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) + +def test_k_fold_cv_NN_undersampling(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/kfold_cv_NN_undersampling.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) + +#-------- xgboost + +def test_train_valid_test_xgboost_SMOTE(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/train_valid_test_xgboost_SMOTE.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) + +def test_train_valid_test_xgboost_undersampling(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/train_valid_test_xgboost_undersampling.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 
'test-split' + + saved_model_identity(pparams) + +def test_k_fold_cv_xgboost_SMOTE(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/kfold_cv_xgboost_SMOTE.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) + +def test_k_fold_cv_xgboost_undersampling(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'sampling_json/kfold_cv_xgboost_undersampling.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key= os.path.join(script_path, + '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + pparams.split_uuid= 'test-split' + + saved_model_identity(pparams) +#------------------------------------------------------------------- + +if __name__=='__main__': + #print('train_valid_test_RF_SMOTE_test') + #test_train_valid_test_RF_SMOTE() + + #print('train_valid_test_NN_SMOTE_test') + #test_train_valid_test_NN_SMOTE() + + #print("train_valid_test_RF_undersampling_test") + #test_train_valid_test_RF_undersampling() + + #print("train_valid_test_NN_undersampling_test") + #test_train_valid_test_NN_undersampling() + + #print("kfold_cv_NN_SMOTE_test") + #test_k_fold_cv_NN_SMOTE() + + #print("kfold_cv_NN_undersampling_test") + #test_k_fold_cv_NN_undersampling() + + print("kfold_cv_RF_SMOTE_test") + test_k_fold_cv_RF_SMOTE() + + #print("kfold_cv_RF_undersampling_test") + #test_k_fold_cv_RF_undersampling() + + #print("train_valid_test_xgboost_SMOTE_test") + #test_train_valid_test_xgboost_SMOTE() + + #print("train_valid_test_xgboost_undersampling_test") + #test_train_valid_test_xgboost_undersampling() + + #print("k_fold_cv_xgboost_SMOTE_test") + #test_k_fold_cv_xgboost_SMOTE() + + 
#print("k_fold_cv_xgboost_undersampling_test") + #test_k_fold_cv_xgboost_undersampling() + + print("Passed!") \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/sampling_test/test_sampling_mtss_model.py b/atomsci/ddm/test/integrative/sampling_test/test_sampling_mtss_model.py new file mode 100755 index 00000000..d60c4551 --- /dev/null +++ b/atomsci/ddm/test/integrative/sampling_test/test_sampling_mtss_model.py @@ -0,0 +1,77 @@ +# smote/undersampling multitask test + +import os +import atomsci.ddm.pipeline.model_pipeline as mp +import atomsci.ddm.pipeline.parameter_parser as parse +import pytest + + +def test_mtss_model(): + script_path = os.path.dirname(os.path.realpath(__file__)) + dataset_file = os.path.join(script_path, "nanobret_multitask_classification_data.csv") + split_uuid="e34ba827-a532-4313-9e63-8a9b0ed18ba9" + odir = os.path.join(script_path, "output") + + id_col="compound_id" + smiles_col="base_rdkit_smiles" + response_cols="NEK1_active,NEK2_active,NEK3_active,NEK5_active,NEK9_active" + + params = { + # logistics input + "dataset_key": dataset_file, + "smiles_col": smiles_col, + "prediction_type": "classification", + "split_uuid": split_uuid, + "splitter": "multitaskscaffold", + "response_cols": response_cols, + "previously_split": "True", + + # dataset + "id_col": id_col, + "result_dir": odir, + + # featurization and model + "featurizer": "computed_descriptors", + "descriptor_type": "rdkit_raw", + "model_type": "NN", + # grid search + "max_epochs": "300", + "early_stopping_patience": "100", + "sampling_method":"SMOTE", + "layer_sizes": "128,128,128", + "dropouts": "0.1,0.1,0.10", + "learning_rates": "0.0007", + + # extras, can be deleted as needed + "system": "LC", + "verbose": "True", + } + ampl_param = parse.wrapper(params) + pl = mp.ModelPipeline(ampl_param) + with pytest.raises(ValueError) as e: + # this should say + # Imbalanced-learn currently supports binary, multiclass and binarized encoded multiclasss targets. 
Multilabel and multioutput targets are not supported. + pl.train_model() + print("done") + +def test_imblearn_mtss_compatibility(): + # this just shows that all SMOTE methods do not work with multitask problems. + import sklearn.datasets as skdatasets + import numpy as np + from imblearn.over_sampling import SMOTE, ADASYN, BorderlineSMOTE, KMeansSMOTE, SVMSMOTE + + X, y = skdatasets.make_classification() + print(X.shape, y.shape) + multi_y = np.vstack([y, y, y]).transpose() + print(multi_y.shape) + + for sampler in [SMOTE, ADASYN, BorderlineSMOTE, KMeansSMOTE, SVMSMOTE]: + sm = sampler() + try: + _x, _y = sm.fit_resample(X, multi_y) + except Exception as e: + print(sm) + print(e) + +if __name__ == "__main__": + test_mtss_model() diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/attentivefp_regression_train_valid_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/attentivefp_regression_train_valid_test.json new file mode 100644 index 00000000..c87a245b --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/attentivefp_regression_train_valid_test.json @@ -0,0 +1,19 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "AttentiveFPModel", +"AttentiveFPModel_num_layers":"3", +"AttentiveFPModel_learning_rate": "0.0007", +"AttentiveFPModel_n_tasks": "1", +"featurizer":"MolGraphConvFeaturizer", +"MolGraphConvFeaturizer_use_edges":"True", +"prediction_type": "regression", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"pIC50", +"max_epochs":"15"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/graphconv_classification_train_valid_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/graphconv_classification_train_valid_test.json new file mode 100644 index 00000000..7f0380e2 --- 
/dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/graphconv_classification_train_valid_test.json @@ -0,0 +1,16 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "NN", +"featurizer": "graphconv", +"prediction_type": "classification", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"uncertainty":"False", +"max_epochs":"15"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/graphconv_regression_train_valid_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/graphconv_regression_train_valid_test.json new file mode 100644 index 00000000..3fd351fd --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/graphconv_regression_train_valid_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "NN", +"featurizer": "graphconv", +"prediction_type": "regression", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"pIC50", +"max_epochs":"15"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/nn_classification_kfold_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/nn_classification_kfold_test.json new file mode 100644 index 00000000..7ce26e8b --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/nn_classification_kfold_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "NN", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "k_fold_cv", +"splitter": "random", +"split_test_frac": "0.15", 
+"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/nn_classification_train_valid_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/nn_classification_train_valid_test.json new file mode 100644 index 00000000..c3eda35a --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/nn_classification_train_valid_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "NN", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/nn_regression_kfold_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/nn_regression_kfold_test.json new file mode 100644 index 00000000..dede5f7b --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/nn_regression_kfold_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "NN", +"featurizer": "ecfp", +"prediction_type": "regression", +"split_strategy": "k_fold_cv", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"pIC50", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/nn_regression_train_valid_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/nn_regression_train_valid_test.json new file mode 100644 index 00000000..ba86fcce --- 
/dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/nn_regression_train_valid_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "NN", +"featurizer": "ecfp", +"prediction_type": "regression", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"pIC50", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/pytorchmpnn_regression_train_valid_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/pytorchmpnn_regression_train_valid_test.json new file mode 100644 index 00000000..20b2e0b2 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/pytorchmpnn_regression_train_valid_test.json @@ -0,0 +1,19 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "PytorchMPNNModel", +"PytorchMPNNModel_mode": "regression", +"PytorchMPNNModel_learning_rate": "0.001", +"PytorchMPNNModel_n_tasks": "1", +"featurizer":"MolGraphConvFeaturizer", +"MolGraphConvFeaturizer_use_edges":"True", +"prediction_type": "regression", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"pIC50", +"max_epochs":"15"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/rf_classification_kfold_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/rf_classification_kfold_test.json new file mode 100644 index 00000000..dd8f6b64 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/rf_classification_kfold_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "RF",
+"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "k_fold_cv", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/rf_classification_train_valid_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/rf_classification_train_valid_test.json new file mode 100644 index 00000000..4777506f --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/rf_classification_train_valid_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "RF", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/rf_regression_kfold_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/rf_regression_kfold_test.json new file mode 100644 index 00000000..30bcc444 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/rf_regression_kfold_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "RF", +"featurizer": "ecfp", +"prediction_type": "regression", +"split_strategy": "k_fold_cv", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"pIC50", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/rf_regression_train_valid_test.json 
b/atomsci/ddm/test/integrative/seed_test/model_json/rf_regression_train_valid_test.json new file mode 100644 index 00000000..05415938 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/rf_regression_train_valid_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "RF", +"featurizer": "ecfp", +"prediction_type": "regression", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"pIC50", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_classification_kfold_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_classification_kfold_test.json new file mode 100644 index 00000000..7c62dcab --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_classification_kfold_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "xgboost", +"featurizer": "ecfp", +"prediction_type": "classification", +"split_strategy": "k_fold_cv", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_classification_train_valid_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_classification_train_valid_test.json new file mode 100644 index 00000000..a8072c5b --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_classification_train_valid_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "xgboost", +"featurizer": "ecfp", 
+"prediction_type": "classification", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_regression_kfold_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_regression_kfold_test.json new file mode 100644 index 00000000..7bd80469 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_regression_kfold_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "xgboost", +"featurizer": "ecfp", +"prediction_type": "regression", +"split_strategy": "k_fold_cv", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"pIC50", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_regression_train_valid_test.json b/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_regression_train_valid_test.json new file mode 100644 index 00000000..a96c72a3 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/model_json/xgboost_regression_train_valid_test.json @@ -0,0 +1,15 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"model_type": "xgboost", +"featurizer": "ecfp", +"prediction_type": "regression", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"pIC50", +"max_epochs":"100"} \ No newline at end of file diff --git 
a/atomsci/ddm/test/integrative/seed_test/split_json/test_fingerprint_train_valid_test.json b/atomsci/ddm/test/integrative/seed_test/split_json/test_fingerprint_train_valid_test.json new file mode 100644 index 00000000..bafe7399 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/split_json/test_fingerprint_train_valid_test.json @@ -0,0 +1,13 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"featurizer": "ecfp", +"split_strategy": "train_valid_test", +"splitter": "fingerprint", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/split_json/test_kfold_fingerprint_split.json b/atomsci/ddm/test/integrative/seed_test/split_json/test_kfold_fingerprint_split.json new file mode 100644 index 00000000..e03a34a0 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/split_json/test_kfold_fingerprint_split.json @@ -0,0 +1,13 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"featurizer": "ecfp", +"split_strategy": "k_fold_cv", +"splitter": "fingerprint", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/split_json/test_kfold_random_split.json b/atomsci/ddm/test/integrative/seed_test/split_json/test_kfold_random_split.json new file mode 100644 index 00000000..352e86c5 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/split_json/test_kfold_random_split.json @@ -0,0 +1,13 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"featurizer": "ecfp", +"split_strategy": "k_fold_cv", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", 
+"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/split_json/test_kfold_scaffold_split.json b/atomsci/ddm/test/integrative/seed_test/split_json/test_kfold_scaffold_split.json new file mode 100644 index 00000000..00976ccd --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/split_json/test_kfold_scaffold_split.json @@ -0,0 +1,13 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"featurizer": "ecfp", +"split_strategy": "k_fold_cv", +"splitter": "scaffold", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/split_json/test_random_train_valid_test_split.json b/atomsci/ddm/test/integrative/seed_test/split_json/test_random_train_valid_test_split.json new file mode 100644 index 00000000..188a8aa0 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/split_json/test_random_train_valid_test_split.json @@ -0,0 +1,13 @@ +{"verbose": "True", +"datastore": "False", +"save_results": "False", +"featurizer": "ecfp", +"split_strategy": "train_valid_test", +"splitter": "random", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/split_json/test_scaffold_train_valid_test_split.json b/atomsci/ddm/test/integrative/seed_test/split_json/test_scaffold_train_valid_test_split.json new file mode 100644 index 00000000..f913cc9d --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/split_json/test_scaffold_train_valid_test_split.json @@ -0,0 +1,13 @@ 
+{"verbose": "True", +"datastore": "False", +"save_results": "False", +"featurizer": "ecfp", +"split_strategy": "train_valid_test", +"splitter": "scaffold", +"split_test_frac": "0.15", +"split_valid_frac": "0.15", +"transformers": "True", +"id_col": "compound_id", +"smiles_col": "base_rdkit_smiles", +"response_cols":"active", +"max_epochs":"100"} \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/test_seed_models.py b/atomsci/ddm/test/integrative/seed_test/test_seed_models.py new file mode 100644 index 00000000..a53d7397 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/test_seed_models.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python +"""Testing the reproducibility of seeding a random seed in AMPL to reproduce models.""" +import pandas as pd +import copy +import os +import json + +from atomsci.ddm.pipeline import model_pipeline as mp +from atomsci.ddm.pipeline import parameter_parser as parse + +import sys +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from integrative_utilities import extract_seed, find_best_test_metric + +#------------------------------------------------------------------- +""" +This script does the following: +1. Generates a model +2. Extracts the seed from the model's metadata +3. Runs the model training again with seed +4. 
Compares the prediction scores to ensure they're identical + +Creates and tests the following models: +- RF, NN, xgboost, graphconv, AttentiveFP, PytorchMPNN +- regression, classification +- train_valid_test split, k-fold cv split +""" +#------------------------------------------------------------------- +def saved_model_identity(pparams): + + retrain_pparams = copy.copy(pparams) + + model_pipe = mp.ModelPipeline(pparams) + + if not pparams.previously_split: + split_uuid = model_pipe.split_dataset() + pparams.split_uuid = split_uuid + pparams.previously_split = True + pparams.split_only = False + + model_pipe.train_model() + + # load model metrics from file + with open(os.path.join(pparams.output_dir, 'model_metrics.json'), 'r') as f: + model_metrics = json.load(f) + + original_metrics = find_best_test_metric(model_metrics) + if pparams.prediction_type == 'regression': + original_mae = original_metrics['prediction_results']['mae_score'] + original_r2 = original_metrics['prediction_results']['r2_score'] + original_rms_score = original_metrics['prediction_results']['rms_score'] + elif pparams.prediction_type == 'classification': + original_accuracy = original_metrics['prediction_results']['accuracy_score'] + original_precision = original_metrics['prediction_results']['precision'] + original_recall = original_metrics['prediction_results']['recall_score'] + original_prc_auc = original_metrics['prediction_results']['prc_auc_score'] + + + # extract the seed + metadata_path = os.path.join(pparams.output_dir, 'model_metadata.json') + seed = extract_seed(metadata_path) + + # add the seed to the params + retrain_pparams.seed = seed + retrain_pparams.model_uuid = None + + # retrain the model + retrain_pipe = mp.ModelPipeline(retrain_pparams) + retrain_pipe.train_model() + #retrain_pipe = train(pparams) + + # extract the metrics from the retrained model + with open(os.path.join(retrain_pparams.output_dir, 'model_metrics.json'), 'r') as f: + retrained_model_metrics = json.load(f) + + retrained_metrics = 
find_best_test_metric(retrained_model_metrics) + if pparams.prediction_type == 'regression': + retrained_mae = retrained_metrics['prediction_results']['mae_score'] + retrained_r2 = retrained_metrics['prediction_results']['r2_score'] + retrained_rms_score = retrained_metrics['prediction_results']['rms_score'] + elif pparams.prediction_type == 'classification': + retrained_accuracy = retrained_metrics['prediction_results']['accuracy_score'] + retrained_precision = retrained_metrics['prediction_results']['precision'] + retrained_recall = retrained_metrics['prediction_results']['recall_score'] + retrained_prc_auc = retrained_metrics['prediction_results']['prc_auc_score'] + + if pparams.prediction_type == 'regression': + print("MAE difference:", abs(original_mae-retrained_mae)) + print("R2 difference:", abs(original_r2 - retrained_r2)) + print("RMS Score difference:", abs(original_rms_score - retrained_rms_score)) + + assert abs(original_mae-retrained_mae) < 1e-9 \ + and abs(original_r2 - retrained_r2) < 1e-9 \ + and abs(original_rms_score - retrained_rms_score) < 1e-9 + + elif pparams.prediction_type == 'classification': + print("Accuracy difference:", abs(original_accuracy - retrained_accuracy)) + print("Precision difference:", abs(original_precision - retrained_precision)) + print("Recall difference:", abs(original_recall - retrained_recall)) + print("PRC AUC difference:", abs(original_prc_auc- retrained_prc_auc)) + + assert abs(original_accuracy - retrained_accuracy) < 1e-9 \ + and abs(original_precision - retrained_precision) < 1e-9 \ + and abs(original_recall - retrained_recall) < 1e-9 \ + and abs(original_prc_auc - retrained_prc_auc) < 1e-9 + +#------------------------------------------------------------------- +# Random Forest +def test_RF_regression_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/rf_regression_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', 
json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_RF_classification_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/rf_classification_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_RF_regression_kfold_cv_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/rf_regression_kfold_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_RF_classification_kfold_cv_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/rf_classification_kfold_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +# Neural Network +def test_NN_regression_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/nn_regression_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_NN_regression_kfold_cv_reproducibility(): + 
script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/nn_regression_kfold_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_NN_classification_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/nn_classification_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_NN_classification_kfold_cv_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/nn_classification_kfold_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +# XGBoost +def test_xgboost_regression_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/xgboost_regression_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_xgboost_classification_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/xgboost_classification_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = 
os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_xgboost_regression_kfold_cv_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/xgboost_regression_kfold_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_xgboost_classification_kfold_cv_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/xgboost_classification_kfold_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +# graphconv +def test_graphconv_classification_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/graphconv_classification_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_graphconv_regression_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/graphconv_regression_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +# DCmodels +def 
test_attentivefp_regression_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/attentivefp_regression_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + +def test_pytorchmpnn_regression_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'model_json/pytorchmpnn_regression_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + saved_model_identity(pparams) + + +if __name__ == "__main__": + # ------ random forest + print("test_RF_regression_reproducibility") + test_RF_regression_reproducibility() + print("test_RF_regression_kfold_reproducibility") + test_RF_regression_kfold_cv_reproducibility() + print("test_RF_classification_reproducibility") + test_RF_classification_reproducibility() + print("test_RF_classification_kfold_reproducibility") + test_RF_classification_kfold_cv_reproducibility() + + # ------ neural network + print("test_NN_regression_reproducibility") + test_NN_regression_reproducibility() + print("test_NN_regression_kfold_reproducibility") + test_NN_regression_kfold_cv_reproducibility() + print("test_NN_classification_reproducibility") + test_NN_classification_reproducibility() + print("test_NN_classification_kfold_reproducibility") + test_NN_classification_kfold_cv_reproducibility() + + # ------ xgboost + print("test_xgboost_regression_reproducibility") + test_xgboost_regression_reproducibility() + print("test_xgboost_regression_kfold_reproducibility") + test_xgboost_regression_kfold_cv_reproducibility() + 
print("test_xgboost_classification_reproducibility") + test_xgboost_classification_reproducibility() + print("test_xgboost_classification_kfold_reproducibility") + test_xgboost_classification_kfold_cv_reproducibility() + + # ------ graphconv + print("test_graphconv_classification_reproducibility") + test_graphconv_classification_reproducibility() + print("test_graphconv_regression_reproducibility") + test_graphconv_regression_reproducibility() + + # ------ dcmodels + print("test_attentivefp_regression_reproducibility") + test_attentivefp_regression_reproducibility() + + print("test_pytorchmpnn_regression_reproducibility") + test_pytorchmpnn_regression_reproducibility() + + print("Passed!") + \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/seed_test/test_seed_splitting.py b/atomsci/ddm/test/integrative/seed_test/test_seed_splitting.py new file mode 100644 index 00000000..5feea923 --- /dev/null +++ b/atomsci/ddm/test/integrative/seed_test/test_seed_splitting.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python +"""Testing the reproducibility of seeding a random seed in AMPL splitters to recreate split datasets.""" + +import pandas as pd +import copy +import json +import os + +from atomsci.ddm.pipeline import model_pipeline as mp +from atomsci.ddm.pipeline import parameter_parser as parse + +import sys +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from integrative_utilities import extract_seed, modify_params_with_seed + +#---------------------------------------------------------------------------------------------------------- +def split_dataset(pparams): + model_pipe = mp.ModelPipeline(pparams) + split_uuid = model_pipe.split_dataset() + pparams.split_uuid = split_uuid + return pparams + +def compare_splits(original_split_csv, retrained_split_csv): + original_split = pd.read_csv(original_split_csv) + retrained_split = pd.read_csv(retrained_split_csv) + + comparison_df = original_split.merge(retrained_split, on='cmpd_id', 
suffixes=('_original', '_retrained')) + + # Initialize a variable to track if all comparisons are valid + all_match = True + + # Iterate through rows to compare the 'subset' and 'fold' columns + for index, row in comparison_df.iterrows(): + subset_match = (row['subset_original'] == row['subset_retrained']) + fold_match = (row['fold_original'] == row['fold_retrained']) + + if not (subset_match and fold_match): + print(f"Mismatch found for cmpd_id {row['cmpd_id']}: " + f"original subset = {row['subset_original']}, " + f"retrained subset = {row['subset_retrained']}, " + f"original fold = {row['fold_original']}, " + f"retrained fold = {row['fold_retrained']}") + all_match = False + + return all_match + +def perform_splits_and_compare(pparams): + starting_pparams=split_dataset(pparams) + # original split + script_path = os.path.dirname(os.path.realpath(__file__)) + dataset_path = os.path.join(script_path, '../../test_datasets/') + if starting_pparams.split_strategy == 'k_fold_cv': + original_split_csv = os.path.join(dataset_path, f"{starting_pparams.dataset_name}_{starting_pparams.num_folds}_fold_cv_{starting_pparams.splitter}_{starting_pparams.split_uuid}.csv") + else: + original_split_csv = os.path.join(dataset_path, f"{starting_pparams.dataset_name}_{starting_pparams.split_strategy}_{starting_pparams.splitter}_{starting_pparams.split_uuid}.csv") + + # extract the seed + metadata_path = os.path.join(starting_pparams.output_dir, 'split_metadata.json') + seed = extract_seed(metadata_path) + + # Retrain split with the same seed + retrain_pparams = copy.copy(pparams) + retrain_pparams.split_uuid = None + retrain_pparams.seed = seed + + retrain_pparams = split_dataset(retrain_pparams) + script_path = os.path.dirname(os.path.realpath(__file__)) + dataset_path = os.path.join(script_path, '../../test_datasets/') + if starting_pparams.split_strategy == 'k_fold_cv': + retrained_split_csv = os.path.join(dataset_path, 
f"{retrain_pparams.dataset_name}_{retrain_pparams.num_folds}_fold_cv_{retrain_pparams.splitter}_{retrain_pparams.split_uuid}.csv") + else: + retrained_split_csv = os.path.join(dataset_path, f"{retrain_pparams.dataset_name}_{retrain_pparams.split_strategy}_{retrain_pparams.splitter}_{retrain_pparams.split_uuid}.csv") + + # Compare splits + + splits_match = compare_splits(original_split_csv, retrained_split_csv) + + if splits_match is True: + print("The splits match exactly!") + else: + print("The splits do not match.") + + return splits_match + +#---------------------------------------------------------------------------------------------------------- + +def test_random_train_valid_test_split_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'split_json/test_random_train_valid_test_split.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + perform_splits_and_compare(pparams) + +def test_scaffold_train_valid_test_split_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'split_json/test_scaffold_train_valid_test_split.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + perform_splits_and_compare(pparams) + +def test_fingerprint_train_valid_test_split_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'split_json/test_fingerprint_train_valid_test.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + 
perform_splits_and_compare(pparams) + +def test_kfold_random_split_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'split_json/test_kfold_random_split.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + perform_splits_and_compare(pparams) + +def test_kfold_scaffold_split_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'split_json/test_kfold_scaffold_split.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + perform_splits_and_compare(pparams) + +def test_kfold_fingerprint_split_reproducibility(): + script_path = os.path.dirname(os.path.realpath(__file__)) + json_file = os.path.join(script_path, 'split_json/test_kfold_fingerprint_split.json') + + pparams = parse.wrapper(['--config_file', json_file]) + pparams.dataset_key = os.path.join(script_path, '../../test_datasets/aurka_chembl_base_smiles_union.csv') + pparams.result_dir=script_path + + perform_splits_and_compare(pparams) + +#---------------------------------------------------------------------------------------------------------- + +if __name__ == "__main__": + print("test_random_train_valid_test_split_reproducibility") + test_random_train_valid_test_split_reproducibility() + + print("test_scaffold_train_valid_test_split_reproducibility") + test_scaffold_train_valid_test_split_reproducibility() + + print("test_fingerprint_train_valid_test_split_reproducibility") + test_fingerprint_train_valid_test_split_reproducibility() + + print("test_kfold_random_split_reproducibility") + test_kfold_random_split_reproducibility() + + 
print("test_kfold_scaffold_split_reproducibility") + test_kfold_scaffold_split_reproducibility() + + print("test_kfold_fingerprint_split_reproducibility") + test_kfold_fingerprint_split_reproducibility() + + print("Passed!") \ No newline at end of file diff --git a/atomsci/ddm/utils/model_file_reader.py b/atomsci/ddm/utils/model_file_reader.py index 308c21de..1cfb737e 100644 --- a/atomsci/ddm/utils/model_file_reader.py +++ b/atomsci/ddm/utils/model_file_reader.py @@ -194,7 +194,14 @@ def get_response_cols(self): """ return self.get_training_dataset().get('response_cols') - + + def get_random_seed(self): + """Returns: + (int): random seed used in model training. Returns None if not found. + + """ + return self.metadata_dict.get('seed') + def get_model_info(self): """Extract the model metadata (and if applicable, model metrics) diff --git a/atomsci/ddm/utils/model_retrain.py b/atomsci/ddm/utils/model_retrain.py index 1e376f5e..bb77aa3f 100644 --- a/atomsci/ddm/utils/model_retrain.py +++ b/atomsci/ddm/utils/model_retrain.py @@ -49,7 +49,7 @@ mlmt_supported = False -def train_model(input, output, dskey='', production=False): +def train_model(input, output, dskey='', production=False, keep_seed=False): """Retrain a model saved in a model_metadata.json file Args: @@ -76,6 +76,13 @@ def train_model(input, output, dskey='', production=False): # Parse parameters params = parse.wrapper(config) + + # keep or discard seed. + if keep_seed and params.seed is None: + logger.warning("Expected to find random seed not found.
Retraining using a new random seed.") + elif not keep_seed: + params.seed = None + params.result_dir = output # otherwise this will have the same uuid as the source model params.model_uuid = None @@ -101,7 +108,7 @@ def train_model(input, output, dskey='', production=False): return model -def train_model_from_tar(input, output, dskey='', production=False): +def train_model_from_tar(input, output, dskey='', production=False, keep_seed=False): """Retrain a model saved in a tar.gz file Args: @@ -111,6 +118,8 @@ def train_model_from_tar(input, output, dskey='', production=False): dskey (str): new dataset key if file location has changed + keep_seed (bool): True to keep the same random seed. + Returns: the model pipeline object with trained model """ @@ -122,9 +131,9 @@ def train_model_from_tar(input, output, dskey='', production=False): # make metadata path metadata_path = os.path.join(tmpdir, 'model_metadata.json') - return train_model(metadata_path, output, dskey=dskey, production=production) + return train_model(metadata_path, output, dskey=dskey, production=production, keep_seed=keep_seed) -def train_model_from_tracker(model_uuid, output_dir, production=False): +def train_model_from_tracker(model_uuid, output_dir, production=False, keep_seed=False): """Retrain a model saved in the model tracker, but save it to output_dir and don't insert it into the model tracker Args: @@ -158,6 +167,12 @@ def train_model_from_tracker(model_uuid, output_dir, production=False): #if config[] # Parse parameters params = parse.wrapper(config) + # keep or discard seed. + if keep_seed and params.seed is None: + logger.warning("Expected to find random seed not found.
Retraining using a new random seed.") + elif not keep_seed: + params.seed = None + params.result_dir = output_dir # otherwise this will have the same uuid as the source model params.model_uuid = None @@ -181,7 +196,7 @@ def train_model_from_tracker(model_uuid, output_dir, production=False): return model -def train_models_from_dataset_keys(input, output, pred_type='regression', production=False): +def train_models_from_dataset_keys(input, output, pred_type='regression', production=False, keep_seed=False): """Retrain a list of models from an input file Args: @@ -243,7 +258,7 @@ def train_models_from_dataset_keys(input, output, pred_type='regression', produc for model_uuid in best_mods.model_uuid.sort_values(): try: logger.debug('Training %s in %s' % (model_uuid, output)) - train_model_from_tracker(model_uuid, output, production=production) + train_model_from_tracker(model_uuid, output, production=production, keep_seed=keep_seed) except Exception: Exception(f'Error for model_uuid {model_uuid}') pass @@ -263,6 +278,7 @@ def main(argv): parser.add_argument('-dk', '--dataset_key', default='', help='Sometimes dataset keys get moved. Specify new location of dataset. Only works when passing in one model at time.') parser.add_argument('-pd_type', '--pred_type', default='regression', help='Specify the prediction type used for model retrain.
The default is set to regression.') parser.add_argument('-prod', '--production', action='store_true', default=False, help='Retrain the model in production mode') + parser.add_argument('-keep_seed', '--keep_seed', action='store_true', default=False, help='Retrain the model using the saved seed if available.') args = parser.parse_args() @@ -277,19 +293,19 @@ def main(argv): if os.path.isdir(input): # loop for path in Path(input).rglob('model_metadata.json'): - train_model(path.absolute(), output, production=args.production) + train_model(path.absolute(), output, production=args.production, keep_seed=args.keep_seed) elif os.path.isfile(input): # 2 if it's a file, check if it's a json or tar.gz or file that contains list of dataset keys if input.endswith('.json'): - train_model(input, output, dskey=args.dataset_key, production=args.production) + train_model(input, output, dskey=args.dataset_key, production=args.production, keep_seed=args.keep_seed) elif input.endswith('.tar.gz'): - train_model_from_tar(input, output, dskey=args.dataset_key, production=args.production) + train_model_from_tar(input, output, dskey=args.dataset_key, production=args.production, keep_seed=args.keep_seed) else: - train_models_from_dataset_keys(input, output, pred_type=args.pred_type, production=args.production) + train_models_from_dataset_keys(input, output, pred_type=args.pred_type, production=args.production, keep_seed=args.keep_seed) else: try: # 3 try to process 'input' as uuid - train_model_from_tracker(input, output, production=args.production) + train_model_from_tracker(input, output, production=args.production, keep_seed=args.keep_seed) except Exception: Exception('Unrecognized input %s'%input) diff --git a/pip/cpu_requirements.txt b/pip/cpu_requirements.txt index f3feb350..00afdfa3 100644 --- a/pip/cpu_requirements.txt +++ b/pip/cpu_requirements.txt @@ -29,6 +29,8 @@ pyarrow bravado +imbalanced-learn + # optional for home users: prettier images in RDKit # requires pkg-config to build: sudo
apt-get pkg-config # requires Cairo: sudo apt-get install libcairo2-dev diff --git a/pip/cuda_requirements.txt b/pip/cuda_requirements.txt index c223ad16..3b16ac75 100644 --- a/pip/cuda_requirements.txt +++ b/pip/cuda_requirements.txt @@ -31,6 +31,8 @@ pyarrow bravado +imbalanced-learn + # optional for home users: prettier images in RDKit # requires pkg-config to build: sudo apt-get pkg-config # requires Cairo: sudo apt-get install libcairo2-dev diff --git a/pip/mchip_requirements.txt b/pip/mchip_requirements.txt index 6bb58ff1..cf005cd4 100644 --- a/pip/mchip_requirements.txt +++ b/pip/mchip_requirements.txt @@ -30,6 +30,8 @@ pyarrow bravado +imbalanced-learn + # optional for home users: prettier images in RDKit # requires pkg-config to build: sudo apt-get pkg-config # requires Cairo: sudo apt-get install libcairo2-dev diff --git a/pip/rocm_requirements.txt b/pip/rocm_requirements.txt index 296f7b70..08601156 100644 --- a/pip/rocm_requirements.txt +++ b/pip/rocm_requirements.txt @@ -19,6 +19,8 @@ umap-learn pyarrow +imbalanced-learn + # requires pkg-config to build: sudo apt-get pkg-config # requires Cairo: sudo apt-get install libcairo2-dev # pycairo