rerun

sormazabal · sormazabal · commit 12f196e4f17d · 2024-06-21T18:44:20.000+08:00
diff --git a/MTL_Train_TCGA_Test_SCLC.ipynb b/MTL_Train_TCGA_Test_SCLC.ipynb
@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -115,11 +115,11 @@
      "output_type": "stream",
      "text": [
       "BRCA\n",
-      "{'ACTB', 'EFTUD2', 'PLAU', 'DDX23', 'GSK3B', 'HSPA8', 'MKI67', 'ESR1', 'SHMT2', 'ERBB2', 'STAU1', 'SSR1', 'UBXN6', 'PRKRA', 'TUBA1C', 'SNIP1', 'YWHAB', 'PGR', 'BTRC', 'SRSF5'} 20\n",
+      "{'SHMT2', 'PLAU', 'TUBA1C', 'STAU1', 'MKI67', 'HSPA8', 'ESR1', 'PRKRA', 'DDX23', 'YWHAB', 'GSK3B', 'PGR', 'UBXN6', 'SNIP1', 'ACTB', 'BTRC', 'SRSF5', 'ERBB2', 'EFTUD2', 'SSR1'} 20\n",
       "LUAD\n",
-      "{'SSR1', 'PUM1', 'SERBP1', 'SLC2A1', 'CADM1', 'ALCAM', 'HNRNPU', 'PRKRA', 'PTK7', 'KDM1A', 'KRR1', 'STAU1', 'CDC73', 'OCIAD1', 'HIF1A', 'DHX9', 'CLTC', 'EPCAM'} 18\n",
+      "{'PTK7', 'KRR1', 'OCIAD1', 'SLC2A1', 'DHX9', 'CLTC', 'PRKRA', 'EPCAM', 'KDM1A', 'STAU1', 'HIF1A', 'SERBP1', 'HNRNPU', 'CADM1', 'ALCAM', 'PUM1', 'SSR1', 'CDC73'} 18\n",
       "COAD\n",
-      "{'PROM1', 'CD44', 'HNRNPK', 'ZBTB2', 'TFCP2', 'RNF4', 'SERBP1', 'HNRNPR', 'EPCAM', 'ABCG2', 'HNRNPU', 'ABCB1', 'HNRNPL', 'DHX9', 'RPL4', 'PUM1', 'ALCAM', 'ALDH1A1', 'HNRNPA1', 'ABCC1'} 20\n",
+      "{'ZBTB2', 'DHX9', 'HNRNPL', 'SERBP1', 'ABCC1', 'HNRNPA1', 'ABCG2', 'HNRNPR', 'RNF4', 'ABCB1', 'HNRNPU', 'RPL4', 'TFCP2', 'CD44', 'PROM1', 'EPCAM', 'PUM1', 'HNRNPK', 'ALDH1A1', 'ALCAM'} 20\n",
       "all three\n",
       "set()\n"
      ]
@@ -875,7 +875,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -921,7 +921,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -959,37 +959,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def cross_validation(manager, config, log_path, external_testing_dataloader):\n",
-    "    for key, values in manager['TCGA_BLC']['dataloaders'].items():\n",
-    "        if isinstance(key, int) and config['cross_validation']:\n",
-    "            models, optimizers = create_models_and_optimizers(config)\n",
-    "            lit_model = LitFullModel(models, optimizers, config)\n",
-    "            trainer = pl.Trainer(                                               # Create sub-folders for each fold.\n",
-    "                default_root_dir=log_path,\n",
-    "                max_epochs=config['max_epochs'],\n",
-    "                log_every_n_steps=1,\n",
-    "                enable_model_summary=False,\n",
-    "                enable_checkpointing=False,\n",
-    "                \n",
-    "            )\n",
-    "            \n",
-    "            trainer.fit(lit_model, train_dataloaders=values['train'])\n",
-    "            \n",
-    "        \n",
-    "        elif key == 'train':\n",
-    "            train = values\n",
-    "        elif key == 'test':\n",
-    "            test = external_testing_dataloader #values\n",
-    "    return train, test"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1001,32 +971,28 @@
     " \n",
     "    with open(config_file, 'r') as f:\n",
     "        config = yaml.load(f, Loader=yaml.FullLoader)\n",
-    "    override_n_genes(config)                        \n",
+    "    override_n_genes(config)                        # For multi-task graph models.\n",
     "    config_name = Path(config_file).stem\n",
     "\n",
-    "    # Setup logging.\n",
+    "    # Setup logging.\n",
     "    log_path = f'Logs/{config_name}/{datetime.now():%Y-%m-%dT%H:%M:%S}/'\n",
     "    setup_logging(log_path)\n",
-    "    \n",
+    "    #setup_logging(log_path := f'Logs/{config_name}/{datetime.now():%Y-%m-%dT%H:%M:%S}/')\n",
     "    logger = get_logger(config_name)\n",
     "    logger.info(f'Using Random Seed {SEED} for this experiment')\n",
-    "    \n",
     "    get_logger('lightning.pytorch.accelerators.cuda', log_level='WARNING')      # Disable cuda logging.\n",
     "    filterwarnings('ignore', r'.*Skipping val loop.*')                          # Disable val loop warning.\n",
-    "    filterwarnings('ignore', r\".*Your `test_dataloader`'s sampler has shuffling enabled*\")    # Disable val shuffle warning.\n",
-    "\n",
+    "    filterwarnings('ignore', r\".*Your `test_dataloader`'s sampler has shuffling enabled`*\")    # Disable test dataloader shuffle warning.\n",
     "\n",
+    "    # Create dataset manager for training data.\n",
+    "    data = {'TCGA_BLC': TCGA_Program_Dataset(**config['datasets'])}\n",
+    "    \n",
     "    #add the external data\n",
     "    external_testing_data = ExternalDataModule(**config['external_datasets']) \n",
     "\n",
     "    external_testing_data.setup()\n",
     "\n",
     "    external_testing_dataloader = external_testing_data.test_dataloader()\n",
-    "\n",
-    "     # Create dataset manager for training data.\n",
-    "    data = {'TCGA_BLC': TCGA_Program_Dataset(**config['datasets'])}\n",
-    "    \n",
-    "    \n",
     "    \n",
     "    if 'TCGA_Balanced_Datasets_Manager' == config['datasets_manager']['type']:\n",
     "        manager = TCGA_Balanced_Datasets_Manager(datasets=data, config=config_add_subdict_key(config))\n",
@@ -1048,13 +1014,14 @@
     "            )\n",
     "            \n",
     "            trainer.fit(lit_model, train_dataloaders=values['train'])\n",
-    "            #trainer.test(lit_model, dataloaders=test, verbose=True)          \n",
+    "                   \n",
     "               \n",
     "            \n",
     "        elif key == 'train':\n",
     "            train = values\n",
     "        elif key == 'test':\n",
-    "             test = external_testing_dataloader\n",
+    "            test = external_testing_dataloader #values\n",
+    "\n",
     "    # Train the final model from scratch with all the training data.\n",
     "    models, optimizers = create_models_and_optimizers(config)\n",
     "    lit_model = LitFullModel(models, optimizers, config)\n",
@@ -1069,11 +1036,8 @@
     "\n",
     "    # Test the final model.\n",
     "    bootstrap_results = []\n",
-    "    for _ in tqdm(range(config['bootstrap_repeats']), desc='Bootstrapping'):\n",
-    "        \n",
+    "    for _ in tqdm(range(config['bootstrap_repeats']), desc='Bootstrapping'):       \n",
     "        bootstrap_results.append(trainer.test(lit_model, dataloaders=test, verbose=False)[0]) \n",
-    "        \n",
-    "\n",
     "    bootstrap_results = pd.DataFrame.from_records(bootstrap_results)\n",
     "    for key, value in bootstrap_results.describe().loc[['mean', 'std']].to_dict().items():\n",
     "        logger.info(f'| {key.ljust(10).upper()} | {value[\"mean\"]:.5f} ± {value[\"std\"]:.5f} |')\n",
@@ -1094,32 +1058,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[INFO]\tUsing Random Seed 1126 for this experiment\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[INFO]\tExternal DS - Total 88 patients\n",
-      "[INFO]\tNormalize clinical numerical data using all samples\n",
-      "[INFO]\tExternal DS - Total 88 samples after removing missing values\n",
-      "[INFO]\tExternal DS - Batch size 128\n",
-      "[INFO]\tExternal DS - Total 88 patients, 20 genomic features and 14 clinical features\n",
-      "[INFO]\tExternal DS - Target Type overall_survival\n",
-      "[INFO]\tNormalize clinical numerical data using all samples\n",
-      "[INFO]\tExternal DS - Total 88 samples after removing missing values\n",
-      "[INFO]\tExternal DS - Total 81 samples\n",
-      "[INFO]\tExternal DS - Total 39 features\n",
-      "[INFO]\tExternal DS - Overall survival imbalance ratio 81.48148148148148 %\n",
-      "[INFO]\tSplitting data into test set...\n",
+      "[INFO]\tUsing Random Seed 1126 for this experiment\n",
       "[INFO]\tCreating a TCGA Program Dataset with 3 Projects...\n",
       "Case metadata {}\n",
       "[INFO]\tNo files to download for project TCGA-BRCA\n",
@@ -1160,8 +1106,20 @@
       "[INFO]\tSaving train and test indices to Cache\n",
       "[INFO]\tTotal 2059 patients, 20 genomic features and 14 clinical features\n",
       "[INFO]\tOverall survival imbalance ratio 17.678484701311316 %\n",
+      "[INFO]\tExternal DS - Total 88 patients\n",
+      "[INFO]\tNormalize clinical numerical data using all samples\n",
+      "[INFO]\tExternal DS - Total 88 samples after removing missing values\n",
+      "[INFO]\tExternal DS - Batch size 128\n",
+      "[INFO]\tExternal DS - Total 88 patients, 20 genomic features and 14 clinical features\n",
+      "[INFO]\tExternal DS - Target Type overall_survival\n",
+      "[INFO]\tNormalize clinical numerical data using all samples\n",
+      "[INFO]\tExternal DS - Total 88 samples after removing missing values\n",
+      "[INFO]\tExternal DS - Total 81 samples\n",
+      "[INFO]\tExternal DS - Total 39 features\n",
+      "[INFO]\tExternal DS - Overall survival imbalance ratio 81.48148148148148 %\n",
+      "[INFO]\tSplitting data into test set...\n",
       "[INFO]\tInitializing a TCGA Balanced Datasets Manager containing 1 Datasets...\n",
-      "[INFO]\tUsing indices cache files created at 2024-06-05 17:58:21 from Cache\n"
+      "[INFO]\tUsing indices cache files created at 2024-06-21 18:36:29 from Cache\n"
      ]
     },
     {
@@ -1172,14 +1130,14 @@
       "TPU available: False, using: 0 TPU cores\n",
       "IPU available: False, using: 0 IPUs\n",
       "HPU available: False, using: 0 HPUs\n",
-      "Missing logger folder: Logs/MTL_train_SCLC_test/2024-06-05T17:57:57/lightning_logs\n"
+      "Missing logger folder: Logs/MTL_train_SCLC_test/2024-06-21T18:36:06/lightning_logs\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 11.87it/s, v_num=0]"
+      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 36.20it/s, v_num=0]"
      ]
     },
     {
@@ -1193,7 +1151,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 11.84it/s, v_num=0]"
+      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 35.83it/s, v_num=0]"
      ]
     },
     {
@@ -1211,7 +1169,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 26.08it/s, v_num=1]"
+      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 36.85it/s, v_num=1]"
      ]
     },
     {
@@ -1225,7 +1183,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 25.93it/s, v_num=1]\n"
+      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 36.48it/s, v_num=1]"
      ]
     },
     {
@@ -1242,7 +1200,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 12.11it/s, v_num=2]"
+      "\n",
+      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 36.89it/s, v_num=2]"
      ]
     },
     {
@@ -1256,7 +1215,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 12.08it/s, v_num=2]"
+      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 36.51it/s, v_num=2]"
      ]
     },
     {
@@ -1274,7 +1233,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 11.89it/s, v_num=3]"
+      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 33.32it/s, v_num=3]"
      ]
     },
     {
@@ -1288,7 +1247,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 11.86it/s, v_num=3]\n"
+      "Epoch 49: 100%|██████████| 10/10 [00:00<00:00, 33.05it/s, v_num=3]"
      ]
     },
     {
@@ -1308,18 +1267,31 @@
       "952       Trainable params\n",
       "0         Non-trainable params\n",
       "952       Total params\n",
-      "0.004     Total estimated model params size (MB)\n",
+      "0.004     Total estimated model params size (MB)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
       "`Trainer.fit` stopped: `max_epochs=50` reached.\n",
-      "Bootstrapping: 100%|██████████| 1000/1000 [14:56<00:00,  1.12it/s]"
+      "Bootstrapping: 100%|██████████| 1000/1000 [03:38<00:00,  4.57it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[INFO]\t| AUC_2.0    | 0.41925 ± 0.08806 |\n",
-      "[INFO]\t| PRC_2.0    | 0.80238 ± 0.05609 |\n",
-      "[INFO]\t| C-INDEX_2.0 | 0.47474 ± 0.04398 |\n"
+      "[INFO]\t| AUC_1.0    | 0.50015 ± 0.08427 |\n",
+      "[INFO]\t| PRC_1.0    | 0.83636 ± 0.05430 |\n",
+      "[INFO]\t| C-INDEX_1.0 | 0.49382 ± 0.03842 |\n"
      ]
     },
     {
@@ -1336,35 +1308,6 @@
     "trainer, train, test, models, optimizers, config, lit_model, logger = main(\"config/light/MTL_train_SCLC_test.yaml\")\n",
     "\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# save the model\n",
-    "\n",
-    "torch.save(lit_model.state_dict(), 'model.pth')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# load model\n",
-    "\n",
-    "lit_model.load_state_dict(torch.load('model.pth'))\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -1383,7 +1326,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.undefined"
+   "version": "3.9.17"
   }
  },
  "nbformat": 4,