3 | 3 | {
4 | 4 | "cell_type": "code",
5 | 5 | "execution_count": 1,
6 | - "id": "comfortable-blowing", |
| 6 | + "id": "intended-medicaid", |
7 | 7 | "metadata": {},
8 | 8 | "outputs": [
9 | 9 | {

12 | 12 | "text": [
13 | 13 | "IPython autoawait is `on`, and set to use `asyncio`\n"
14 | 14 | ]
| 15 | + }, |
| 16 | + { |
| 17 | + "ename": "ModuleNotFoundError", |
| 18 | + "evalue": "No module named 'aries_basic_controller'", |
| 19 | + "output_type": "error", |
| 20 | + "traceback": [ |
| 21 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 22 | + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", |
| 23 | + "\u001b[0;32m<ipython-input-1-cf8eac349ea5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0maries_basic_controller\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maries_controller\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mAriesAgentController\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mWEBHOOK_HOST\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"0.0.0.0\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
| 24 | + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'aries_basic_controller'" |
| 25 | + ] |
15 | 26 | }
16 | 27 | ],
17 | 28 | "source": [

32 | 43 | {
33 | 44 | "cell_type": "code",
34 | 45 | "execution_count": 4,
35 | - "id": "weird-lunch", |
| 46 | + "id": "approved-reverse", |
36 | 47 | "metadata": {},
37 | 48 | "outputs": [],
38 | 49 | "source": [
39 | - "\n", |
40 | 50 | "loop = asyncio.get_event_loop()\n",
41 | 51 | "loop.create_task(agent_controller.listen_webhooks())\n",
42 | 52 | "\n",

67 | 77 | },
68 | 78 | {
69 | 79 | "cell_type": "markdown",
70 | - "id": "anonymous-president", |
| 80 | + "id": "amino-router", |
71 | 81 | "metadata": {},
72 | 82 | "source": [
73 | 83 | "## Copy Invite from Researcher"

76 | 86 | {
77 | 87 | "cell_type": "code",
78 | 88 | "execution_count": 5,
79 | - "id": "expired-double", |
| 89 | + "id": "tired-atmosphere", |
80 | 90 | "metadata": {},
81 | 91 | "outputs": [],
82 | 92 | "source": [

87 | 97 | {
88 | 98 | "cell_type": "code",
89 | 99 | "execution_count": 6,
90 | - "id": "finished-milan", |
| 100 | + "id": "diagnostic-colombia", |
91 | 101 | "metadata": {},
92 | 102 | "outputs": [
93 | 103 | {

117 | 127 | {
118 | 128 | "cell_type": "code",
119 | 129 | "execution_count": null,
120 | - "id": "objective-fabric", |
| 130 | + "id": "straight-frame", |
121 | 131 | "metadata": {},
122 | 132 | "outputs": [],
123 | 133 | "source": [

130 | 140 | },
131 | 141 | {
132 | 142 | "cell_type": "code",
133 | - "execution_count": null, |
134 | - "id": "spanish-officer", |
| 143 | + "execution_count": 6, |
| 144 | + "id": "similar-tracy", |
135 | 145 | "metadata": {},
136 | - "outputs": [], |
137 | - "source": [] |
| 146 | + "outputs": [ |
| 147 | + { |
| 148 | + "data": { |
| 149 | + "text/plain": [ |
| 150 | + "333" |
| 151 | + ] |
| 152 | + }, |
| 153 | + "execution_count": 6, |
| 154 | + "metadata": {}, |
| 155 | + "output_type": "execute_result" |
| 156 | + } |
| 157 | + ], |
| 158 | + "source": [ |
| 159 | + "# Data pre-processing\n", |
| 160 | + "\n", |
| 161 | + "import numpy as np # linear algebra\n", |
| 162 | + "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", |
| 163 | + "import matplotlib.pyplot as plt\n", |
| 164 | + "import seaborn as sns\n", |
| 165 | + "\n", |
| 166 | + "\n", |
| 167 | + "# prep\n", |
| 168 | + "from sklearn.model_selection import train_test_split\n", |
| 169 | + "from sklearn import preprocessing\n", |
| 170 | + "from sklearn.datasets import make_classification\n", |
| 171 | + "from sklearn.preprocessing import binarize, LabelEncoder, MinMaxScaler\n", |
| 172 | + "\n", |
| 173 | + "#Read in Data\n", |
| 174 | + "train_df = pd.read_csv('hospital1.csv')\n", |
| 175 | + "\n", |
| 176 | + "\n", |
| 177 | + "########## START DATA CLEANING ###############\n", |
| 178 | + "\n", |
| 179 | + "\n", |
| 180 | + "#dealing with missing data\n", |
| 181 | + "#Let’s get rid of the variables \"Timestamp\",“comments”, “state” just to make our lives easier.\n", |
| 182 | + "train_df = train_df.drop(['comments'], axis= 1)\n", |
| 183 | + "train_df = train_df.drop(['state'], axis= 1)\n", |
| 184 | + "train_df = train_df.drop(['Timestamp'], axis= 1)\n", |
| 185 | + "\n", |
| 186 | + "# Assign default values for each data type\n", |
| 187 | + "defaultInt = 0\n", |
| 188 | + "defaultString = 'NaN'\n", |
| 189 | + "defaultFloat = 0.0\n", |
| 190 | + "\n", |
| 191 | + "# Create lists by data tpe\n", |
| 192 | + "intFeatures = ['Age']\n", |
| 193 | + "stringFeatures = ['Gender', 'Country', 'self_employed', 'family_history', 'treatment', 'work_interfere',\n", |
| 194 | + " 'no_employees', 'remote_work', 'tech_company', 'anonymity', 'leave', 'mental_health_consequence',\n", |
| 195 | + " 'phys_health_consequence', 'coworkers', 'supervisor', 'mental_health_interview', 'phys_health_interview',\n", |
| 196 | + " 'mental_vs_physical', 'obs_consequence', 'benefits', 'care_options', 'wellness_program',\n", |
| 197 | + " 'seek_help']\n", |
| 198 | + "floatFeatures = []\n", |
| 199 | + "\n", |
| 200 | + "# Clean the NaN's\n", |
| 201 | + "for feature in train_df:\n", |
| 202 | + " if feature in intFeatures:\n", |
| 203 | + " train_df[feature] = train_df[feature].fillna(defaultInt)\n", |
| 204 | + " elif feature in stringFeatures:\n", |
| 205 | + " train_df[feature] = train_df[feature].fillna(defaultString)\n", |
| 206 | + " elif feature in floatFeatures:\n", |
| 207 | + " train_df[feature] = train_df[feature].fillna(defaultFloat)\n", |
| 208 | + " else:\n", |
| 209 | + " print('Error: Feature %s not recognized.' % feature)\n", |
| 210 | + "\n", |
| 211 | + "#clean 'Gender'\n", |
| 212 | + "#Slower case all columm's elements\n", |
| 213 | + "gender = train_df['Gender'].str.lower()\n", |
| 214 | + "#print(gender)\n", |
| 215 | + "\n", |
| 216 | + "#Select unique elements\n", |
| 217 | + "gender = train_df['Gender'].unique()\n", |
| 218 | + "\n", |
| 219 | + "#Made gender groups\n", |
| 220 | + "male_str = [\"male\", \"m\", \"male-ish\", \"maile\", \"mal\", \"male (cis)\", \"make\", \"male \", \"man\",\"msle\", \"mail\", \"malr\",\"cis man\", \"Cis Male\", \"cis male\"]\n", |
| 221 | + "trans_str = [\"trans-female\", \"something kinda male?\", \"queer/she/they\", \"non-binary\",\"nah\", \"all\", \"enby\", \"fluid\", \"genderqueer\", \"androgyne\", \"agender\", \"male leaning androgynous\", \"guy (-ish) ^_^\", \"trans woman\", \"neuter\", \"female (trans)\", \"queer\", \"ostensibly male, unsure what that really means\"]\n", |
| 222 | + "female_str = [\"cis female\", \"f\", \"female\", \"woman\", \"femake\", \"female \",\"cis-female/femme\", \"female (cis)\", \"femail\"]\n", |
| 223 | + "\n", |
| 224 | + "for (row, col) in train_df.iterrows():\n", |
| 225 | + "\n", |
| 226 | + " if str.lower(col.Gender) in male_str:\n", |
| 227 | + " train_df['Gender'].replace(to_replace=col.Gender, value='male', inplace=True)\n", |
| 228 | + "\n", |
| 229 | + " if str.lower(col.Gender) in female_str:\n", |
| 230 | + " train_df['Gender'].replace(to_replace=col.Gender, value='female', inplace=True)\n", |
| 231 | + "\n", |
| 232 | + " if str.lower(col.Gender) in trans_str:\n", |
| 233 | + " train_df['Gender'].replace(to_replace=col.Gender, value='trans', inplace=True)\n", |
| 234 | + "\n", |
| 235 | + "#Get rid of bullshit\n", |
| 236 | + "stk_list = ['A little about you', 'p']\n", |
| 237 | + "train_df = train_df[~train_df['Gender'].isin(stk_list)]\n", |
| 238 | + "\n", |
| 239 | + "#complete missing age with mean\n", |
| 240 | + "train_df['Age'].fillna(train_df['Age'].median(), inplace = True)\n", |
| 241 | + "\n", |
| 242 | + "# Fill with media() values < 18 and > 120\n", |
| 243 | + "s = pd.Series(train_df['Age'])\n", |
| 244 | + "s[s<18] = train_df['Age'].median()\n", |
| 245 | + "train_df['Age'] = s\n", |
| 246 | + "s = pd.Series(train_df['Age'])\n", |
| 247 | + "s[s>120] = train_df['Age'].median()\n", |
| 248 | + "train_df['Age'] = s\n", |
| 249 | + "\n", |
| 250 | + "#Ranges of Age\n", |
| 251 | + "train_df['age_range'] = pd.cut(train_df['Age'], [0,20,30,65,100], labels=[\"0-20\", \"21-30\", \"31-65\", \"66-100\"], include_lowest=True)\n", |
| 252 | + "\n", |
| 253 | + "#There are only 0.20% of self work_interfere so let's change NaN to \"Don't know\n", |
| 254 | + "#Replace \"NaN\" string from defaultString\n", |
| 255 | + "\n", |
| 256 | + "train_df['work_interfere'] = train_df['work_interfere'].replace([defaultString], 'Don\\'t know' )\n", |
| 257 | + "\n", |
| 258 | + "#Encoding data\n", |
| 259 | + "labelDict = {}\n", |
| 260 | + "for feature in train_df:\n", |
| 261 | + " le = preprocessing.LabelEncoder()\n", |
| 262 | + " le.fit(train_df[feature])\n", |
| 263 | + " le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))\n", |
| 264 | + " train_df[feature] = le.transform(train_df[feature])\n", |
| 265 | + " # Get labels\n", |
| 266 | + " labelKey = 'label_' + feature\n", |
| 267 | + " labelValue = [*le_name_mapping]\n", |
| 268 | + " labelDict[labelKey] =labelValue\n", |
| 269 | + "\n", |
| 270 | + "#Get rid of 'Country'\n", |
| 271 | + "train_df = train_df.drop(['Country'], axis= 1)\n", |
| 272 | + "\n", |
| 273 | + "# Scaling Age\n", |
| 274 | + "scaler = MinMaxScaler()\n", |
| 275 | + "train_df['Age'] = scaler.fit_transform(train_df[['Age']])\n", |
| 276 | + "\n", |
| 277 | + "# define X and y\n", |
| 278 | + "feature_cols = ['Age', 'Gender', 'family_history', 'benefits', 'care_options', 'anonymity', 'leave', 'work_interfere']\n", |
| 279 | + "X = train_df[feature_cols]\n", |
| 280 | + "y = train_df.treatment\n", |
| 281 | + "\n", |
| 282 | + "# split X and y into training and testing sets\n", |
| 283 | + "X_train, y_train = X, y\n", |
| 284 | + "\n", |
| 285 | + "# Transform pandas dataframe to torch tensor for DL\n", |
| 286 | + "\n", |
| 287 | + "x_train_data = torch.from_numpy(X_train.values)\n", |
| 288 | + "x_train_data = x_train_data.float()\n", |
| 289 | + "\n", |
| 290 | + "y_train_data = []\n", |
| 291 | + "for data in y_train.values:\n", |
| 292 | + " y_train_data.append([data])\n", |
| 293 | + "y_train_data = torch.tensor(y_train_data).float()\n", |
| 294 | + "\n", |
| 295 | + "len(y_train_data)" |
| 296 | + ] |
| 297 | + }, |
| 298 | + { |
| 299 | + "cell_type": "code", |
| 300 | + "execution_count": 7, |
| 301 | + "id": "interesting-marathon", |
| 302 | + "metadata": {}, |
| 303 | + "outputs": [ |
| 304 | + { |
| 305 | + "name": "stdout", |
| 306 | + "output_type": "stream", |
| 307 | + "text": [ |
| 308 | + "/Users/pavlito/PyDentity/projects/aries-fl/notebooks/hospital/../model.pt\n", |
| 309 | + "HOSPITAL MODEL LOADED\n", |
| 310 | + "HOSPITAL IS TRAINING\n", |
| 311 | + "loss at epoch 0 : tensor(0.2684)\n", |
| 312 | + "loss at epoch 5000 : tensor(0.1264)\n", |
| 313 | + "loss at epoch 10000 : tensor(0.1152)\n", |
| 314 | + "loss at epoch 15000 : tensor(0.1114)\n", |
| 315 | + "loss at epoch 20000 : tensor(0.1083)\n", |
| 316 | + "loss at epoch 25000 : tensor(0.1058)\n", |
| 317 | + "loss at epoch 30000 : tensor(0.1040)\n", |
| 318 | + "loss at epoch 35000 : tensor(0.1023)\n", |
| 319 | + "loss at epoch 40000 : tensor(0.1008)\n", |
| 320 | + "loss at epoch 45000 : tensor(0.0996)\n" |
| 321 | + ] |
| 322 | + } |
| 323 | + ], |
| 324 | + "source": [ |
| 325 | + "import torch\n", |
| 326 | + "import traceback\n", |
| 327 | + "\n", |
| 328 | + "# models\n", |
| 329 | + "from torch import nn\n", |
| 330 | + "from torch import optim\n", |
| 331 | + "from torch.autograd import Variable\n", |
| 332 | + "import os\n", |
| 333 | + "import sys\n", |
| 334 | + "\n", |
| 335 | + "# Receive model from the Researcher and train it\n", |
| 336 | + "\n", |
| 337 | + "model_dir = os.getcwd() + \"/../model.pt\"\n", |
| 338 | + "\n", |
| 339 | + "print(model_dir)\n", |
| 340 | + "\n", |
| 341 | + "# Pull in model\n", |
| 342 | + "try:\n", |
| 343 | + " model = torch.load(model_dir)\n", |
| 344 | + "except Exception as e:\n", |
| 345 | + " print(\"HOSPITAL FAILED TO LOAD MODEL\")\n", |
| 346 | + " print(\"Exception Value: \",e)\n", |
| 347 | + " print(\"Traceback \",traceback.format_exc())\n", |
| 348 | + "# return False\n", |
| 349 | + "\n", |
| 350 | + "print(\"HOSPITAL MODEL LOADED\")\n", |
| 351 | + "\n", |
| 352 | + "\n", |
| 353 | + "# Training Logic\n", |
| 354 | + "print(\"HOSPITAL IS TRAINING\")\n", |
| 355 | + "\n", |
| 356 | + "# Define Optimizer\n", |
| 357 | + "opt = optim.SGD(params=model.parameters(), lr=0.1)\n", |
| 358 | + " \n", |
| 359 | + "# opt = torch.optim.SGD(model.parameters(), lr=0.05)\n", |
| 360 | + "\n", |
| 361 | + "\n", |
| 362 | + "# Apply Differential Privacy\n", |
| 363 | + "\n", |
| 364 | + "#privacy_engine = PrivacyEngine(model, batch_size=333, sample_size=1000, alphas=[10, 100], \n", |
| 365 | + "# noise_multiplier=1.3, max_grad_norm=1.0)\n", |
| 366 | + "\n", |
| 367 | + "# privacy_engine.attach(opt)\n", |
| 368 | + "\n", |
| 369 | + "for iter in range(50000):\n", |
| 370 | + "\n", |
| 371 | + " # 1) erase previous gradients (if they exist)\n", |
| 372 | + " opt.zero_grad()\n", |
| 373 | + " # log_msg(\"TRAIN DATA\", x_train_data)\n", |
| 374 | + "\n", |
| 375 | + " # 2) make a prediction\n", |
| 376 | + " pred = model(x_train_data)\n", |
| 377 | + "\n", |
| 378 | + " # 3) calculate how much we missed\n", |
| 379 | + " loss = (((y_train_data - pred) ** 2).sum()) / len(x_train_data)\n", |
| 380 | + "\n", |
| 381 | + " # 4) figure out which weights caused us to miss\n", |
| 382 | + " loss.backward()\n", |
| 383 | + "\n", |
| 384 | + " # 5) change those weights\n", |
| 385 | + " opt.step()\n", |
| 386 | + "\n", |
| 387 | + " # 6) log_msg our progress\n", |
| 388 | + " if (iter % 5000 == 0):\n", |
| 389 | + " print(\"loss at epoch \", iter, \": \", loss.data)\n", |
| 390 | + "\n", |
| 391 | + "torch.save(model, \"../trained_model.pt\")\n", |
| 392 | + "\n", |
| 393 | + "# Detach the Differential Privacy library from the Optimizer (We may don't need this at all)\n", |
| 394 | + "# privacy_engine.detach()\n", |
| 395 | + "\n" |
| 396 | + ] |
138 | 397 | },
139 | 398 | {
140 | 399 | "cell_type": "code",
141 | 400 | "execution_count": null,
142 | - "id": "laughing-demand", |
| 401 | + "id": "martial-military", |
143 | 402 | "metadata": {},
144 | 403 | "outputs": [],
145 | 404 | "source": [